######################################################################## # This script is for reanalyzing the F1 data considering the SNPs # between AJ and B6 in the revision of the paper. # 6/21/2006 8:52:11 AM # software package: rmaanova0.98-7 (maybe 1.00 because installed 1.00, but # version function says 0.98, and matestHyunaF.R ########################################################################### ########### test for the SNP removal #################################### # read in the data raw = ReadAffy() result = rma(raw) write.exprs(result, file='rma.expr.affy.txt') # modify the CDF file to remove the SNP containing probes library(ProbeFilter) xyList<-scan("F:/xcui/MicroarrayDataSets/SNPremoveSoftware/AJB6/probe_snp_AJ_B6_noGenotype.txt", what=list('numeric', 'numeric', 'char', 'char', 'numeric'), sep='\t', skip=1) #data@cdfName<-'Mm430_Mm_ENTREZG_7' ProbeFilter(raw, xyList=xyList) rslt<-rma(raw) setwd("F:/xcui/JAX/MicroarrayAnalysis/Churchill_heterosis_ABF1/Affy/Analysis06/revise") write.exprs(rslt, file='rma.expr.customcdf.txt') ################## read in preprocessed data raw.data.custom <- read.madata(datafile="rma.expr.customcdf.txt",designfile="AffyDesignFinal.txt",pmt=2,spotflag=F, cloneid=1, header=T) # already logarithm. raw.data.affy <- read.madata(datafile="rma.expr.affy.txt",designfile="AffyDesignFinal.txt",pmt=2,spotflag=F, cloneid=1, header=T) # find the probe sets that are removed from the custom CDF comp = raw.data.affy$cloneid %in% raw.data.custom$cloneid idx = which(comp==FALSE) raw.data.affy$cloneid[idx] #[1] "1416809_at" "1435628_x_at" "1435697_a_at" "1436822_x_at" "1438050_x_at" "1438936_s_at" "1452452_at" "1452540_a_at" # look at the numerical differences between the two CDF plot(raw.data.affy$data[-idx,1], raw.data.custom$data[,1]) # quite bit of difference. all probe sets change numerically. # read in annotation (description, probeID=cloneid, unigeneID). anno <- read.delim("../description.txt") # Note: anno order differs from cloneid in raw.data ############### redo analysis on data with SNP probes removed #################### Data <- createData(raw.data.custom) id = Data$cloneid # replace the data fieled with raw data to avoid log again Data$data <- raw.data.custom$data save(Data,file="Data.RData") # recalculate the column means Data$colmeans = colMeans(Data$data) save(Data, file="Data.RData") ############### compute the variance components using fitmaanova ################ DataAJ <- subset(Data,arrays=c(1:6)) DataB6 <- subset(Data,arrays=c(13:18)) DataAB <- subset(Data,arrays=c(7:12)) DataBA <- subset(Data,arrays=c(19:24)) model.var <- makeModel(DataAJ,formula=~Sample, random=~Sample) anova.AJ <- fitmaanova(DataAJ,model.var) anova.AB <- fitmaanova(DataAB,model.var) anova.B6 <- fitmaanova(DataB6,model.var) anova.BA <- fitmaanova(DataBA,model.var) save(anova.AJ, anova.AB, anova.B6, anova.BA, file="anova.strainSep.RData") ###################################### fitmaanova #################### model.affy <- makeModel(DataAB,formula=~Sample+Strain, random=~Sample) save(DataAB,DataBA, model.affy, file="DataModelABBA.affy.RData") anova.AB.affy <- fitmaanova(DataAB, model.affy) anova.BA.affy <- fitmaanova(DataBA, model.affy) save(anova.AB.affy,anova.BA.affy, file="anova.ABBA.affy.RData") load(file="anova.ABBA.affy.RData") # estimate the variances of grp, mouse, and meas. model.affy2 <- makeModel(DataAB,formula=~Sample+Strain, random=~Sample+Strain) anova.AB.affy2 <- fitmaanova(DataAB, model.affy2) # load in the list of probesets affected by SNP snp.list = read.delim("probeSetsABsnp.txt",header=F, sep="\t") snp.list = as.character(snp.list[[1]]) # load in the original Data for comparison load(file="../Data.RData") id.org = Data$cloneid ######################### F test of the 12 samples for clustering the samples ########## source("matestHyunaF.R") Data <- createData(raw.data.custom) Data$data <- raw.data.custom$data # recalculate the column means Data$colmeans = colMeans(Data$data) model <- makeModel(Data,formula=~Sample) ftest.sample <- matest(Data,model,term="Sample", n.perm=100,nnodes=1, shuffle="sample", test.method=c(1,0,1,1)) ftest.sample = adjPval(ftest.sample, "adaptive") save(ftest.sample, file="ftest.sample.yuna.RData") idx <- which(ftest.sample$Fs$adjPvalperm<0.01) length(idx) #10464 #10372 idx <- which(ftest.sample$Fs$adjPvalperm<0.005) length(idx) #8994 #8919 idx <- which(ftest.sample$Fs$adjPvalperm<0.05) length(idx) #15861 #15649 idx <- which(ftest.sample$Fs$adjPvalperm<0.1) length(idx) #19888 #19693 idx<-which(ftest.sample$Fs$Pvalperm<0.00001) length(idx)#3024 #2618 idx<-which(ftest.sample$Fs$Pvalperm<0.0001) length(idx)#5402 #5352 idx<-which(ftest.sample$Fs$Pvalperm<0.001) length(idx)#8399 #8362 ftest.sample$obsAnova$Sample.level <- c("AJ","AJ","AJ","AJxB6","AJxB6","AJxB6","B6","B6","B6","B6xAJ","B6xAJ","B6xAJ") volcano(ftest.sample, threshold=c(0.001,0.0001,0.001,0.001), method=c("fdr","fdr","fdr","fdr"), title="Volcano Plot", highlight.flag=TRUE, onScreen=TRUE) ## compare the clustering idx<-which(ftest.sample$Fs$adjPvalperm<0.001) length(idx)#6148 #6084 load(file="../ftest.sample.yuna.RData") idx.org = which(ftest.sample$Fs$adjPvalperm<0.001) length(intersect(setdiff( id.org[idx.org],id[ idx]), snp.list)) #83 length(setdiff( id.org[idx.org],id[ idx])) #111 length(setdiff( id[ idx],id.org[idx.org])) #47 length(intersect(setdiff( id[ idx],id.org[idx.org]), snp.list)) #45 cluster <- macluster(ftest.sample$obsAnova, term="Sample", idx.gene=idx, what = "sample") par(mfcol=c(1,1)) source("../consensusNoTitle.R") tmp = consensus(cluster, level = 0.8) # Note: The same structure as before with one bootstrapping number increase. ################### F test for mouse ########################3 model <- makeModel(Data, formula=~Sample+Strain) ftest.mouse <- matest(Data,model,term="Sample", n.perm=100,nnodes=1, shuffle="sample") ftest.mouse = adjPval(ftest.mouse, "adaptive") save(ftest.mouse, file="ftest.mouse.yuna.RData") idx<-which(ftest.mouse$Fs$Pvalperm<0.001) length(idx)#1356 #1239 idx <- which(ftest.mouse$Fs$adjPvalperm<0.05) length(idx) #1666 $1452 ############### F test for overall group or genotype effect Using Hyuna's new perm ################################ Note: the permutations were run on cheaha. library(maanova) load(file="Data.RData") model.strain <- makeModel(Data, formula=~Sample+Strain, random=~Sample) ftest.strain <- matest(Data,model.strain,term="Strain", n.perm=100, nnodes=1, shuffle="sample") ftest.strain = adjPval(ftest.strain, "adaptive") save(ftest.strain, file="ftest.strain.yuna.RData") load("ftest.strain.yuna.RData") # number of significant genes length(which(ftest.strain$Fs$adjPvalperm<0.05))# 7871 #7819 length(which(ftest.strain$Fs$Pvalperm<0.001)) #3131 #3110 # overlap with other tests at FDR0.05 idx.1 = which(ftest.strain$Fs$adjPvalperm<0.05) #7871 idx.2 = which(ftest.mouse$Fs$adjPvalper<0.05) #1666 length(intersect(idx.1, idx.2)) #268 ########## matest for a and d effects ################################ ##### compare AB and BA vs mean of the parental strains # # Note: strain.level order: A, AxB, B, BxA # contrast matrix # A AxB B BxA #[1,] 1.0 0 -1.0 0 #[2,] 0.5 -1 0.5 0 #[3,] 0.5 0 0.5 -1 #[4,] 0.0 1 0.0 -1 #load(file="Data.RData") model.strain <- makeModel(Data, formula=~Sample+Strain, random=~Sample) model.strain <- makeModel(Data, formula=~Sample+Strain, random=~Sample) ttest.ab <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(1,0,-1,0),1,byrow=T),test.method=c(1,0,0,1),n.perm=100, nnodes=1, shuffle="sample") ttest.ab = adjPval(ttest.ab, "adaptive") save (ttest.ab, file="ttest.ab.yuna.RData") load(file="ttest.ab.yuna.RData") length(which(ttest.ab$Fs$adjPvalperm<0.05))# 8363 # 8846 length(which(ttest.ab$Fs$Pvalperm<0.001)) #3769 #3959 idx = which(ttest.ab$Fs$adjPvalperm<0.05) fd.wSNP = ttest.ab$obsAnova$Strain[,1]-ttest.ab$obsAnova$Strain[,3] load(file="../ttest.ab.yuna.RData") fd.woSNP = ttest.ab$obsAnova$Strain[,1]-ttest.ab$obsAnova$Strain[,3] idx.org = which(ttest.ab$Fs$adjPvalperm<0.05) length(intersect(setdiff( id.org[idx.org],id[ idx]), snp.list)) #95 length(setdiff( id.org[idx.org],id[ idx])) #96 # look at the level of A vs B to see whether these are all A0))/length(snp.list) # 0.5065161 #### end length(setdiff( id[ idx],id.org[idx.org])) #579 length(intersect(setdiff( id[ idx],id.org[idx.org]), snp.list)) #143 length(intersect(snp.list, id.org[idx])) #768 # compare the two F1s model.strain <- makeModel(Data, formula=~Sample+Strain, random=~Sample) ttest.f1s <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(0,1,0,-1),1,byrow=T),test.method=c(1,0,0,1),n.perm=100, nnodes=1, shuffle="sample") ttest.f1s = adjPval(ttest.f1s, "adaptive") save (ttest.f1s, file="ttest.f1s.yuna.RData") load("ttest.f1s.yuna.RData") length(which(ttest.f1s$Fs$adjPvalperm<0.05))# 755 #137 length(which(ttest.f1s$Fs$Pvalperm<0.001)) #820 #539 idx = which(ttest.f1s$Fs$adjPvalperm<0.05) load(file="../ttest.f1s.yuna.RData") idx.org = which(ttest.f1s$Fs$adjPvalperm<0.05) length(intersect(setdiff( id.org[idx.org],id[ idx]), snp.list)) #57 length(setdiff( id.org[idx.org],id[ idx])) #620 length(setdiff( id[ idx],id.org[idx.org])) #2 length(intersect(setdiff( id[ idx],id.org[idx.org]), snp.list)) #2 # test to compare the two F1s and the two parents model.strain <- makeModel(Data, formula=~Sample+Strain, random=~Sample) ttest.ab.f1s <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(1,-1,1,-1),1,byrow=T),test.method=c(1,0,0,1),n.perm=100, nnodes=1, shuffle="sample") ttest.ab.f1s = adjPval(ttest.ab.f1s, "adaptive") save (ttest.ab.f1s, file="ttest.ab.f1.yuna.RData") load("ttest.ab.f1.yuna.RData") length(which(ttest.ab.f1$Fs$adjPvalperm<0.05)) # # 41 idx = which(ttest.ab.f1$Fs$adjPvalperm<0.05) load("../ttest.ab.f1.yuna.RData") idx.org = which(ttest.ab.f1$Fs$adjPvalperm<0.05) #68 length(intersect(setdiff( id.org[idx.org],id[ idx]), snp.list)) #6 length(setdiff( id.org[idx.org],id[ idx])) #64 length(setdiff( id[ idx],id.org[idx.org])) #64 length(intersect(setdiff( id[ idx],id.org[idx.org]), snp.list)) #2 # test for dominance of AB ttest.f1ab <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(0.5,-1,0.5,0),1,byrow=T),test.method=c(1,0,0,1),n.perm=100, nnodes=1, shuffle="sample") # Note: strain.level order: A, AxB, B, BxA ttest.f1ab = adjPval(ttest.f1ab, "adaptive") save (ttest.f1ab, file="ttest.f1ab.yuna.RData") load ( file="ttest.f1ab.yuna.RData") length(which(ttest.f1ab$Fs$adjPvalperm<0.05)) # #321 idx = which(ttest.f1ab$Fs$adjPvalperm<0.05) load ( file="../ttest.f1ab.yuna.RData") idx.org = which(ttest.f1ab$Fs$adjPvalperm<0.05) #612 length(intersect(setdiff( id.org[idx.org],id[ idx]), snp.list)) #29 length(setdiff( id.org[idx.org],id[ idx])) #294 length(setdiff( id[ idx],id.org[idx.org])) #3 length(intersect(setdiff( id[ idx],id.org[idx.org]), snp.list)) #3 # test for dominance of BA ttest.f1ba <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(0.5,0.5,-1),1,byrow=T),test.method=c(1,0,0,1),n.perm=100, nnodes=1, shuffle="sample") # Note: strain.level order: A, AxB, B, BxA ttest.f1ba = adjPval(ttest.f1ba, "adaptive") save (ttest.f1ba, file="ttest.f1ba.yuna.RData") load ( file="ttest.f1ba.yuna.RData") length(which(ttest.f1ba$Fs$adjPvalperm<0.05))# 23 #20 idx = which(ttest.f1ba$Fs$adjPvalperm<0.05) load ( file="../ttest.f1ba.yuna.RData") idx.org = which(ttest.f1ba$Fs$adjPvalperm<0.05) length(intersect(setdiff( id.org[idx.org],id[ idx]), snp.list)) #2 length(setdiff( id.org[idx.org],id[ idx])) #3 length(setdiff( id[ idx],id.org[idx.org])) #0 length(intersect(setdiff( id[ idx],id.org[idx.org]), snp.list)) #0 # overlap between sig genes between F1s and dominant genes load("ttest.f1ab.yuna.RData") load("ttest.f1ba.yuna.RData") load("ttest.f1s.yuna.RData") idx.tmp1= which(ttest.f1ab$Fs$adjPvalperm<0.05) idx.tmp12= which(ttest.f1ba$Fs$adjPvalperm<0.05) idx.tmp2 = which(ttest.f1s$Fs$adjPvalperm<0.05) length(intersect(union(idx.tmp1,idx.tmp12), idx.tmp2))#265 #71 length(union(union(idx.tmp1,idx.tmp12), idx.tmp2)) # #390 ####################### ttest for F1 vs each parental strain ######## library(maanova) source(file="matestHyunaF.R") load(file="Data.RData") model.strain <- makeModel(Data, formula=~Sample+Strain, random=~Sample) # A vs 2 AB ttest.AF1ab <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(1,-1,0,0),1,byrow=T),n.perm=100, nnodes=1, shuffle="sample") ttest.AF1ab = adjPval(ttest.AF1ab) save (ttest.AF1ab, file="ttest.AF1ab.yuna.RData") # A vs 2 BA ttest.AF1ba <- matest(Data,model.strain,term="Strain", Contrast=matrix(c( 1,0,0,-1),1,byrow=T),n.perm=100, nnodes=1, shuffle="sample") ttest.AF1ba = adjPval(ttest.AF1ba) save (ttest.AF1ba, file="ttest.AF1ba.yuna.RData") # B vs AB ttest.BF1ab <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(0,-1,1,0),1,byrow=T),n.perm=100, nnodes=1, shuffle="sample") ttest.BF1ab = adjPval(ttest.BF1ab, "adaptive") save (ttest.BF1ab, file="ttest.BF1ab.yuna.RData") # B vs BA ttest.BF1ba <- matest(Data,model.strain,term="Strain", Contrast=matrix(c(0,0,1,-1),1,byrow=T),n.perm=100, nnodes=1, shuffle="sample") ttest.BF1ba = adjPval(ttest.BF1ba, "adaptive") save (ttest.BF1ba, file="ttest.BF1ba.yuna.RData") ################# identify overdominant genes load("ttest.AF1ba.yuna.RData") load("ttest.AF1ab.yuna.RData") load("ttest.BF1ba.yuna.RData") load("ttest.BF1ab.yuna.RData") #load(file="anova.strain.RData") anova.strain = ttest.AF1ab$obsAnova # for ab idx1 <- which(anova.strain$Strain[,2]>anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,2]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,2]anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,2]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,2]anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,4]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,4]anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,4]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,4]