# This script generates all figures in the paper. # software package: maanova1.2.1, Rmaanova0.96-1 # all figures are made into black and white # the first number is from the all probeset analysis and the second number is # from revision with SNP probesets removed (6-26-06 xq) ################################################################# setwd("F:/xcui/JAX/MicroarrayAnalysis/Churchill_heterosis_ABF1/Affy/analysis06") ######################### Figure 1 in paper: clustering samples load("../ftest.sample.yuna.RData") ftest.sample$obsAnova$Sample.level <- c("AJ","AJ","AJ","AJxB6","AJxB6","AJxB6","B6","B6","B6","B6xAJ","B6xAJ","B6xAJ") #idx<-which(ftest.sample$Fs$adjPvalperm<0.05) # length(idx)#15861 idx<-which(ftest.sample$Fs$adjPvalperm<0.001) length(idx)#6148 #HC of samples cluster <- macluster(ftest.sample$obsAnova, term="Sample", idx.gene=idx, what = "sample") par(mfcol=c(1,1)) source("consensusNoTitle.R") tmp = consensus(cluster, level = 0.8) # Note: looks good. idx<-which(ftest.sample$Fs$adjPvalperm<0.05) length(idx)#15847 #HC of samples cluster <- macluster(ftest.sample$obsAnova, "Sample", idx, what = "sample") par(mfcol=c(1,1)) source("consensusNoTitle.R") consensus(cluster, level = 0.8) #HC of samples cluster <- macluster(ftest.sample$obsAnova, "Sample", idx, what = "gene") par(mfcol=c(1,1)) source("consensusNoTitle.R") consensus(cluster, level = 0.8) # kmean cluster cluster <- macluster(ftest.sample$obsAnova, term="Sample", idx.gene=idx, what = "gene", method="kmean", kmean.ngroups=7) consensus(cluster, level = 0.8) ############## plot the cdf of varianceMouse (Figure 2 in paper) # Use the shrinkage estimators to plot load("anova.strainSEp.RData") library(stepfun) # shrink the variance components Shrink.s2.aj= JSshrinker(anova.AJ$S2, anova.AJ$model$df) Shrink.s2.b6= JSshrinker(anova.B6$S2, anova.B6$model$df) Shrink.s2.ab= JSshrinker(anova.AB$S2, anova.AB$model$df) Shrink.s2.ba= JSshrinker(anova.BA$S2, anova.BA$model$df) # replot the figure again par(mfcol=c(1,1)) x <- (1:200)/1000 tmp <- ecdf(Shrink.s2.aj[,1]) y <- tmp(x) plot(x[2:201],y,type="l", col="black",xlab="Mouse Variance", ylab="Cumulative Fraction", main=" ",xlim=c(0, 0.15), ylim=c(0.8, 1)) tmp <- ecdf(Shrink.s2.b6[,1]) y <- tmp(x) lines(x[2:201],y, lty=3) tmp <- ecdf(Shrink.s2.ab[,1]) y <- tmp(x) lines(x[2:201],y, lty=5) tmp <- ecdf(Shrink.s2.ba[,1]) y <- tmp(x) lines(x[2:201],y,lty=4) legend(0.1, 0.9, c("AJ","B6","AJxB6","B6xAJ"), lty = c(1,3,5,4)) ########## Figure 3 in paper: heterability ######################################## setwd("F:/xcui/JAX/MicroarrayAnalysis/Churchill_heterosis_ABF1/Affy/analysis06/revise") load(file="Data.RData") # subset the whole data set into AB and BA F1 with the parental lines DataAB <- subset(Data, arrays=c(1:18)) DataBA <- subset(Data, arrays=cbind(t(c(1:6)),t(c(19:24)),t(c(13:18)))) #Data <- DataAB Data <- DataBA # estimate the variances of grp, mouse, and meas. model.affy2 <- makeModel(DataAB,formula=~Sample+Strain, random=~Sample+Strain) anova.AB.affy2 <- fitmaanova(DataAB, model.affy2) load("anova.AB.affy2.RData") #anova.AB.affy2$S2.level #[1] "Sample" "Strain" #load("anova.AB.affy2.RData") # raw variance s2 <- anova.AB.affy2$S2 h23 <- s2[,2]/(s2[,1]+s2[,2]+s2[,3]) H23 <- h23[which(h23>0.01)] # shrunken variance JSs2 <- JSshrinker(anova.AB.affy2$S2, anova.AB.affy2$model$df) h24 <- JSs2[,2]/(JSs2[,1]+JSs2[,2]+JSs2[,3]) H24 <- h24[which(h24>0.01)] median(h23) # 0.16 median(H23) # 0.30 median(h24) # 0.22 median(H24) # 0.38 length(H24)/length(h24) # 0.7041529 length(H23)/length(h23) # 0.6989202 # median heritability of heritable genes load("ftest.strain.yuna.RData") # number of significant genes idx.h = which(ftest.strain$Fs$adjPvalperm<0.05) median(h24[idx.h]) #0.71 # plot them together a <- hist(H23, breaks=100, main="Before Shrinkage", plot=F, freq=T ) b <- hist(H24, breaks=100,main="After Shrinkage", plot=F, freq=F) plot(c(a$breaks[1],a$breaks), c(0, a$counts,0), type="s", xlab="Heritability", ylab="Frequency", lwd=1, lty =3, col="black", xlim=c(0, 1)) lines(c(b$breaks[1],b$breaks),c(0,b$counts,0), type="s", lwd=1, lty=1,col="black") legend(0.6,600, c("naive", "shrinkage"), lty=c(3,1)) # var plot (Figure 7 in presentation) source("varplot.lim.bw.R") # look at the variance of mouse and genetic plot(anova.AB.affy2$S2[,1], anova.AB.affy2$S2[,2]) idx11=which(anova.AB.affy2$S2[,1]>0.000001) idx12=which(anova.AB.affy2$S2[,2]>0.000001) length(idx11) # 28539 idx01= intersect(idx11,idx12) #19174 cor((anova.AB.affy2$S2[idx01,1]), (anova.AB.affy2$S2[idx01,2])) # 0.2036203 cor((anova.AB.affy2$S2[idx01,1])^0.5, (anova.AB.affy2$S2[idx01,2])^0.5)# 0.4684162 cor((anova.AB.affy2$S2[idx01,1])^0.1, (anova.AB.affy2$S2[idx01,2])^0.1) # 0.4675783 plot((anova.AB.affy2$S2[idx01,2])^0.1, (anova.AB.affy2$S2[idx01,1])^0.1, pch=".", xlab='mouse varance', ylab="Strain varance", main="Strain and mouse variance") varplot.lim.bw(anova.AB.affy2, xlim=c(0,0.6)) # plot the shrunken variances temp <- anova.AB.affy2 temp$S2 <- JSs2 varplot.lim.bw(temp, xlim=c(0,0.6)) ####### try all data instead of the subset (used in version 8) load(file="Data.RData") model.strain <- makeModel(Data, formula=~Sample+Strain, random=~Sample+Strain) anova.strain.random=fitmaanova(Data,model.strain) save(anova.strain.random, file="anova.strain.random") s2 <- anova.strain.random$S2 h23 <- s2[,2]/(s2[,1]+s2[,2]+s2[,3]) H23 <- h23[which(h23>0.01)] # shrunken variance JSs2 <- JSshrinker(anova.strain.random$S2, anova.strain.random$model$df) h24 <- JSs2[,2]/(JSs2[,1]+JSs2[,2]+JSs2[,3]) H24 <- h24[which(h24>0.01)] median(h23) #0.16 # 0.1617763 median(H23) #0.26 # 0.2581955 median(h24) #0.20 # 0.200348 median(H24) #0.31 #0.3060319 length(H24)/length(h24) # 0.7412252 #0.7439736 length(H23)/length(h23) # 0.7367242 #0.7392943 length(h23)-length(H23) # 11874 #11756 length(h24)-length(H24) #11676 #11545 length(which(h23>0.5)) #6503 #6557 length(which(h23>0.5))/length(h23) #0.1441875 #0.1454106 length(which(h24>0.5)) #8131 #8197 length(which(h24>0.5))/length(h24)#0.1802843 #0.1817799 # plot them together a <- hist(H23, breaks=100, main="Before Shrinkage", plot=F, freq=T ) b <- hist(H24, breaks=100,main="After Shrinkage", plot=F, freq=F) plot(c(a$breaks[1],a$breaks), c(0, a$counts,0), type="s", xlab="Heritability", ylab="Frequency", lwd=1, lty =3, col="black", xlim=c(0, 1)) lines(c(b$breaks[1],b$breaks),c(0,b$counts,0), type="s", lwd=1, lty=1,col="black") legend(0.6,600, c("naive", "shrinkage"), lty=c(3,1)) # var plot (Figure 7 in presentation) source("varplot.lim.bw.R") varplot.lim.bw(anova.strain.random, xlim=c(0,0.6)) # plot the shrunken variances temp <- anova.strain.random temp$S2 <- JSs2 varplot.lim.bw(temp, xlim=c(0,0.6)) # comments: the shrinkage gain becomes much smaller ####### Try the BA data Data <- DataBA # estimate the variances of grp, mouse, and meas. model.affy2 <- makeModel(DataBA,formula=~Sample+Strain, random=~Sample+Strain) anova.BA.affy2 <- fitmaanova(DataBA, model.affy2) #anova.AB.affy2$S2.level #[1] "Sample" "Strain" # raw variance s2 <- anova.BA.affy2$S2 h23 <- s2[,2]/(s2[,1]+s2[,2]+s2[,3]) H23 <- h23[which(h23>0.01)] # shrunken variance JSs2 <- JSshrinker(anova.BA.affy2$S2, anova.BA.affy2$model$df) h24 <- JSs2[,2]/(JSs2[,1]+JSs2[,2]+JSs2[,3]) H24 <- h24[which(h24>0.01)] median(h23) # 0.16 median(H23) # 0.29 median(h24) # 0.22 median(H24) # 0.37 length(H24)/length(h24) # 0.7031551 length(H23)/length(h23) # 0.697612 # The question is: should we do separately or combined? The two separate set show similar result with higher percent of heritability ################# Figure 5 in paper: degree of dominance plot #################################### load(file="ftest.strain.yuna.RData") idx2 <- which(ftest.strain$Fs$adjPvalperm<0.01)#7871 load(file="anova.strain.RData") # plot the four panels together par(mfcol=c(2,2)) anova <- anova.strain d <- anova$Strain[,2]-0.5*(anova$Strain[,1]+anova$Strain[,3]) a <-0.5*(anova$Strain[,1]-anova$Strain[,3]) cor(abs(a),abs(d))#0.4080499 dd <-d/a # It could be huge because of small a #histogram temp <- dd limt <- 10 # the boundary of x length(temp[temp< -limt])# 1509 length(temp[temp > limt])# 1641 idx <- which(temp< -limt) temp[idx]=-limt idx <- which(temp>limt) temp[idx]=limt A <- hist( temp, breaks=100,plot=F, freq=T ) plot(c(A$breaks[1],A$breaks), c(0, A$density,0), type="s", xlab="d/a",ylab="Frequency", lwd=1, lty =1, col="black", xlim=c(-limt,limt), main="AxB") abline(v=1,col="black", lty=3) abline(v=-1,col="black", lty=3) abline(v=0,col="black", lty=1) # plot the scatter plot idx <- which(abs(dd)<10) plot(dd[idx],a[idx], pch=".", cex=2, xlab="d/a", ylab="a ", main="AxB", col="grey") idx <- intersect(idx,idx2) points(dd[idx],a[idx],pch=".", cex=2, col="black") # the plot for BA anova <- anova.strain d <- anova$Strain[,4]-0.5*(anova$Strain[,1]+anova$Strain[,3]) a <-0.5*(anova$Strain[,1]-anova$Strain[,3]) cor(abs(a),abs(d))#0.4080499 dd <-d/a # It could be huge because of small a #histogram temp <- dd limt <- 10 # the boundary of x length(temp[temp< -limt])# length(temp[temp > limt])# idx <- which(temp< -limt) temp[idx]=-limt idx <- which(temp>limt) temp[idx]=limt A <- hist( temp, breaks=100,plot=F, freq=T ) plot(c(A$breaks[1],A$breaks), c(0, A$density,0), type="s", xlab="d/a", ylab="Frequency", lwd=1, lty =1, col="black", xlim=c(-limt,limt), main="BxA") abline(v=1,col="black", lty=3) abline(v=-1,col="black", lty=3) abline(v=0,col="black", lty=1) # plot the scatter plot idx <- which(abs(dd)<10) plot(dd[idx],a[idx], pch=".", cex=2, xlab="d/a", ylab="a ", main="BxA", col="grey") idx <- intersect(idx,idx2) points(dd[idx],a[idx],pch=".", cex=2, col="black") # the scater plot of d/a vs a for AxB idx <- which(abs(dd)<5) plot(dd[idx],a[idx], pch=".", cex=3, xlab="d/a", ylab=" ", main="AxB") idx <- intersect(idx,idx2) points(dd[idx],a[idx],pch=".", cex=3, col="red") ################# Figure 3 in paper: ad plot load(file="FourthRun/ttest.ab.yuna.RData") load(file="FourthRun/ttest.f1ab.yuna.RData") load(file="FourthRun/ttest.f1ba.yuna.RData") anova.strain = ttest.ab$obsAnova # additive and dominant effect d.AB <- anova.strain$Strain[,2]-0.5*(anova.strain$Strain[,1]+anova.strain$Strain[,3]) a <-0.5*(anova.strain$Strain[,1] - anova.strain$Strain[,3]) idx.a <-which(ttest.ab$Fs$adjPvalperm<0.05) #8363 #8846 #8950 idx.d.AB <- which(ttest.f1ab$Fs$adjPvalperm<0.05) #612 #321 #317 idx.d.BA = which(ttest.f1ba$Fs$adjPvalperm<0.05) #23 #20 #24 length(union(idx.d.AB, idx.d.BA)) #615 #324 #321 #overdominance load("FourthRun/ttest.AF1ab.yuna.RData") load("FourthRun/ttest.BF1ab.yuna.RData") idxAF1 <- which(ttest.AF1ab$Fs$adjPvalperm<0.05)# A-AB 1578 #1121 idxBF1 <- which(ttest.BF1ab$Fs$adjPvalperm<0.05)# B-AB 3746 #3625 idxint <- intersect(idxAF1, idxBF1) #518 #406 #470 idxF1out <- which(abs(d.AB)>abs(a))#24635 #24710 #24671 idxoverd <- intersect(idxint, idxF1out)#176 #130 #158 ## ad plot par(mfcol=c(1,1)) plot(a[idx.a],d.AB[idx.a],pch=".", cex=1, xlab="a", ylab="d", col="black") points(a[idx.d.AB],d.AB[idx.d.AB],pch=21, cex=0.8,col="black" ) abline(0,1) abline(0,0) points(a[idxoverd],d.AB[idxoverd],pch=19, cex=0.5, col="grey") abline(v=0) abline(0,-1) text(2,0,"pure additive") text(0,2,"over dominance") text(1.5,1.5,"A dominance") text(-1.5,1.5,"B dominance") ### ############ histograms of p values of AJ vs B6, dominant effect of AB and over dominant of AB similar to A. load(file="FourthRun/ttest.ab.yuna.RData") pval.AvsB <- ttest.ab$Fs$Pvalperm load("FourthRun/ttest.f1ab.yuna.RData") pval.dAB <- ttest.f1ab$Fs$Pvalperm anova.strain = ttest.ab$obsAnova idx1 <- which(anova.strain$Strain[,2]>anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,2]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,2]