# This script is for examining the SNP effect through comparing the analysis results from the original data and the data with SNP probe removed. # XQ: 7/26/2006 9:35:24 AM setwd("F:/xcui/JAX/MicroarrayAnalysis/Churchill_heterosis_ABF1/Affy/Analysis06/revise") library(maanova) ######### look at the difference between whole dataset and the dataset without SNP-containing probe sets # load in the list of probesets affected by SNP snp.list = read.delim("probeSetsABsnp.txt",header=F, sep="\t") snp.list = as.character(snp.list[[1]]) #3453 # load in the original Data for comparison load(file="../Data.RData") # read in annotation (description, probeID=cloneid, unigeneID). anno <- read.delim("../../description.txt") # id of the SNP-containing probesets in original Data object idx.snp = match(snp.list, Data$cloneid) #3453 idx.no.snp = setdiff((1:length(Data$cloneid)), idx.snp) # Compare the genes that are different between AJ and B6 but also have SNP in them for their relative gene expression in AJ and B6 # load in the original test results load(file="../FourthRun/ttest.ab.yuna.RData") length(which(ttest.ab$Fs$adjPvalperm<0.05)) #8950 idx.sig = which(ttest.ab$Fs$adjPvalperm<0.05) # compare the expression level of A and B idx.sig.snp = intersect(idx.sig, idx.snp) #801 exp.snp = ttest.ab$obsAnova$Strain[idx.sig.snp,3]-ttest.ab$obsAnova$Strain[idx.sig.snp,1] length(which(exp.snp>0))#467 length(which(exp.snp<0))# 334 # chi-square test p = 3x10-6 # after removing the SNP probes load(file="../RmProbeOnly/ttest.ab.yuna.RData") length(which(ttest.ab$Fs$adjPvalperm<0.05)) #8950 idx.sig = which(ttest.ab$Fs$adjPvalperm<0.05) # compare the expression level of A and B idx.sig.snp = intersect(idx.sig, idx.snp) #661 exp.snp = ttest.ab$obsAnova$Strain[idx.sig.snp,3]-ttest.ab$obsAnova$Strain[idx.sig.snp,1] length(which(exp.snp>0)) #377 length(which(exp.snp<0)) #284 Still very different ################## rerun the tests on data with SNP-probes removed load ("Data.RData") length(Data$cloneid) #45093 Data.no.snp.pr = Data # original data load ("../Data.RData") # rerun all the tests in Cheaha/xcui/F1testRevise_7_21 #### test for 12 samples load(file="../FourthRun/ttest.sample.yuna.RData") ttest.sample.o = ttest.sample load(file="RmProbeOnly/ttest.sample.yuna.RData") idx.sig.o = which(ttest.sample.o$Fs$adjPvalperm<0.001)# 6143 idx.sig = which(ttest.sample$Fs$adjPvalperm<0.001) #6132 length(intersect( Data$cloneid[idx.sig.o], Data.no.snp.pr$cloneid[idx.sig])) #6059 length(intersect(idx.sig.o, idx.snp))#542 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))# #509 length(intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #460 ## mouse effect load(file="../FourthRun/ttest.mouse.yuna.RData") ttest.mouse.o = ttest.mouse load(file="RmProbeOnly/ttest.mouse.yuna.RData") idx.sig.o = which(ttest.mouse.o$Fs$adjPvalperm<0.05)# 1754 idx.sig = which(ttest.mouse$Fs$adjPvalperm<0.05) #1654 length(intersect( Data$cloneid[idx.sig.o], Data.no.snp.pr$cloneid[idx.sig])) #1640 length(intersect(idx.sig.o, idx.snp))#136 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))# #128 length(intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #114 idx.sig.mouse.o = idx.sig.o ### grab all the test results from Cheaha for the following analysis ## f test for the strain effect load(file="../FourthRun/ftest.strain.hyuna.RData") ftest.strain.o = ftest.strain load(file="RmProbeOnly/ftest.strain.hyuna.RData") idx.sig.o = which(ftest.strain.o$Fs$adjPvalperm<0.05)# 8979 idx.sig = which(ftest.strain$Fs$adjPvalperm<0.05) # 7613 #8621 length(intersect( Data$cloneid[idx.sig.o], Data.no.snp.pr$cloneid[idx.sig])) #7613 #8502 length(intersect(idx.sig.o, idx.snp)) #793 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))#766 length(intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #648 # overlap with mouse sig genes length(intersect(idx.sig.o, idx.sig.mouse.o)) #312 ## compare A and B load(file="../FourthRun/ttest.ab.yuna.RData") ttest.ab.o = ttest.ab load(file="RmProbeOnly/ttest.ab.yuna.RData") idx.sig.o = which(ttest.ab.o$Fs$adjPvalperm<0.05)# 8950 idx.sig = which(ttest.ab$Fs$adjPvalperm<0.05) #7869 #8447 length(intersect( Data$cloneid[idx.sig.o], Data.no.snp.pr$cloneid[idx.sig])) #7869 there is no new gene coming us as significant #8327 length(intersect(idx.sig.o, idx.snp))#801 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))#792 length(intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #672 # look at the changed snp-ps in their fold change # before specific idx.c = (intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))) idx.b = Data$cloneid[(intersect(idx.sig.o, idx.snp)) ] idx.a = intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]) ab.diff.b = ttest.ab.o$obsAnova$Strain[,3] - ttest.ab.o$obsAnova$Strain[,1] ab.diff.a = ttest.ab$obsAnova$Strain[,3] - ttest.ab$obsAnova$Strain[,1] # before specific plot(ab.diff.b[match( setdiff(idx.b, idx.c),Data$cloneid)],ab.diff.a[match( setdiff(idx.b, idx.c),Data.no.snp.pr$cloneid)], xlim=c(-1,3), ylim=c(-1,3) ) abline(a=0, b=1) # after specific plot(ab.diff.b[match( setdiff(idx.a, idx.c),Data$cloneid)],ab.diff.a[match( setdiff(idx.a, idx.c),Data.no.snp.pr$cloneid)], xlim=c(-1,3), ylim=c(-1,3) ) abline(a=0, b=1) abline(a=0, b=0) # all genes plot(ab.diff.b[match( union(idx.b, idx.a),Data$cloneid)],ab.diff.a[match( union(idx.b, idx.a),Data.no.snp.pr$cloneid)], xlim=c(-1,3), ylim=c(-1,3) ) abline(a=0, b=1) abline(a=0, b=0) # plot together plot( ab.diff.b[match( idx.c,Data$cloneid)],ab.diff.a[match( idx.c,Data.no.snp.pr$cloneid)], pch=".",cex=1, xlab="log2(B/A) w/ snp-pr", ylab="log2(B/A) w/o snp-pr", col="black" ) points(ab.diff.b[match( setdiff(idx.b, idx.c),Data$cloneid)],ab.diff.a[match( setdiff(idx.b, idx.c),Data.no.snp.pr$cloneid)] ,pch=21, cex=1,col="black") points( ab.diff.b[match( setdiff(idx.a, idx.c),Data$cloneid)],ab.diff.a[match( setdiff(idx.a, idx.c),Data.no.snp.pr$cloneid)], pch=3, cex=0.8, col="black") abline(a=0, b=1) abline(a=0, b=0) abline(v=0) points( ab.diff.b[match( idx.c,Data$cloneid)],ab.diff.a[match( idx.c,Data.no.snp.pr$cloneid)], pch=".",cex=1.5, col="grey") # plot in different panels plot(ab.diff.b[match( setdiff(idx.b, idx.c),Data$cloneid)],ab.diff.a[match( setdiff(idx.b, idx.c),Data.no.snp.pr$cloneid)] ,pch=".", cex=2,xlab=" ", ylab="log2(B/A) w/o snp-pr", col="black", xlim=c(-1,3), ylim=c(-1,1)) abline(a=0, b=1) abline(a=0, b=0) abline(v=0) plot( ab.diff.b[match( setdiff(idx.a, idx.c),Data$cloneid)],ab.diff.a[match( setdiff(idx.a, idx.c),Data.no.snp.pr$cloneid)], pch=".", cex=2, xlab=" ",ylab="log2(B/A) w/o snp-pr", col="black",xlim=c(-1,3), ylim=c(-1,1)) abline(a=0, b=1) abline(a=0, b=0) abline(v=0) plot( ab.diff.b[match( idx.c,Data$cloneid)],ab.diff.a[match( idx.c,Data.no.snp.pr$cloneid)], pch=".",cex=2, xlab="log2(B/A) w/ snp-pr", ylab="log2(B/A) w/o snp-pr", xlim=c(-2,3), ylim=c(-2,4) ) abline(a=0, b=1) abline(a=0, b=0) abline(v=0) #### compare the two F1s idx.sig.1 = which(ttest.f1s.1$Fs$adjPvalperm<0.05)#357 idx.sig.2 = which(ttest.f1s.2$Fs$adjPvalperm<0.05) #232 idx.sig.3 = which(ttest.f1s.3$Fs$adjPvalperm<0.05) #314 # use this in paper load("RmProbeOnly/ttest.f1s.yuna.test2.RData") idx.sig = which(ttest.f1s$Fs$adjPvalperm<0.05)# 333 #233 length(intersect( Data$cloneid[idx.sig.1], Data.no.snp.pr$cloneid[idx.sig])) #332 #226 length(intersect( Data$cloneid[idx.sig.2], Data.no.snp.pr$cloneid[idx.sig])) #218 #223 length(intersect( Data$cloneid[idx.sig.3], Data.no.snp.pr$cloneid[idx.sig])) #292 #226 # use this one length(intersect(idx.sig.1, idx.snp)) #25 length(intersect(idx.sig.2, idx.snp)) #14 length(intersect(idx.sig.3, idx.snp)) #22 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))#16 length(intersect(Data$cloneid[intersect(idx.sig.1, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #9 length(intersect(Data$cloneid[intersect(idx.sig.2, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #6 length(intersect(Data$cloneid[intersect(idx.sig.3, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #9 ##### compare the two hybrids with the two parents load("../FourthRun/ttest.ab.f1.yuna.RData") ttest.ab.f1.o = ttest.ab.f1 load("RmProbeOnly/ttest.ab.f1.yuna.RData") idx.sig.o = which(ttest.ab.f1.o$Fs$adjPvalperm<0.05) #67 idx.sig = which(ttest.ab.f1$Fs$adjPvalperm<0.05) #60 #65 length(intersect( Data$cloneid[idx.sig.o], Data.no.snp.pr$cloneid[idx.sig]))#60 #64 length(intersect(idx.sig.o, idx.snp)) #7 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))# 5 length(intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #4 ##### dominance of AB load ( file="../FourthRun/ttest.f1ab.yuna.RData") ttest.f1ab.o = ttest.f1ab load ( file="RmProbeOnly/ttest.f1ab.yuna.RData") idx.sig.o = which(ttest.f1ab.o$Fs$adjPvalperm<0.05) #317 idx.sig = which(ttest.f1ab$Fs$adjPvalperm<0.05) #442 #571 length(intersect( Data$cloneid[idx.sig.o], Data.no.snp.pr$cloneid[idx.sig]))#295 #313 length(intersect(idx.sig.o, idx.snp)) #22 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))#46 length(intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #### dominance of BA load ( file="../FourthRun/ttest.f1ba.yuna.RData") ttest.f1ba.o = ttest.f1ba load ( file="RmProbeOnly/ttest.f1ba.yuna.RData") idx.sig.o = which(ttest.f1ba.o$Fs$adjPvalperm<0.05) #23 idx.sig = which(ttest.f1ba$Fs$adjPvalperm<0.05) #20 #21 length(intersect( Data$cloneid[idx.sig.o], Data.no.snp.pr$cloneid[idx.sig]))#20 #21 length(intersect(idx.sig.o, idx.snp)) #4 length(intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp]))#2 length(intersect(Data$cloneid[intersect(idx.sig.o, idx.snp)], intersect(Data.no.snp.pr$cloneid[idx.sig], Data$cloneid[idx.snp])) ) #2 ##### overdominant genes ## for dominance of ab load("../FourthRun/ttest.AF1ba.yuna.RData") load("../FourthRun/ttest.AF1ab.yuna.RData") load("../FourthRun/ttest.BF1ba.yuna.RData") load("../FourthRun/ttest.BF1ab.yuna.RData") ttest.AF1ba.o = ttest.AF1ba ttest.AF1ab.o = ttest.AF1ab ttest.BF1ba.o = ttest.BF1ba ttest.BF1ab.o = ttest.BF1ab anova.strain = ttest.AF1ab.o$obsAnova idx1 <- which(anova.strain$Strain[,2]>anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,2]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,2]anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,2]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,2]anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,4]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,4]anova.strain$Strain[,1] & anova.strain$Strain[,1]>anova.strain$Strain[,3]) # to compare with A idx2 <- which(anova.strain$Strain[,4]anova.strain$Strain[,3] & anova.strain$Strain[,3]>anova.strain$Strain[,1]) # to compare with B idx22 <- which(anova.strain$Strain[,4]