##################################################################### # # matest.R # modified form of matest.R. Derive pool.be p-values. # Currently only for t1 and ts. No random effect and pooling method # should be 'sample'... did not try for more than 2 conditions.. # library(maanova); load('matest.R') # this will over write the existing code. # 2/3/06 Hyuna Yang # # copyright (c) 2001-2004, Hao Wu and Gary A. Churchill, The Jackson Lab. # Written Apr, 2004 # # Licensed under the GNU General Public License version 2 (June, 1991) # # Part of the R/maanova package # # This is the function to do F permutation test # ###################################################################### matest <- function(data, model, term, Contrast, n.perm=1000, nnodes=1, test.type=c("ttest","ftest"), shuffle.method=c("sample", "resid"), MME.method=c("REML","noest","ML"), test.method=c(1,0,0,1), pval.pool=TRUE, verbose=TRUE){ if (class(data) != "madata") stop("data is not an object of class madata.") if (class(model) != "mamodel") stop("model is not an object of class mamodel.") # get the methods shuffle.method <- match.arg(shuffle.method) MME.method <- match.arg(MME.method) # local variables nreps <- data$n.rep ngenes <- data$n.gene narrays <- data$n.array ########## initialize cluster ############ if( (n.perm>1) & (nnodes>1) ) { if(!require(snow)) stop("You have to install SNOW package to use cluster") # make cluster object cl <- makeMPIcluster(nnodes) # correct the possible correlation problem clusterApply(cl, runif(length(cl), max=1000000000), set.seed) } ################################################################ # if any tested term is random, issue a warning and rebuild model ################################################################ # take the parsed formula from model parsed.formula <- model$parsed.formula # identify the terms termidx <- locateTerm(parsed.formula$labels, term) if( is.null(termidx) ) stop("The term(s) to be tested is not in formula.") if(any(parsed.formula$random[termidx] == 1)) { warning("Testing random terms. Random term will be tested as fixed terms.") # rebuild random formula and model object # random terms random.term <- parsed.formula$labels[parsed.formula$random==1] # random term to be tested random.term.test <- parsed.formula$labels[termidx] random.term.test <- random.term.test[parsed.formula$random[termidx]==1] # make the random term to be tested fixed, # remake the formula for random random.term <- setdiff(random.term, random.term.test) if(length(random.term) == 0) { random <- ~1 stop("Cannot test the only random term in the model.") } else { random <- paste(random.term, collapse="+") random <- paste("~", random, sep="") } # remake covariate cov.term <- parsed.formula$labels[parsed.formula$covariate==1] if(length(cov.term) == 0) cov <- ~1 else { cov <- paste(cov.term, collapse="+") cov <- paste("~", cov, sep="") } # remake model model <- makeModel(data, formula=model$formula, random=as.formula(random), covariate=as.formula(cov)) } ########################################## # for contrast matrix - make one if not given, # check the validity if given ########################################## # for backward compatibility, if user provide Contrast matrix # this will be a T-test by default, unless user specify the test type if(missing(Contrast)) {# no Contrast matrix, make it Contrast <- makeContrast(model, term) nContrast <- 1 # this must be a F-test if(missing(test.type)) is.ftest <- TRUE else { # test.type is given test.type <- match.arg(test.type) if(test.type=="ttest") is.ftest <- FALSE else is.ftest <- TRUE if(test.type=="ttest") # cannot be T-test stop("You must specify a Contrast matrix for doing T-test") } } else { # given contrast matrix. # check if the contrast matrix is valid checkContrast(model, term, Contrast) # test type is T-test by default for backward compatibility test.type <- match.arg(test.type) if(test.type=="ftest") { is.ftest <- TRUE nContrast <- 1 } else { # this is T-test, but will return F values is.ftest <- FALSE # number of tests requested nContrast <- dim(Contrast)[1] # number of t-tests (comparisons) } } # do F test on the observed data if(verbose) cat("Doing F-test on observed data ...\n") # fit ANOVA model anovaobj <- fitmaanova(data, model, method=MME.method, verbose=FALSE) ftest.obs <- matest.engine(anovaobj, term, test.method=test.method, Contrast=Contrast, is.ftest=is.ftest, verbose=FALSE) # get the results mv <- ftest.obs$mv dfnu <- ftest.obs$dfnu; dfFtest <- ftest.obs$dfFtest partC <- ftest.obs$partC # initialize output object ftest <- NULL # general info in the output object ftest$model <- model ftest$term <- term ftest$dfde <- ftest.obs$dfFtest ftest$dfnu <- dfnu ftest$obsAnova <- anovaobj ftest$Contrast <- Contrast if(!is.ftest) class(ftest) <- c("matest", "ttest") else class(ftest) <- c("matest", "ftest") # calculate P values # F1 if(test.method[1] == 1) { ftest$F1 <- NULL ftest$F1$Fobs <- ftest.obs$F1 ftest$F1$Ptab <- 1 - pf(ftest$F1$Fobs, dfnu, dfFtest) if(n.perm > 1) { ftest$F1$Pvalperm <- array(0, c(ngenes, nContrast)) ftest$F1$Pvalmax <- array(0, c(ngenes, nContrast)) #ftest$F1$Pvalsubperm <- array(0, c(ngenes, nContrast)) } } #F2 if(test.method[2] == 1) { ftest$F2$Fobs <- ftest.obs$F2 ftest$F2$Ptab <- 1 - pchisq(ftest$F2$Fobs*dfnu, dfnu) if(n.perm > 1) { ftest$F2$Pvalperm <- array(0, c(ngenes, nContrast)) ftest$F2$Pvalmax <- array(0, c(ngenes, nContrast)) #ftest$F2$Pvalsubperm <- array(0, c(ngenes, nContrast)) # xq add } } #F3 if(test.method[3] == 1) { ftest$F3$Fobs <- ftest.obs$F3; ftest$F3$Ptab <- 1 - pchisq(ftest$F3$Fobs*dfnu, dfnu) if(n.perm > 1) { ftest$F3$Pvalperm <- array(0, c(ngenes, nContrast)) ftest$F3$Pvalmax <- array(0, c(ngenes, nContrast)) #ftest$F3$Pvalsubperm <- array(0, c(ngenes, nContrast)) # xq add } } #Fs if(test.method[4] == 1) { ftest$Fs$Fobs <- ftest.obs$Fs ftest$Fs$Ptab <- 1 - pf(ftest$Fs$Fobs, dfnu, dfFtest) if(n.perm > 1) { ftest$Fs$Pvalperm <- array(0, c(ngenes, nContrast)) ftest$Fs$Pvalmax <- array(0, c(ngenes, nContrast)) #ftest$Fs$Pvalsubperm <- array(0, c(ngenes, nContrast)) } } # return if no permutation test if(n.perm == 1) { warning("You are not doing permutation test. Only observed values are calculated.") return(ftest) } ######################################## # start to do permutation test ######################################## if(verbose) cat("Doing permutation. This may take a long time ... \n") # permutation - use MPI cluster if specified if(nnodes > 1) { # use MPI cluster # use cluster call to do permutation # calculate the number of permutation needed in each node nperm.cluster <- rep(floor((n.perm-1)/nnodes), nnodes) # maybe some leftovers leftover <- n.perm - 1 - sum(nperm.cluster) if(leftover > 0) nperm.cluster[1:leftover] <- nperm.cluster[1:leftover] + 1 # load library on all nodes clusterEvalQ(cl, library(maanova)) # use clusterApply to run permutation on all nodes # note that the only different parameter passed to function # is ftest.mixed.perm. So in ftest.mixed.perm I put nperm # as the first argument so I can use clusterApply cat(paste("Doing permutation on", nnodes, "cluster nodes ... \n")) pstar.nodes <- clusterApply(cl, nperm.cluster, matest.perm, ftest, data, model, term, Contrast, anovaobj$S2, mv, is.ftest, partC, MME.method, test.method, shuffle.method, pval.pool) # how to display permutation number? # after it's done, gather the results from nodes ffields <- c("F1","F2","F3","Fs") for(i in 1:nnodes) { if(nperm.cluster[i] > 0) { pstar <- pstar.nodes[[i]] for(itest in 1:4) { if(test.method[itest] == 1) { ftest[[ffields[itest]]]$Pvalperm <- ftest[[ffields[itest]]]$Pvalperm + pstar[[ffields[itest]]]$Pperm ftest[[ffields[itest]]]$Pvalmax <- ftest[[ffields[itest]]]$Pvalmax + pstar[[ffields[itest]]]$Pmax } } } } # clear cluster results to save some memory rm(pstar.nodes) # stop the cluster stopCluster(cl) } else { # no cluster, do it on single node # original pooling # pstar <- matest.perm(ngenes, n.perm, ftest, data, model, term, Contrast, anovaobj$S2, # mv, is.ftest, partC, MME.method, test.method, shuffle.method, # pval.pool) ###### using subset of data, Fs should be included and Fs only. It can not handle a situation that one ###### array is toally missing. hsidx = which(ftest$F1$Fobs <=qf(.9,ftest$dfnu,ftest$dfde)); sdata = subset.madata(data, genes=hsidx) sS2=anovaobj$S2[hsidx,]; pstar <- matest.perm(ngenes, n.perm, ftest, sdata, model, term, Contrast, sS2,mv, is.ftest, partC, MME.method, test.method, shuffle.method,pval.pool) # update the pvalues ffields <- c("F1","F2","F3","Fs") for(itest in 1:4) { if(test.method[itest] == 1) { ftest[[ffields[itest]]]$Pvalperm <- ftest[[ffields[itest]]]$Pvalperm + pstar[[ffields[itest]]]$Pperm #ftest[[ffields[itest]]]$Pvalsubperm <- # ftest[[ffields[itest]]]$Pvalsubperm + pstar1[[ffields[itest]]]$Pperm ftest[[ffields[itest]]]$Pvalmax <- ftest[[ffields[itest]]]$Pvalmax + pstar[[ffields[itest]]]$Pmax } } } # finish permutation loop # calculate the pvalues. Note that the current object contains the "counts" ffields <- c("F1","F2","F3","Fs") for(itest in 1:4) { if(test.method[itest] == 1) { # Pvalperm if(pval.pool){ #ftest[[ffields[itest]]]$Pvalperm <- # ftest[[ffields[itest]]]$Pvalperm / (n.perm*ngenes) ftest[[ffields[itest]]]$Pvalperm <- ftest[[ffields[itest]]]$Pvalperm / (n.perm*(sdata$n.gene)) # ftest[[ffields[itest]]]$Pvalsubperm <- # ftest[[ffields[itest]]]$Pvalsubperm / (n.perm* (sdata$n.gene)) } else ftest[[ffields[itest]]]$Pvalperm <- ftest[[ffields[itest]]]$Pvalperm / n.perm # for Pvalmax ftest[[ffields[itest]]]$Pvalmax <- ftest[[ffields[itest]]]$Pvalmax / n.perm } } # add some other info to the return object ftest$n.perm <- n.perm ftest$shuffle <- shuffle.method ftest$pval.pool <- pval.pool # return ftest } ###################################################################### # # function to perform permutation F test # ####################################################################### matest.perm <- function(ngenes, n.perm, FobsObj, data, model, term, Contrast, inits20, mv, is.ftest, partC, MME.method, test.method, shuffle.method, pool.pval) { # local variables # number of contrasts if(is.ftest) # this is F-test nContrast <- 1 else # this is T-test nContrast <- dim(Contrast)[1] # initialize result # the result is the number of permutation F values bigger than the observed # F value for each gene. Plus the maximum permutation F value for each permutaion Pval <- NULL if(test.method[1] == 1) { Pval$F1$Pmax <- array(0, c(ngenes, nContrast)) Pval$F1$Pperm <- array(0, c(ngenes, nContrast)) } if(test.method[2] == 1) { Pval$F2$Pmax <- array(0, c(ngenes, nContrast)) Pval$F2$Pperm <- array(0, c(ngenes, nContrast)) } if(test.method[3] == 1) { Pval$F3$Pmax <- array(0, c(ngenes, nContrast)) Pval$F3$Pperm <- array(0, c(ngenes, nContrast)) } if(test.method[4] == 1) { Pval$Fs$Pmax <- array(0, c(ngenes, nContrast)) Pval$Fs$Pperm <- array(0, c(ngenes, nContrast)) } # fstar <- NULL # if(test.method[1] == 1) # fstar$Fstar1 <- array(0, c(ngenes, nContrast, n.perm)) # if(test.method[2] == 1) # fstar$Fstar2 <- array(0, c(ngenes, nContrast, n.perm)) # if(test.method[3] == 1) # fstar$Fstar3 <- array(0, c(ngenes, nContrast, n.perm)) # if(test.method[4] == 1) # fstar$Fstars <- array(0, c(ngenes, nContrast, n.perm)) # residual shuffle is only for fixed model if( (model$mixed==1) & (shuffle.method=="resid") ) { warning("You can only do sample shuffling for mixed model test") shuffle.method <- "sample" } # prepare permutation if(shuffle.method == "sample") { # base used in shuffling shuffle.base <- NULL # if this is fixed model, shuffle tested term randomly # if this is mixed model, find nesting terms if(model$mixed) { nesting <- model$nesting # exclude the term itself #diag(nesting) <- NA # find the nesting terms idx.nesting <- which(nesting[term[1],]==1) # for multiple terms if(length(term) > 1) { for(iterm in 2:length(term)) { tmpidx <- which(nesting[term[iterm],]==1) idx.nesting <- intersect(idx.nesting, tmpidx) } } # only the random terms can be shuffle base idx.nesting <- intersect(idx.nesting, which(model$parsed.formula$random==1)) # if there's no base if(length(idx.nesting) == 0) shuffle.base <- NULL if(length(idx.nesting) > 1) { # if we have multiple base here, find the one on the lowest level # the term with the least number of columns should be the one #index for random terms in model idx.random <- which(model$parsed.formula$random==1) # index for nesting terms in random terms tmpidx <- NULL for(itmp in 1:length(idx.random)) tmpidx <- c(tmpidx, which(idx.nesting[itmp]==idx.random)) #tmpidx <- which(idx.nesting==idx.random) # number of columns for these nesting random terms dimZ.nesting <- model$dimZ[tmpidx] # which one is the smallest? idx.smallest <- which(dimZ.nesting == min(dimZ.nesting)) # this one should be idx.nesting idx.nesting <- idx.nesting[idx.smallest] } # make shuffle base if(length(idx.nesting) == 0) shuffle.base <- NULL else { base.term <- model$parsed.formula$labels[idx.nesting] # make shuffle base designObj <- makeDesign(model$design) shuffle.base <- designObj[[base.term]] } } } else if(shuffle.method=="resid") { # residual shuffling - for fixed model only # this is to shuffle the null model residuals # make a null model with the tested term excluded # terms in the null model. note that this is only for fixed model terms.null <- setdiff(model$parsed.formula$labels, term) if(length(terms.null) == 0) # null model has no terms nullformula <- "~1" else { nullformula <- paste(terms.null, collapse="+") nullformula <- paste("~", nullformula, sep="") } nullmodel <- makeModel(data, formula=as.formula(nullformula)) # fit anova model on null model anova0 <- fitmaanova(data, nullmodel, verbose=FALSE) resid0 <- data$data - anova0$yhat nresid <- length(resid0) data.perm <- data } ############################## # permutation loop ############################## for(b in 1:n.perm) { if(round((b+1)/100) == (b+1)/100) cat(paste("Finish permutation # ", b+1, "\n")) design.perm <- model$design # for sample shuffling if(shuffle.method == "sample") { if(is.null(shuffle.base)) { # shuffle the design - this could be complicated design.perm <- shuffle.maanova(data, model, term) } else { # shuffle tested terms according to shuffle base # number of samples to be shuffled tmpSample <- shuffle.base[model$design$Sample != 0] tmpSample.unique <- unique(tmpSample) nsample <- length(tmpSample.unique) # index for non-reference sample idx.noref <- model$design$Sample!=0 # permutation index idx.perm <- sample(nsample) # shuffle the design and remake a model object design.perm <- model$design for(i in 1:nsample) { # index to be replaced # it should be non-ref sample idx <- which( (shuffle.base == tmpSample.unique[i]) & (idx.noref) ) # index used to take value newidx <- which( (shuffle.base==tmpSample.unique[idx.perm[i]]) & (idx.noref) ) newvalue <- model$design[[term]][newidx[1]] design.perm[[term]][idx] <- newvalue } } # remake model model.perm <- makeModel(data, design.perm, model$formula, model$random, model$covariate) # fit anova model anovaobj.perm <- fitmaanova(data, model.perm, inits20=inits20, method=MME.method, verbose=FALSE) } else if(shuffle.method=="resid") { # residual shuffling - this is for fixed model # global shuffle residual without replacement idx <- sample(nresid, replace=FALSE) # remake data data.perm$data <- anova0$yhat + resid0[idx] # fit anova model anovaobj.perm <- fitmaanova(data.perm, model, verbose=FALSE) } # start to do F-test # provide partC for fixed model test if(model$mixed == 0) # if this is fixed model, pass in partC # so we can save some calculation time ftest.perm <- matest.engine(anovaobj.perm, term, mv, test.method, Contrast, is.ftest, partC, verbose=FALSE) else ftest.perm <- matest.engine(anovaobj.perm, term, mv, test.method, Contrast, is.ftest, verbose=FALSE) # update the result ffields <- c("F1","F2","F3","Fs") for(icon in 1:nContrast) { # loop for multiple contrasts for(i in 1:4) { # loop for different F test methods if(test.method[i] == 1) { # if this F test is requested fobs <- FobsObj[[ffields[i]]]$Fobs[,icon] fstar <- ftest.perm[[ffields[i]]][,icon] fstar.max <- max(fstar) # For Fstarmax if(test.method[i] == 1) Pval[[ffields[i]]]$Pmax[,icon] <- Pval[[ffields[i]]]$Pmax[,icon] + (fstar.max>fobs) # for P value count if(pool.pval) { # use pooled p value Fobs.rank <- rank(fobs) Fstar.rank <- rank(c(fobs, fstar)) Pval[[ffields[i]]]$Pperm[,icon] <- Pval[[ffields[i]]]$Pperm[,icon] + data$n.gene - (Fstar.rank[1:ngenes]-Fobs.rank) } else { # not pool tmp <- fstar > fobs Pval[[ffields[i]]]$Pperm[,icon] <- Pval[[ffields[i]]]$Pperm[,icon] + tmp } } } } } # return Pval }