################################################################### #A script for covariance analysis and graphical modeling at various #levels of conditioning #Writen by Renhua Li #11/29/2006 ################################################################### rm(list=ls()) #library(mimR) setwd("C:/renhua/data/RANDY/Leehrae/hlb/com_HLB_LEEH") ############################################## ##read two data sets in a tabde limeted text file ############################################## hlb <- read.delim("hlb.txt", header = TRUE, sep = "\t",na.strings = "-", ) leeh <- read.delim("leeh.txt", header = TRUE, sep = "\t",na.strings = "-", ) class(hlb) colnames(hlb) colnames(leeh) dim(hlb) ############################################ #extract two subdatasets with common strains ############################################ ############################################ hlbstrains <- hlb$strain leehstrains <- leeh$strain hlb <- as.matrix(hlb) leeh <- as.matrix(leeh) subhlb <- hlb[which(hlbstrains %in% leehstrains),] dim(subhlb) subhlb #22 common strains with both sexes HLBtraitnames <- colnames(hlb[,c(2,4,17,18, 19, 20, 13)]) subhlb2 <- subhlb[,c(2,4,17,18,19,20,13)] #X11(); par(mfrow=c(3,3)) #for (i in 1:(length(HLBtraitnames)-1)) { # hist(log(as.numeric(HLBset[,i+1])), breaks=25,main="", xlab = HLBtraitnames[[i+1]]) #} #subhlb2[,2:5] <- log(as.numeric(subhlb2[,2:4])) #dim(subhlb2) #hf = as.vector(rep(1, 44)) #HLB <- cbind(subhlb2[,1:4],hf) setwd("C:/renhua/data/RANDY/Leehrae/hlb/com_HLB_LEEH") subhlb3 = data.frame(subhlb2) write.table(subhlb2, file = "HLB22strains.txt", append = FALSE, quote = F, sep = " ", eol = "\n", na = "-", dec = ".", row.names = F, col.names = TRUE, qmethod = "escape") ############################################## idx <- which(leeh[,1] %in% subhlb[,1]) #pay attention to the order!!! LRtraitnames <- colnames(leeh[,c(2,3,11,9,7,16,15)]) subleeh <- leeh[idx,c(2,3,11,9,7,16,15)] #X11(); par(mfrow=c(2,3)) #for (i in 1:(length(LRtraitnames)-1)) { # hist(log(as.numeric(LRset[,i+1])), breaks=25,main="", xlab = LRtraitnames[[i+1]]) #} #subleeh[,2:5] <- log(as.numeric(subleeh[,2:5])) #dim(subleeh) #lf = as.vector(rep(0, 44)) #LEEH <- cbind(subleeh[,1:4],lf) #setwd("c:/data/mimRdata/hlb") #save(subhlb, file ="subhlb.Rdata") #save(subleeh, file="subleeh.Rdata") setwd("C:/renhua/data/RANDY/Leehrae/hlb/com_HLB_LEEH") subleeh2 = data.frame(subleeh) write.table(subleeh2, file = "LEEH22strains.txt", append = FALSE, quote = F, sep = " ", eol = "\n", na = "-999", dec = ".", row.names = F, col.names = TRUE, qmethod = "escape") ######################### #Combining two data sets ######################### com <- as.data.frame(rbind(subhlb2,subleeh)) #covariance of the traits 1-8 comcov <- cov(com[,1:5], use = "pairwise.complete.obs", method = "pearson") ####################### #corr of dbwt and IGF-1 ####################### dbwt =as.vector(as.numeric( subhlb2[,3])) -as.vector(as.numeric( subleeh[,3])) plot(dbwt, leeh[idx,15], xlab="dbwt", ylab= "IGF-1") corr = cor(dbwt, as.vector(as.numeric(leeh[idx,15])), na.rm = TRUE, use = "pairwise.complete.obs") ##################### #fitting a mix model ##################### strain <- list(1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22) dstrain = rep(as.numeric(strain), 2) diet = c(rep(1,44), rep(2,44)) com <- cbind(com[,1:6], dstrain, diet) idx <- !is.na(com[,2]) colnames(com)[[3]] = 'BWT' colnames(com)[[5]] = 'diet' colnames(com)[[6]] = 'strain' comdata <- as.data.frame(com[idx,]) names(comdata) # change these 3 back to numeric variables comdata[,3]=as.numeric(as.character(comdata[,3])) comdata[,4]=as.numeric(as.character(comdata[,4])) comdata[,2]=as.numeric(as.character(comdata[,2])) gfit <- glm(aBMD ~ BWT + fat2lean + diet + sex + diet:fat2lean + strain, data = comdata) library(nlme) newdata = groupedData(formula = aBMD ~ BWT + fat2lean |strain/diet/sex, data = comdata) BWT = newdata[,3] fat2lean = newdata[,4] diet = newdata[,5] strain = newdata[,6] sex = newdata[,1] aBMD = newdata[,2] fm1 <- nlme(aBMD ~ strain + BWT + fat2lean + diet + sex + diet:fat2lean, data = newdata, fixed = aBMD ~ BWT + sex + fat2lean + diet+ diet:fat2lean, random = aBMD ~ strain/sex/diet, start = c(strain = 9, BWT = 3.493540, fat2lean = -1.3463504, aBMD =-3.065726, diet = 1, sex = 1, diet:fat2lean = -1.3)) summary(fm1) ############################################ ## plot histograms for traits in HLB subdata ############################################ library(qtl) load("subhlb.Rdata") load("subleeh.Rdata") dim(subhlb) #44 x 20 dim(subleeh) #44 x 16 class(subhlb) colnames(subhlb) colnames(subleeh) hlbtraitnames<- colnames(subhlb[,c(2,19,20,4,13,18)]) hlb6 <- subhlb[,c(2,19,20,4,13,18)] class(hlb6) X11(); par(mfrow=c(2,3)) for (i in 1:length(hlbtraitnames)) { hist(as.numeric(hlb6[,i]), breaks=25,main="", xlab = hlbtraitnames[[i]]) } ##log transformed traits X11(); par(mfrow=c(2,3)) for (i in 1:(length(hlbtraitnames)-1)) { hist(sqrt(as.numeric(hlb6[,i+1])), breaks=25,main="", xlab = hlbtraitnames[[i+1]]) } graphics.off() ################################################################## #sqrt transformed data and pairwise covariance for the subhlb data ################################################################## #sqrt thensformation for the traits hlb6[,2:6] <- sqrt(as.numeric(hlb6[,2:6])) dim(hlb6) #44 x 6 #covariance of the traits 1-6 hlbcovtrait6 <- cov(hlb6[,1:6], use = "pairwise.complete.obs", method = "pearson") setwd("c:/data/mimRdata/hlb") write(hlbcovtrait6, "hlbcovtrait6", sep = "\t", ncolumns=6) ############################################## #subsets of males and females in the hlb6 data ############################################## hlbfidx <- which(hlb6[,1] == 0) hlbmidx <- which(hlb6[,1] == 1) hlb6female <- hlb6[hlbfidx,] hlb6male <- hlb6[hlbmidx,] ###covariance of traits 2-6 in females hlbcovfemale <- cov(hlb6female[,2:6], use = "pairwise.complete.obs", method = "pearson") setwd("c:/data/mimRdata/hlb") write(hlbcovfemale, "hlbcovfemale", sep = "\t", ncolumns=5) ###covariance of traits 2-6 in males hlbcovmale <- cov(hlb6male[,2:6], use = "pairwise.complete.obs", method = "pearson") setwd("c:/data/mimRdata/hlb") write(hlbcovmale, "hlbcovmale", sep = "\t", ncolumns=5) ############################################## #subsets of males and females in the Leeh data ############################################## leehfidx <- which(leeh6[,1] == 0) leehmidx <- which(leeh6[,1] == 1) leeh6female <- leeh6[leehfidx,] leeh6male <- leeh6[leehmidx,] ###covariance of traits 2-6 in females leehcovfemale <- cov(leeh6female[,2:6], use = "pairwise.complete.obs", method = "pearson") setwd("c:/data/mimRdata/hlb") write(leehcovfemale, "leehcovfemale", sep = "\t", ncolumns=5) ###covariance of traits 2-6 in males leehcovmale <- cov(leeh6male[,2:6], use = "pairwise.complete.obs", method = "pearson") setwd("c:/data/mimRdata/hlb") write(leehcovmale, "leehcovmale", sep = "\t", ncolumns=5) ################################################### ## plot histograms for traits in Leeh Rae's subdata ################################################### library(qtl) setwd("c:/data/mimRdata/hlb") load("subleeh.Rdata") dim(subleeh) #44 x 16 class(subleeh) colnames(subleeh) leehtraitnames <- colnames(subleeh[,c(2,7,16,3,15,9)]) leeh6 <- subleeh[,c(2,7,16,3,15,9)] class(leeh6) X11(); par(mfrow=c(2,3)) for (i in 1:length(leehtraitnames)) { hist(as.numeric(leeh6[,i]), breaks=25,main="", xlab = leehtraitnames[[i]]) } ##log transformed traits X11(); par(mfrow=c(2,3)) for (i in 1:(length(leehtraitnames)-1)) { hist(sqrt(as.numeric(leeh6[,i+1])), breaks=25,main="", xlab = leehtraitnames[[i+1]]) } graphics.off() #imputeMissing() ################################################################## #sqrt transformed data and pairwise covariance for the subleeh data ################################################################## #sqrt thensformation for the traits leeh6[,2:6] <- sqrt(as.numeric(leeh5[,2:6])) dim(leeh6) #44 x 6 #covariance of the traits 1-6 leehcovtrait6 <- cov(leeh6[,1:6], use = "pairwise.complete.obs", method = "pearson") setwd("c:/data/mimRdata/hlb") write(leehcovtrait6, "leehcovtrait6", sep = "\t", ncolumns=6) ################################################################### #combining the two datasets as one data treating diet as a variable ################################################################### ##################################################### ##basic statistics and 0 order conditional covariance ##################################################### mean(math, use = "all.obs", method = "pearson") cor(math, use = "all.obs", method = "pearson") zocov <- cov(math, use = "all.obs", method = "pearson") ############################################## ##full conditional covariance and correlation ############################################## #get an concentration (C) matrix invzocov <- solve(zocov) dim(invzocov) #5 5 #inverse the signs of the off-diagnal elements in the inverse matrix fcov2<- matrix(0,5,5) for (i in 1:5) { for (j in 1:5) { ifelse(i == j,fcov2[i,j] <- invzocov[i,j],fcov2[i,j] <- (-1)*invzocov[i,j]) } } pcorr <- matrix(0,nrow=5,ncol=5) for (i in 1:5) { for (j in 1:5) { ifelse(i != j, pcorr[i,j] <- (-1)*invzocov[i,j]/sqrt(invzocov[i,i]*invzocov[j,j]), pcorr[i,j] <- 1) } } pcorr rm(i j) ######################################## ##first order coditional covariance ######################################## library(mimR) data(math) class(math) #math <- as.matrix(math) n <- ncol(math) focc <- list(matrix(rep(0,(n-2)*(n-2)), (n-2),(n-2))) for (i in 1:(n - 1)) { for (j in (i + 1):n) { temp <- 1:n k <- as.list(which(temp != i & temp != j)) rm(temp) for (h in 1:length(k) ) { d = as.numeric(k[h]) #traitnames <- as.list(colnames(math)) #traitnames <- c(traitnames[i],traitnames[j],traitnames[d], subdata <- math[,c(i,j,d)] cvsubdata <- cov(subdata, use = "all.obs", method = "pearson") invs <- solve(cvsubdata) #switch the sign of the off diagonal elements for (x in 1:3) { for (y in 1:3) { ifelse(x == y,invs[x,y] <- invs[x,y],invs[x,y] <- (-1)*invs[x,y]) } } for (m in 1:((n-1)*(n-1)*(n-2))) { focc[[m]] <- invs } rm(invs) } } } library(help="stats") math$L <- factor(NA,levels=1:2) gmd.math <- as.gmData(math) latent(gmd.math) <- "L" m1 <- mim("..", data = gmd.math, fit = FALSE) #source("http://bioconductor.org/getBioC.R") #getBioC("Rgraphviz") library(Rgraphviz) library(graph) display(m1) #sts <- momentstats(math[,1:5],means=NULL, covariance=TRUE) m2 <- editmim(m1,deleteEdge=paste(names(math)[1:5],collapse=':')) display(m2) m2f <- imputeMissing(m2) m2.imp <- retrieveData() m2f <- fit(m2) data(rats) gmd.rats <- as.gmData(rats) m2 <- mim("..", data=gmd.rats) display(m1) mf2 <- fit(m2) parms <- fitted(mf2) parms minfo <- modelInfo(mf2) minfo print() properties() library(mimR) # Create som models (no data needed!) gmd.rats.nodata <- gmData(c("Sex","Drug","W1","W2"), factor=c(2,3,FALSE,FALSE), vallabels=list("Sex"=c("M","F"), "Drug"=c("D1","D2","D3"))) m12 <- mim("Sex:Drug/Sex:Drug:W1+Sex:Drug:W2/W1:W2", data=gmd.rats.nodata) summary(m12) m.main <- mim(".", data=gmd.rats.nodata) m.sat <- mim("..", data=gmd.rats.nodata) m.hsat <- mim("..h", data=gmd.rats.nodata) summary(m.main); summary(m.sat); summary(m.hsat) # Next we need some data to work with data(rats) gmd.rats <- as.gmData(rats) vallabels(gmd.rats) observations(gmd.rats) m1 <- mim("Sex:Drug/Sex:Drug:W1+Sex:Drug:W2/W1:W2", data=gmd.rats) m.main <- mim(".", data=gmd.rats, marginal=c("Sex", "Drug", "W1")) m.sat <- mim("..", data=gmd.rats, marginal=c("Sex", "Drug", "W1")) m.hsat <- mim("..h", data=gmd.rats, marginal=c("Sex", "Drug", "W1")) m.f <- stepwise(m.main, "f") # forward display(m.m) display(m.sat) m.b <- stepwise(m.sat, "s") # backward, exact tests display(m.sat) m1f <- fit(m1) summary(m1f) m.main <- fit(mim(".", data=gmd.rats)) m.sat <- fit(mim("..", data=gmd.rats)) m.hsat <- fit(mim("..h", data=gmd.rats)) summary(m.main); summary(m.sat); summary(m.hsat) # To generate an nth order hierarchical log-linear model for discrete # data you can do data(HairEyeColor) mim(nthOrderModel(names(dimnames(HairEyeColor)),order=2),data=as.gmData(HairEyeColor))