## Data downloaded from http://www.affymetrix.com/analysis/download_center2.affx ## HUMAN GENOME U95 DATA SET, a subset of the data used to develop and validate MAS 5.0 ## 59 cell files are downloaded and saved to "C:\presentations\041215 tutorial\Affy\case study\data" ## install Bioconductor source("http://www.bioconductor.org/getBioC.R") ## this will download the getBioC functionality into your R session. getBioC("affy") ## this will install "affy" and "exprs" packages ## read in all cell files in the specified directory library("affy") ## load the affy package library("matchprobes") ## gcrma package requires matchprobes package library("gcrma") ## load gcrma package dataDir<-"C:\\presentations\\041215 tutorial\\Affy\\case study\\data" workingDir<-"C:\\presentations\\041215 tutorial\\Affy\\case study" fileNames<-dir(dataDir) fileNames<-paste(dataDir, "\\", fileNames, sep="") LatinData<-ReadAffy(filenames=fileNames[c(13:20)]) ## Arbitrarily select 8 arrays for analysis: "1521m99hpp_av06.CEL" "1521n99hpp_av06.CEL" "1521o99hpp_av06.CEL" "1521p99hpp_av06.CEL" ## "1521q99hpp_av06.CEL" "1521r99hpp_av06.CEL" "1521s99hpp_av06.CEL" "1521t99hpp_av06.CEL". The first four are replicates ## and the next four are another replicates ## compute expression measures by dChip (PM-MM model) LatinData.dchip<-expresso(LatinData, pmcorrect.method="subtractmm", bgcorrect.method="none", normalize.method="invariantset", summary.method="liwong") write.table(exprs(LatinData.dchip), paste(workingDir, "\\LatinData_dchip_PMMM.txt", sep=""), sep="\t") ## compute expression measures by dChip (PM only model) LatinData.dchip.PM<-expresso(LatinData, pmcorrect.method="pmonly", bgcorrect.method="none", normalize.method="invariantset", summary.method="liwong") write.table(exprs(LatinData.dchip.PM), paste(workingDir, "\\LatinData_dchip_PM.txt", sep=""), sep="\t") ## compute expression measures by MAS 5.0 LatinData.mas5<-expresso(LatinData, pmcorrect.method="mas", bgcorrect.method="mas", summary.method="mas") LatinData.mas5.norm<-affy.scalevalue.exprSet(LatinData.mas5) write.table(exprs(LatinData.mas5.norm), paste(workingDir, "\\LatinData_mas5_norm.txt", sep=""), sep="\t") ## compute expression measures by RMA LatinData.rma<-expresso(LatinData, bgcorrect.method="rma", normalize.method="quantiles", pmcorrect.method="pmonly", summary.method="medianpolish") write.table(exprs(LatinData.rma), paste(workingDir, "\\LatinData_rma.txt", sep=""), sep="\t") ## compute expression measure by gcRMA LatinData.gcrma<-gcrma(LatinData, type="affinities") write.table(exprs(LatinData.gcrma), paste(workingDir, "\\LatinData_gcrma_noMM.txt", sep=""), sep="\t") LatinData.gcrma.MM<-gcrma(LatinData, type="fullmodel") write.table(exprs(LatinData.gcrma.MM), paste(workingDir, "\\LatinData_gcrma_MM.txt", sep=""), sep="\t") evaluate<-function(exprs.all, names) { par(mfrow=c(3,2)) for(i in 1:length(exprs.all)) { a<-matrix(NA, nrow(exprs.all[[i]]), 2) a[,1]<-apply(exprs.all[[i]], 1, mean, na.rm=TRUE) a[,2]<-apply(exprs.all[[i]], 1, sd, na.rm=TRUE) plot(c(0, 12), c(0, 3), type="n", xlab="mean log intensities", ylab="sd log intensities") points(a[,1], a[,2], pch=".") title(names[i]) } } evaluate1<-function(exprs.all, names) { for(i in 1:length(exprs.all)) { a<-cor(exprs.all[[i]], use="pairwise.complete.obs") print(paste(names[i], ":", mean(a[a<1]))) } } ## get expression measure matrix for the first four replicates LatinData.mas5.norm.matrix<-log2(exprs(LatinData.mas5.norm)[,1:4]) LatinData.dchip.matrix<-log2(exprs(LatinData.dchip)[,1:4]) LatinData.dchip.PM.matrix<-log2(exprs(LatinData.dchip.PM)[,1:4]) LatinData.rma.matrix<-log2(exprs(LatinData.rma)[,1:4]) LatinData.rma.matrix1<-exprs(LatinData.rma)[,1:4] LatinData.gcrma.matrix<-exprs(LatinData.gcrma)[,1:4] LatinData.gcrma.MM.matrix<-exprs(LatinData.gcrma.MM)[,1:4] ## Plot evaluation figure. RMA plot looks strange. If look into the result, RMA seems to have taken log transformation jpeg(filename=paste(workingDir, "\\eval_replicate_1.jpg", sep=""), width=1024, height=768) exprs.all<-list(LatinData.mas5.norm.matrix, LatinData.dchip.matrix, LatinData.dchip.PM.matrix, LatinData.rma.matrix) evaluate(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA")) dev.off() evaluate1(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA")) ## Assume the RMA result has taken log transformation. Plot evaluation figure again. jpeg(filename=paste(workingDir, "\\eval_replicate_1_new.jpg", sep=""), width=1200, height=1024) exprs.all<-list(LatinData.mas5.norm.matrix, LatinData.dchip.matrix, LatinData.dchip.PM.matrix, LatinData.rma.matrix1, LatinData.gcrma.matrix, LatinData.gcrma.MM.matrix) evaluate(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA", "GCRMA (no MM)", "GCRMA (MM)")) dev.off() evaluate1(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA", "GCRMA(no MM)", "GCRMA(MM)")) ## print out of evaluate1 ##[1] "MAS5 : 0.893039182394166" ##[1] "dChip(PM/MM) : 0.960383080590284" ##[1] "dchip(PM only) : 0.99404264534601" ##[1] "RMA : 0.99781912159669" ##[1] "GCRMA(no MM) : 0.9992816227403" ##[1] "GCRMA(MM) : 0.998767869268427" ## get expression measure matrix for the next four replicates LatinData.mas5.norm.matrix<-log2(exprs(LatinData.mas5.norm)[,5:8]) LatinData.dchip.matrix<-log2(exprs(LatinData.dchip)[,5:8]) LatinData.dchip.PM.matrix<-log2(exprs(LatinData.dchip.PM)[,5:8]) LatinData.rma.matrix<-log2(exprs(LatinData.rma)[,5:8]) LatinData.rma.matrix1<-exprs(LatinData.rma)[,5:8] LatinData.gcrma.matrix<-exprs(LatinData.gcrma)[,5:8] LatinData.gcrma.MM.matrix<-exprs(LatinData.gcrma.MM)[,5:8] ## Assume the RMA result has taken log transformation. Plot evaluation figure again. jpeg(filename=paste(workingDir, "\\eval_replicate_2_new.jpg", sep=""), width=1200, height=1024) exprs.all<-list(LatinData.mas5.norm.matrix, LatinData.dchip.matrix, LatinData.dchip.PM.matrix, LatinData.rma.matrix1, LatinData.gcrma.matrix, LatinData.gcrma.MM.matrix) evaluate(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA", "GCRMA (no MM)", "GCRMA (MM)")) dev.off() evaluate1(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA", "GCRMA(no MM)", "GCRMA(MM)")) exprs.all<-list(LatinData.mas5.norm.matrix, LatinData.dchip.matrix, LatinData.dchip.PM.matrix, LatinData.rma.matrix1) evaluate(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA")) dev.off() evaluate1(exprs.all, c("MAS5","dChip(PM/MM)","dchip(PM only)","RMA")) ## print out of evaluate1 ##[1] "MAS5 : 0.900234102618892" ##[1] "dChip(PM/MM) : 0.962125798614082" ##[1] "dchip(PM only) : 0.996571922761332" ##[1] "RMA : 0.997819867501692" ##[1] "GCRMA(no MM) : 0.999446021907957" ##[1] "GCRMA(MM) : 0.998953186433171"