Mercurial > hg > LGDataverses
view DVN-web/installer/dvninstall/config/dvn_data_functions.R @ 6:1b2188262ae9
adding the installer.
author | "jurzua <jurzua@mpiwg-berlin.mpg.de>" |
---|---|
date | Wed, 13 May 2015 11:50:21 +0200 |
parents | |
children |
line wrap: on
line source
library(foreign) library(stats) library(methods) library(UNF) library(R2HTML) options(digits.secs = 3) ############ parameters ######################## univarstathdr<-c("Valid Cases", "Missing Cases(NAs)", "Total", "Mean", "Standard deviation", "Skewness", "Kurtosis", "Coefficient of variation", "Mode", "Minimum","1st Quartile","Median","3rd Quartile","Maximum","Range","Interquartile Range","Normality Test(Shapiro-Wilk Statistic)", "Normality Test(Shapiro-Wilk Statistic: p value)") imgprfx1<-c("<img src=\"http://") imgprfx2<-c("/nph-dmpJpg.pl?jpgfn=") imgsffx1<-c("\" >\n") imgsffx2<-c("\" >\n") ############# parameters ####################### # Note: # - The parameter na.strings is set to "NA", even though in the DVN tab files Missing Values are encoded as empty strings; # this may be some sort of a legacy thing (may be older files still had "NA"s in them as this was written?). After calling # this function, read.table141vdc, the DVN application classes (for ex., DvnRforeignFileConversionServiceImpl.java) make # another call to reset all the empties to NA. Some functions further down in this file also do that explicitly. # - I changed the strip.white parameter to FALSE (-- L.A., 05/07/2013); having it set to TRUE was resulting in the dropping # the empty entries that were supposed to represent Missing Values, when the subset contained a single numeric column, # no matter what the na.strings= was set to. read.table141vdc<-function (file, header = FALSE, sep = "\t", quote = "", dec = ".", col.names=NULL, na.strings = "NA",colClasses = NA, colClassesx = NA, nrows = -1, skip = 0, check.names = TRUE,fill = !blank.lines.skip, strip.white = FALSE, blank.lines.skip = FALSE, comment.char = "", varFormat=list()) { if (is.character(file)) { file <- file(file, "r") on.exit(close(file)) } if (!inherits(file, "connection")) stop("argument 'file' must be a character string or connection") if (!isOpen(file)) { open(file, "r") on.exit(close(file)) } if (skip > 0) readLines(file, skip) cols<- length(colClassesx) if (is.null(col.names)) col.names<-paste("V", 1:cols, sep = "") if(check.names) col.names <- make.names(col.names, unique = TRUE) what <- rep(list(""), cols) names(what) <- col.names known <- colClasses %in% c("logical", "integer", "numeric", "complex", "character") what[known] <- sapply(colClasses[known], do.call, list(0)) data <- scan(file = file, what = what, sep = sep, quote = quote, dec = dec, nmax = nrows, skip = 0, na.strings = na.strings, quiet = TRUE, fill = fill, strip.white = strip.white, blank.lines.skip = blank.lines.skip, multi.line = FALSE, comment.char = comment.char) nlines <- length(data[[1]]) if (cols != length(data)) { warning(paste("cols =", cols, " != length(data) =", length(data))) cols <- length(data) } #cat("colClassesx:\n") #cat(paste(class(colClassesx),"\n")) #cat(paste(colClassesx,"\n",sep=" ")) #cat(paste(class(varFormat),"\n")) #cat(paste(length(varFormat),"\n")) #cat("varFormat:\n") #cat(paste(varFormat,"\n",sep=" ")) saved.options <- options(digits.secs = 3) for (i in 1:cols) { #if (known[i]) next #data[[i]] <- as(data[[i]], colClasses[i]) #cat(paste(class(data[[i]]),"\n")) #cat(paste(mode(data[[i]]),"\n")) if (colClassesx[i] == 0) { # Make sure the character values are handled as such: #data[[i]]<-I(data[[i]]); data[[i]]<-as.character(data[[i]]); # And replace empty strings with NAs: data[[i]][ data[[i]] == '' ]<-NA # And remove the double quotes we had put around the non-missing # string values as they were stored in the TAB files: data[[i]]<-sub("^\"", "", data[[i]]) data[[i]]<-sub("\"$", "", data[[i]]) if (is.null(unlist(varFormat[col.names[i]]))){ #cat("before-s=",i, "\n") data[[i]] <- as(data[[i]], "character") #cat("after-s=",i, "\n") } else if (!is.null(unlist(varFormat[col.names[i]]))){ if (varFormat[col.names[i]] == 'D'){ #cat("before-d=",i, "\n") #data[[i]]<-as.Date(data[[i]], "%Y-%m-%d") data[[i]]<-as.Date(data[[i]]); #cat("after-d=",i, "\n") colClassesx[i]<-1 } else if (varFormat[col.names[i]] == 'T'){ #cat("before-t=",i,"\n") data[[i]]<-as.POSIXct(strptime(data[[i]], "%T")) #cat("after-t=", i,"\n") colClassesx[i]<-1 } else if (varFormat[col.names[i]] == 'DT'){ data[[i]]<-as.POSIXct(strptime(data[[i]], "%F %H:%M:%OS")) colClassesx[i]<-1 } else if (varFormat[col.names[i]] == 'JT'){ data[[i]]<-as.POSIXct(strptime(data[[i]], "%j %H:%M:%OS")) colClassesx[i]<-1 } } } else if (colClassesx[i] == 3) { # special case for Boolean/logical variables: # (these will be passed from the application as vectors of 0s and 1s) # also, note that this type will be used only when the subset is # created as part of the "save-as" functionality. When it's for # analysis, the DVN "boolean" variable will be of type 1, because # they will be handled as regular integer categoricals with the labels # "TRUE" and "FALSE". -- L.A. #print(data[[i]]) for (j in 1:length(data[[i]])) { if (!is.na(data[[i]][j]) && data[[i]][j] == "") { data[[i]][j]<-NA } } #print(data[[i]]) data[[i]]<-as.logical(as.numeric(data[[i]])) #print(data[[i]]) } else { data[[i]]<-type.convert(data[[i]], dec = dec) #cat("data[[", i, "]]:", class(data[[i]]), "\n", sep="") #if ( (class(data[[i]]) == "numeric") & (colClassesx[i]==1) ) { # colClassesx[i]<-2 #} } } options(saved.options) class(data) <- "data.frame" row.names(data) <- as.character(seq(len = nlines)) attr(data, "var.type")<-colClassesx #cat("end of read.table141vdc\n") data } # end of read.table141vdc transformrecoded <-function(x, recodedvarsindx = 2, dec = ".", col.names = NULL, colClassesx = undef, varFormat = list()){ #cat("inside transformrecoded\n") #cat(paste(col.names,"\n",sep="")) for (i in recodedvarsindx:length(x)) { #i = recodedindx[j] #cat("index: ") #cat(i) #cat("\n") #cat(paste(class(x[[i]]),"\n")) #cat(paste(mode(x[[i]]),"\n")) #cat(paste(varFormat[col.names[i]],"\n")) #cat(paste(unlist(varFormat[col.names[i]]),"\n")) testbool<-is.null(unlist(varFormat[col.names[i]])) #cat(as.character(testbool)) if (!is.null(unlist(varFormat[col.names[i]]))){ #cat("inside the if loop.\n") if (varFormat[col.names[i]] == 'D'){ x[[i]]<-as.Date(x[[i]]) #cat("x[[i]] is a Date;\n") colClassesx[i]<-1 } else if (varFormat[col.names[i]] == 'T'){ x[[i]]<-as.POSIXct(strptime(x[[i]], "%T")) colClassesx[i]<-1 } else if (varFormat[col.names[i]] == 'DT'){ x[[i]]<-as.POSIXct(strptime(x[[i]], "%F %H:%M:%OS")) colClassesx[i]<-1 } else if (varFormat[col.names[i]] == 'JT'){ x[[i]]<-as.POSIXct(strptime(x[[i]], "%j %H:%M:%OS")) colClassesx[i]<-1 } } } x } ########################################################### createvalindex <-function(dtfrm, attrname=NULL){ # this version relies on the list-based approach # completely new final [without old cod block] if (is.null(dtfrm)) { stop("dataframe is not specified\n") } else if (is.null(attrname)){ stop("attrname is is not specified\n") } else if (!exists('dtfrm')) { stop("dataframe is not found\n") } else if (!is.data.frame(dtfrm) ) { stop("Specified object is not a data.frame\n") } #DBG<-TRUE DBG<-FALSE try ( { if (attrname == 'val.index') { tabletype<-'val.table' valtable<-attr(dtfrm, 'val.table') } else if (attrname == 'missval.index') { tabletype<-'missval.table' valtable<-attr(dtfrm, 'missval.table') } else stop ("Specified attrname must be either val.index or missval.index\n") if (DBG) {cat("\nattribute name=",attrname,"\n")} if (length(valtable)) { vlindex <- list(); vlst <- list(); lstall<-list() vltbl<-list() if (DBG) { cat("length(",attrname,")=",length(valtable),"\n") cat("varidset(",attrname,")=",names(valtable),"\n") } nameset<-names(valtable) if (DBG) { str(nameset) cat("\nnameset:", paste(nameset,collapse="|"), "\n",sep="") } for (i in 1:(length(valtable))){ if (DBG) { cat("var=",i,"\n", sep="") cat("\tlstall:", paste(if (length(lstall)) {as.vector(lstall,mode="integer")} else {"empty"}, collapse=","), "\n",sep="") } nameseti<-nameset[i] if (!is.null(lstall[[as.character(i)]])){next} lsti<-list() # set i to the new list lsti[[as.character(i)]]<-i lstall[[as.character(i)]]<-i vlindex[[as.character(nameseti)]]<-nameset[i] vltbl[[as.character(nameseti)]]<-valtable[[i]] if (DBG) {cat("\tlsti:", paste(as.vector(lsti, mode="integer"),collapse=","), "\n",sep="")} for (j in i:length(valtable)){ if (!is.null(lstall[[as.character(j)]])){next} if (attrname == 'val.index') { if ( identical( names(valtable[[i]]), names(valtable[[j]]) ) & identical(valtable[[i]], valtable[[j]]) ) { if (DBG) {cat("\tVL:new duplicate (var#) to be added:", j,"\n",sep="")} lsti[[as.character(j)]]<-j vlindex[[as.character(nameset[j])]]<-nameseti lstall[[as.character(j)]]<-j } } else if (attrname == 'missval.index') { if ( identical(valtable[[i]], valtable[[j]]) ) { if (DBG) {cat("\tMSVL: new duplicate (var#) to be added:", j,"\n",sep="")} lsti[[as.character(j)]]<-j vlindex[[as.character(nameset[j])]]<-nameseti lstall[[as.character(j)]]<-j } } } if (DBG) {cat("\tlsti to be attached to vlst:", paste(as.vector(lsti, mode="integer"),collapse=","), "\n",sep="")} if (length(lsti)){ vlst[[nameseti]]<-nameset[as.vector(lsti, mode="integer")] } } if (DBG) { cat("\nvlst=attr(dtfrm,'val.list') <- vlst\n") str(vlst) cat("\nvlindex=attr(dtfrm,'val.index') <- vlindex\n") str(vlindex) cat("\nvltbl=attr(dtfrm,'val.table')<- valtablex\n") str(vltbl) cat("\nnames(vltbl): equivalent to tmpunique\n") cat("unique var IDs:", paste(names(vltbl),collapse="|"), "\n",sep="") } attr(dtfrm, attrname)<-vlindex if (attrname == 'val.index') { attr(dtfrm, 'val.list') <- vlst attr(dtfrm, 'val.table') <- vltbl } else if (attrname == 'missval.index') { attr(dtfrm, 'missval.list') <- vlst attr(dtfrm, 'missval.table')<-vltbl } } else { # no value labels #vlindex<-rep(NA, dim(dtfrm)[2]) attr(dtfrm, attrname)<-NULL if (attrname == 'val.index') { attr(dtfrm, 'val.list')<- NA } else if (attrname == 'missval.index') { attr(dtfrm, 'missval.list') <- NA } } invisible(dtfrm) }) # end try } # end of createvalindex ########################################################### # 2 table functions that return univariate statistics # continuous case frqtbl.ctn<-function(x){ frqtbl<-list() tbl1<-table(x, useNA='ifany') frqtbl[['Mode']]<-NA if (length(x) > length(tbl1)) { frqtbl[['Mode']]<- names(tbl1)[which.max(tbl1)] } frqtbl } frqtbl.dsc<-function(x){ frqtbl<-list() DBG<-FALSE # ftbl: frequency table ftbl<-table(x, useNA='ifany') # get the mode frqtbl[['Mode']]<-NA frqtbl[['freqtbl']]<-NA frqtbl[['pcnttbl']]<- NA if (length(x) > length(ftbl)){ frqtbl[['Mode']]<-names(ftbl[which.max(ftbl)]) if ((length(ftbl)<=50)){ # ptbl: percentage table ptbl<-100*(ftbl/sum(ftbl)) # set up the return list frqtbl[['freqtbl']]<- ftbl frqtbl[['pcnttbl']]<- ptbl if (DBG){ cat("\ttable header:",paste(dimnames(ftbl)[[1]], collapse='|'), "\n") cat("\ttable frequency:",paste(ftbl, collapse='|'), "\n") cat("\tstatistical mode:", frqtbl[['Mode']], "\n") cat("\tstatistical mode(freq):", tbl1[which.max(ftbl)], "\n") } } } frqtbl } sw.stat<-function(x,N){ DBG<-TRUE DBG<-FALSE SW<-list() SW$value <- NA SW$Pvalue <- NA if ((N >= 3) & (N <= 5000)) { shpr <- try(shapiro.test(x)) if (attr(shpr, "class") == 'htest') { if(DBG) {cat("sw statistics assigned\n")} SW$value <- shpr[[1]][[1]] SW$Pvalue <- shpr[[2]] } if(DBG) {cat("sw statistics end\n")} } SW } univarStat.cntn<-function(varseti){ options(digits=3) DBG<-TRUE DBG<-FALSE if(DBG) {cat("pass the point univStat(continuous)\n")} N<-sum(complete.cases(varseti)) svnm<-summary(varseti) if (N) { min.value <- svnm[[1]] q1.value <- svnm[[2]] #median.value <- median(varseti) median.value <- svnm[[3]] q3.value <- svnm[[5]] max.value <- svnm[[6]] range.value <- svnm[[6]]-svnm[[1]] iqr.value <- svnm[[5]]-svnm[[1]] mean.value <- svnm[[4]] } else { min.value <- NA q1.value <- NA median.value <- NA q3.value <- NA max.value <- NA range.value <- NA iqr.value <- NA mean.value <- NA } stdv.value <- sd(varseti, na.rm=T) z0 <- scale(varseti) if (N >= 2) {cv.value <- stdv.value/svnm[[4]] } else {cv.value <- NA} if (N >= 3) {skewness.value <- (N/(N-1)/(N-2))*sum((z0)^3, na.rm=T)} else {skewness.value <- NA} if (N >= 4) {kurtosis.value <- ((N*(N+1)/(N-1))*sum((z0)^4, na.rm=T) - 3*(N-1)^2)/(N-2)/(N-3)} else {kurtosis.value <-NA} # find the maximum frequency cell # index: which.max(table(dtfrm[[i]])) maxfreq<-frqtbl.ctn(x=varseti)[["Mode"]] SW<-sw.stat(x=varseti,N=N) statset<- list( Vald = N, Invald = sum(is.na(varseti)), Total = length(varseti), Mean = mean.value, Stdev = stdv.value, Skewness = skewness.value, Kurtosis = kurtosis.value, CV = cv.value, Mode = maxfreq, Minimum = min.value, Q1 = q1.value, Median = median.value, Q3 = q3.value, Maximum = max.value, Range = range.value, I.Q.R = iqr.value, S.W.statistic = SW$value, S.W.P.value = SW$Pvalue ) statset } univarStat.dscrt<-function(varseti, ordnl=TRUE){ DBG<-TRUE DBG<-FALSE if(DBG) {cat("pass the point univStat(discrete)\n")} N<-sum(complete.cases(varseti)) if (ordnl){ median.value <-NULL if (N) {median.value <- median(varseti, na.rm=TRUE) } } tmpfrq<-frqtbl.dsc(x=varseti) statset<- list( Vald = N, Invald = sum(is.na(varseti)), Total = length(varseti), Mode = tmpfrq[["Mode"]], freqtbl = tmpfrq[["freqtbl"]], pcnttbl = tmpfrq[["pcnttbl"]] ) if (ordnl){ statset$Median<-median.value } statset } univarStat<-function(dtfrm){ DBG<-TRUE DBG<-FALSE if(DBG) { cat("\n\nEntered the function univarStat\n") NAMESET<-names(dtfrm) } STATLST<-list() # create temp vars VARTYPE<-attr(dtfrm, "var.type") for (i in 1: dim(dtfrm)[2]) { try ({ varseti<-dtfrm[[i]] if(DBG) {cat("variable name =",NAMESET[i],"\n")} N<-sum(complete.cases(varseti)) if (VARTYPE[i]== 2) { STATLST[[as.character(i)]]<-univarStat.cntn(varseti=varseti) } else if (VARTYPE[i] == 1) { STATLST[[as.character(i)]]<-univarStat.dscrt(varseti=varseti) } else if (VARTYPE[i] == 0) { STATLST[[as.character(i)]]<-univarStat.dscrt(varseti=varseti,ordnl=FALSE) } else { STATLST[[as.character(i)]]<-NULL } }) # end of try } # end of the loop attr(dtfrm, "univarStat.lst")<-STATLST invisible(dtfrm) } # end of univarStat ########################################################### univarChart<-function(dtfrm, analysisoptn=NULL, imgflprfx=NULL, standalone=T){ # description # to print univariate charts # # arguments # dtfrm[[i]] variable name # analysisoptn Analysis option # imgflprfx temporary image file prefix # local variable # varlabel variable label (local variable) # No return value; each image file is written in /tmp # $RvlsPrfx = "$TMPDIR/Rvls.$PRCSSID"; # note: value labels will be printed in html tables # unvlst[[as.character(i)]]<-statset # new list-based notations # USL<-attr(dtfrm,"univarStat.lst") # chartset[["hstbx"]]<-hstgrmfile # chartset[["qqplt"]]<-qqpltfile # chartset[["brchrt"]]<-barpltfile # USL[[as.character(i)]][["freqtbl"]] # chrtlst[[as.character(i)]]<-chartset DBG<-FALSE #DBG<-TRUE if (is.null(analysisoptn)){ analysisoptn<-c(1,1,0) } if (is.null(imgflprfx)) { PRCID<-format(Sys.time(), "R%Y%m%d_%H%M%S") #imgflprfx<-paste("c:/asone/R/temp/",PRCID,sep="") imgflprfx<-PRCID if (DBG) {cat("\nprocessID=",imgflprfx,"\n", sep="")} } # function defintions varlabel.chrt<-function(lblset){ DBG<-FALSE #DBG<-TRUE # variable label processing if (DBG) {cat("\nEntered varlabel.chrt\n")} if (nchar(lblset[["varlabel"]])>45) { varlabel<- paste(substr(lblset[["varlabel"]], 1, 45), "...") } else { varlabel<-lblset[["varlabel"]] } lblset[["varlabel"]]<-paste(lblset[["varname"]], ": ", varlabel, sep="") lblset } univarChart.cntn<-function(varseti, imgflprfx, labelset) { DBG<-FALSE #DBG<-TRUE chartset<-list() if (DBG) {cat ("univarChart.cntn:varname:", labelset[["varname"]], "\n")} #histgram/boxplot hstgrmfile<-paste(imgflprfx, labelset[["varname"]],"hs.jpg", sep=".") bitmap(hstgrmfile, type = "jpeg", height = 3.5, width = 3, res=100, pointsize=9) layout(matrix(c(1,2),nrow=2,ncol=1), widths=c(1), heights=c(5,1)) par(mar=c(4,4,1,1), mgp=c(2, 0.5, 0), tcl=-0.25, cex.axis=0.9, cex.lab=0.9) hist(varseti, main="", xlab=labelset[["varlabel"]], col="lightgrey") par(mar=c(2,4,0,1)) boxplot(varseti, main="", xlab="", ylab="", col="lightgrey", horizontal=T) dev.off() #par(def.par) if (!standalone){ tmpvsldirhs<-unlist(strsplit(hstgrmfile,"/")) hstgrmfile<-paste(tmpvsldirhs[(length(tmpvsldirhs)-1):length(tmpvsldirhs)],collapse="/") } chartset[["hstbx"]]<-hstgrmfile #qq-plot qqpltfile<-paste(imgflprfx, labelset[["varname"]],"qq.jpg", sep=".") bitmap(qqpltfile, type = "jpeg", height = 3, width = 3, res= 100, pointsize=8.5) par(tcl=-0.25, cex.axis=0.9, cex.lab=1.0) qqnorm(varseti, main="Normal Q-Q Plot", ylab=labelset[["varlabel"]], pch=15) qqline(varseti) dev.off() #par(def.par) if (!standalone){ tmpvsldirqq<-unlist(strsplit(qqpltfile,"/")) qqpltfile<-paste(tmpvsldirqq[(length(tmpvsldirqq)-1):length(tmpvsldirqq)],collapse="/") } chartset[["qqplt"]]<-qqpltfile chartset } univarChart.dscrt<-function(frqtbl, imgflprfx, labelset){ DBG<-FALSE #DBG<-TRUE chartset<-list() if (DBG) {cat ("univarChart.dscrt:varname:", labelset[["varname"]], "\n")} barpltfile<-paste(imgflprfx, labelset[["varname"]], "bp.jpg", sep=".") bitmap(barpltfile, type = "jpeg", height = 3, width = 3, res= 100, pointsize=8.5) par(tcl=-0.25, cex.axis=0.9, cex.lab=1.0) barplot(frqtbl, col="lightgrey", main="", xlab=labelset[["varlabel"]], ylab="Frequency") dev.off() #par(def.par) if (!standalone){ tmpvsldirbp<-unlist(strsplit(barpltfile,"/")) barpltfile<-paste(tmpvsldirbp[(length(tmpvsldirbp)-1):length(tmpvsldirbp)],collapse="/") } chartset[["brchrt"]]<-barpltfile chartset } ############################ # implementation varlabels<-attr(dtfrm,"var.labels") varnames<-names(dtfrm) vartypes<-attr(dtfrm,"var.type") STATLST<-NULL if (!is.null(attr(dtfrm,"univarStat.lst"))) { STATLST<-attr(dtfrm,"univarStat.lst") } chrtlst<-list() for (i in 1: dim(dtfrm)[2]){ try( { if (DBG) {cat("univarChart:",i,"-th var\n")} chrtlbl<-list(varname=varnames[i], varlabel=varlabels[i]) labelset<-varlabel.chrt(lblset=chrtlbl) varseti<-dtfrm[[i]] if (is.null(STATLST[[as.character(i)]])) { tmpvald<-sum(complete.cases(varseti)) } else { tmpvald<-STATLST[[as.character(i)]][["Vald"]] } if (DBG) {cat("tmpvald=",tmpvald,"\n")} chartset<-list() if (vartypes[i]==2) { #Continuous Variable if (analysisoptn[2] & tmpvald) { chrtlst[[as.character(i)]]<-univarChart.cntn(varseti=varseti, imgflprfx=imgflprfx, labelset=labelset) } } else { #Discrete Variable #bar plot if (analysisoptn[2] & tmpvald ) { # chart option is chosen if (analysisoptn[1]){ # univariate statistics option is chosen -> freq table is available # note: univariate statistics option is not chosen, tmpfrqtbl is NA tmpfrqtbl<-STATLST[[as.character(i)]][["freqtbl"]] } else { # calculate statistics if (vartypes[i]==1) { statlst<-univarStat.dscrt(varseti=varseti) } else { statlst<-univarStat.dscrt(varseti=varseti,ordnl=FALSE) } tmpfrqtbl<-statlst[["freqtbl"]] } chartset[["brchrt"]]<-NA if( (length(tmpfrqtbl)<=10) & (length(tmpfrqtbl)>1) ) { chartset<-univarChart.dscrt(frqtbl=tmpfrqtbl, imgflprfx=imgflprfx, labelset=labelset) } else if (class(tmpfrqtbl)=="table") { # number of categories <= 50 # no chart but table if (!analysisoptn[1]){ STATLST[[as.character(i)]]<-statlst } } else if (is.na(tmpfrqtbl)) { # no table available if (!analysisoptn[1]){ STATLST[[as.character(i)]]<-statlst } } chrtlst[[as.character(i)]]<-chartset } } # end of D case }) # end of try } # end of var-wise-loop attr(dtfrm, "univarChart.lst")<-chrtlst if (is.null(attr(dtfrm,"univarStat.lst")) ) { attr(dtfrm,"univarStat.lst")<-STATLST } invisible(dtfrm) } # end of univarChart ####################################################################### univarStatHtml<-function(dtfrm, tmpimgfile, analysisoptn, tmphtmlfile, standalone=T){ # Description # # arguments # dtfrm variable furnished with attributes # tmpimgfile temporary image file prefix: =$SRVRCGI=$SERVER$CGIDIR # analysisoptn analysis option # nrows local variable # tmphtmlfile temporary html file # file tmphtmlfile DBG<-TRUE DBG<-FALSE # open the connection whtml<-file(tmphtmlfile, "w") on.exit(close(whtml)) # color parameters # legend: c(1:background, 2:table header, 3: table body(o), 4: table body(e)) # clschm <-c("#FFFFFF", "#CCFFCC","#e0ffff","#f0fff0") # green-based palette # blue-based palette #clschm <-c("#FFFFFF", "#e6e6fa","#ffffff","#f5f5f5") clschm <-c("dvnUnvStatTbl", "dvnUnvStatTblHdr","dvnUnvStatTblRowO","dvnUnvStatTblRowE") # table parameters # legend: c(border, cellspacing) tblprm <-c(0, 2) #cat("\nEntered the function univarStatHtml\n") # values for local tests # set localtest 0 after local tests localtest<-TRUE localtest<-FALSE if (localtest){ tmpimgfile<-c("") imgprfx1<-c("<img src=\"") imgprfx2<-c("") univarstathdr<-c("Valid Cases", "Invalid Cases(NAs)", "Total", "Mean", "Standard deviation", "Skewness", "Kurtosis", "Coefficient of variation", "Mode", "Minimum","1st Quartile","Median","3rd Quartile","Maximum","Range","Interquartile Range","Normality Test:Shapiro-Wilk Statistic", "(Shapiro-Wilk Statistic: p value)") } if (standalone) { imgflprfx<-paste(imgprfx1,tmpimgfile,imgprfx2,sep="") } else { imgflprfx<-"<img src=\"" } # constant for rendering a table for univariate statistics(continuous vars only) uslstlen<-length(univarstathdr) nrows <-ceiling(uslstlen/2) blnkcell<-uslstlen%%2==TRUE nameset<-names(dtfrm) varlabelset<-attr(dtfrm,"var.labels") CHRTLST<-attr(dtfrm, "univarChart.lst") STATLST<-attr(dtfrm, "univarStat.lst") VARTYPE<-attr(dtfrm, "var.type") VALINDEX<-attr(dtfrm, "val.index") VALTABLE<-attr(dtfrm, "val.table") pt.varheader<-function(namesi, varlabelsi=NA) {h3<-paste("<h3>", namesi, if (!is.na(varlabelsi)) {paste(": ", varlabelsi, sep="")}, "</h3>\n",sep="");h3} ################### # continuous case univarStatHtml.cntn<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi){ # statlst STATLST[[as.character(i)]] # imgfllst imgfllst=CHRTLST[[as.character(i)]] # cmbntn analysisoptn # function definition sections # create the first tr tag: chart part pt.tr1<-function(imgfllst, cmbntn){ tr1<-"" if (cmbntn[2]) { if (cmbntn[1]) { colspan<-" colspan=\"2\"" } else { colspan<-""} # both if(!is.null(imgfllst[["hstbx"]])){ tr1.l<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["hstbx"]],imgsffx1,"</td>\n",sep="") } else { tr1.l<-paste("<td",colspan,">\n<p><B><font color=red>Histogram/Boxplot Not Available</font></B></p>\n</td>\n") } if(!is.null(imgfllst[["qqplt"]])) { tr1.r<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["qqplt"]],imgsffx1,"</td>\n",sep="") } else { tr1.r<-paste("<td",colspan,">\n<p><B><font color=red>Normal Q-Q plot Not Available</font></B></p>\n</td>\n",sep="") } tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") } tr1 } # create the 2nd and thereafter tr tags: statistics part pt.tr2<-function(statlst, cmbntn){ tr2<-"" if (cmbntn[1]) { # statistics on # table header tr2<-paste("<tr class=\"",clschm[2],"\">\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n</tr>\n",sep="") # statistical data # when # of statistics is not even if (blnkcell){ univarstathdr[length(statlst)+1]<-" "} # table body for (j in 1:nrows) { if (j%%2==FALSE) colorprm <- clschm[3] else colorprm <-clschm[4] tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n", "<td align=\"left\">",univarstathdr[j],"</td>\n", "<td align=\"right\">", prettyNum(statlst[[j]]),"</td>\n", "<td align=\"left\">",univarstathdr[j+nrows],"</td>\n", "<td align=\"right\">", if ( (j==nrows) & (blnkcell) ) {" "} else {prettyNum(statlst[[j+nrows]])},"</td>\n</tr>\n", sep="") } } tr2 } # create the chart/statistics table segment pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst){ tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn) tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") tbl } # create per variable html segment pt.varunit.cntn<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} ## end of function definitions ## # implementation pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn) ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) ptvarunitc<-pt.varunit.cntn(vhdr=ptvarheader, vcntnts=pttbl) ptvarunitc } # end of continuous case ###################### # discrete case univarStatHtml.dscrt<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi, vltbl) { # statlst STATLST[[as.character(i)]] # imgfllst imgfllst=CHRTLST[[as.character(i)]] # cmbntn analysisoptn # function definition sections #statlst[["freqtbl"]] # mode and median even if a freq table is not available nrw<-3 # add one for "total" row #if (!is.na(statlst$freqtbl)) {nrw<-length(statlst$freqtbl)+1+nrw} if (class(statlst$freqtbl)=="table") {nrw<-length(statlst$freqtbl)+nrw} # nrws: rowspan parameter value if the chart option is chosen nrws<-nrw+1 pt.tr1<-function(imgfllst, cmbntn){ try({ # tr1.l: chart part tr1.l<-"" sprsstr1r<-FALSE if (cmbntn[2]) { rowspan<-"" if (cmbntn[1]) { rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") } if(!is.na(imgfllst[["brchrt"]])){ tr1.l<-paste("<td",rowspan," valign=\"top\">\n",imgflprfx,imgfllst[["brchrt"]], imgsffx1, "</td>\n", sep="") } else { if (class(statlst$freqtbl)=="table"){ rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") tr1.l<-paste("<td",rowspan," valign=\"top\">\n<p><B><small>The number of categories is more than 10 or equal to 1.<br>Table substitutes for Bar plot</small></B></p>\n</td>\n",sep="") cmbntn[1]<-1 } else { tr1.lm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") tr1.lhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") tr1.l<-paste(tr1.lm,tr1.lhdr, sep="") sprsstr1r<-TRUE } } } # tr1.r: freq/pcnt table header part tr1.r<-"" if (cmbntn[1]) { if (class(statlst$freqtbl)=="table"){ tr1.r<-paste("<td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") } else if (!sprsstr1r){ tr1.rm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") tr1.rhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") tr1.r<-paste(tr1.rm,tr1.rhdr, sep="") } } tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") }) # end of try } # create the 2nd and thereafter tr tags: statistics part pt.tr2<-function(statlst, cmbntn, vltbl, imgfllst){ try({ tr2<-"" tableon<-FALSE if ( cmbntn[2]){ if (is.na(imgfllst[["brchrt"]])){ tableon<-TRUE } } if (cmbntn[1] | tableon) { if (class(statlst$freqtbl)=="table") {tblkey<-names(statlst$freqtbl)} # if freqtbl is NA, tblkey becomes NULL for (j in 1:nrw) { if (j%%2==FALSE) { colorprm <- clschm[3]} else {colorprm <-clschm[4]} if (j < (nrw -2)) { catgrylbl<-"" if (!is.null(vltbl)){ if(!is.null(vltbl[[tblkey[j]]])) { catgrylbl<-paste("(",vltbl[[tblkey[j]]],")",sep="") } } tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">",tblkey[j],catgrylbl,"</td>\n<td align=\"right\">",statlst$freqtbl[[j]],"</td>\n<td align=\"right\">", signif(statlst$pcnttbl[[j]],3),"</td>\n</tr>\n", sep="") } else if (j == (nrw -2)) { #cat("entering the total row\n") tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">Total</td>\n<td align=\"right\">",statlst$Vald+statlst$Invald,"</td>\n<td align=\"right\">100</td>\n</tr>\n", sep="") } else if (j == (nrw -1)) { # median #cat("entering the median\n") median.vl<- "Not Available" median.lbl<-"" if (!is.null(statlst$Median)) { median.vl<- as.character(statlst$Median) if (!is.null(vltbl) && (nrw>3)){ if (!is.null(vltbl[[median.vl]])) { median.lbl<-paste("(",vltbl[[median.vl]],")",sep="") } } } tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Median</td>\n<td align=\"right\">",median.vl,"</td>\n<td align=\"right\">",median.lbl,"</td>\n</tr>\n", sep="") } else if (j == nrw) { # mode #cat("entering the Mode\n") mode.vl<-"Not Available" mode.lbl<-"" if (!is.null(statlst$Mode)) { mode.vl<-statlst$Mode if (!is.null(vltbl) && (nrw>3) ) { if (!is.null(vltbl[[mode.vl]])) { mode.lbl<-paste("(",vltbl[[mode.vl]], ")", sep="") } } } tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Mode</td>\n<td align=\"right\">",mode.vl,"</td>\n<td align=\"right\">",mode.lbl,"</td>\n</tr>\n", sep="") } } } tr2 }) # end of try } # create the chart/statistics table segment pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst,vltbl=vltbl){ try({ tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn, vltbl=vltbl,imgfllst=imgfllst) tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") tbl }) } # create per variable html segment pt.varunit.dscrt<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} ## end of function definitions ## # implementation try({ #cat("enters the discrete html body function\n", sep="") pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn, vltbl=vltbl) ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) ptvarunitd<-pt.varunit.dscrt(vhdr=ptvarheader, vcntnts=pttbl) ptvarunitd }) } # end of discrete case # main # implementation rawVarName <- nameset if (length(attr(dtfrm, "Rsafe2raw"))>0){ Rsafe2raw <- attr(dtfrm, "Rsafe2raw") for (i in 1:length(nameset)){ if (!is.null(Rsafe2raw[[nameset[i]]])){ rawVarName[i] <- Rsafe2raw[[nameset[i]]]; } } } for (i in 1:dim(dtfrm)[2]){ try({ if (VARTYPE[i]==2) { varsgmnt.c<-univarStatHtml.cntn(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i]) cat(file=whtml, varsgmnt.c, sep="") } else { if (DBG) {cat(i,"-th var before entering the discrete html function\n", sep="")} #cat("check the value table=",VALTABLE[[VALINDEX[[i]]]],"\n", sep="") if (is.null(VALINDEX[[as.character(i)]])){valtable<-NULL} else {valtable<-VALTABLE[[VALINDEX[[as.character(i)]]]]} varsgmnt.d<-univarStatHtml.dscrt(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i], vltbl=valtable) cat(file=whtml, varsgmnt.d, sep="") } }) # end of try } # end of var-wise for-loop } #end of the function univarStatHtml ########################################################### univarDataDwnld<-function(dtfrm, dwnldoptn, dsnprfx) { # dtfrm(=z1) dataset to be downloaded # dwnldoptn(=z2) data download option # dsnprfx(=z3) dataset name prefix # The portion of code immediately below has been added to # convert extra metadata, such as value labels, supplied in # the proprietary attributes (below) into standard R notations, # such as "comments" for variable labels and "factors" for # value labels. # # This is still work in progress! -- L.A. NAMESET<-names(dtfrm) VARLABELS<-attr(dtfrm,"var.labels") attr(x,"orig.names")<-attr(dtfrm,"var.labels") CHRTLST<-attr(dtfrm, "univarChart.lst") STATLST<-attr(dtfrm, "univarStat.lst") VARTYPE<-attr(dtfrm, "var.type") VALINDEX<-attr(dtfrm, "val.index") VALTABLE<-attr(dtfrm, "val.table") MISSVALINDEX <- attr(x,"missval.index") MISSVALTABLE <- attr(x,"missval.table") recodemiss<-TRUE recodefactors<-TRUE dropfactorlevels<-FALSE orderfactors<-TRUE for (i in 1:length(x)) { cat("inside the for loop\n") cat("class: ") cat(class(x[[i]])) cat("\n") # Recoding discrete, categorical variables as R factors; # But, (experimental...) only if there are value labels supplied. # This means, among other things, that an ingested R character, # or integer vector would stay a vector, and not a factor, # in a saved-as-R subset. # -- L.A. if (!is.null(VARTYPE) && VARTYPE[i]<2 && recodefactors) { # Additionally, if we are saving as Stata, we're only # recoding discrete numeric values (vartype 1), but not Strings. # This is because of the nature of factors in R. # TODO: add documentation for all of this! # -- L.A. #if ((dwnldoptn != 'D03') || (VARTYPE[i] == 1)) { if (((dwnldoptn != 'D03') || (VARTYPE[i] == 1)) && !(is.null(VALINDEX[[as.character(i)]]))) { if (is.null(VALINDEX[[as.character(i)]])) { vti <- NULL } else { vti <- VALTABLE[[VALINDEX[[as.character(i)]]]] cat(paste(class(vti),"\n")) cat(paste(length(vti),"\n")) cat(paste("VTI", vti, "\n", sep=" : ")) } if (dropfactorlevels) { vtilevels<-NULL } else { if (is.numeric(x[[i]])) { vtilevels<-as.numeric(names(vti)) } else { vtilevels<-names(vti) } } # save / re-attach date/time-related class name classToken <- class(x[[i]]) vlevsi <- as.list(sort(unique.default(c(x[[i]],vtilevels)))) if ((classToken[1] == "Date") || (classToken[1] == "POSIXt")) { class(vlevsi)<- classToken } names(vlevsi)<-vlevsi tmatch<-na.omit(match(names(vti),names(vlevsi))) if (length(tmatch)>0) { names(vlevsi)[tmatch] <- vti } mti<-integer(0); mti<-integer(0); if (recodemiss && !is.null(MISSVALINDEX[[as.character(i)]])) { mti<-MISSVALTABLE[[MISSVALINDEX[[as.character(i)]]]] tmatch<-na.omit(match(mti,vlevsi)) if (length(tmatch)>0) { vlevsi[tmatch]<-NULL } } # TODO: Add explicit nominal/ordinal/dichotomous information to meta data, instead # of assuming non-character vars are ordered if ((dwnldoptn == 'D04') && !(is.null(VALORDER[[as.character(i)]]))) { cat("ordered value labels supplied") x[[i]] <- factor(x[[i]], levels=VALORDER[[as.character(i)]], ordered=TRUE) } else { cat("no ordered value labels supplied\n") cat(paste(VARTYPE[i],"\n",sep="")) cat(paste(length(vlevsi),"\n",sep="")) orderedfct<-(orderfactors && VARTYPE[i]>0 && ((length(vlevsi)-length(mti)>2))) cat(paste(as.character(orderedfct),"\n", sep="")) paste("MTI", mti,"\n",sep=" : ") paste("VLEVSI", vlevsi,"\n",sep=" : ") x[[i]] <- factor(x[[i]], levels=vlevsi, labels=names(vlevsi), ordered=(orderfactors && VARTYPE[i]>0 && ((length(vlevsi)-length(mti)>2)))) } attr(x,"vlevsi")<-vlevsi; attr(x,"namesvlevsi")<-names(vlevsi); } } # try to add variable labels as R comments: (L.A. -- ?) comment(x[[i]]) <- VARLABELS[i] } # end of added recoding -- L.A. if (dwnldoptn == 'D01') { # In the future when a sample program file is attached to # a text file, col.names should be set to F to avoid # printing a variable list write.table(dtfrm, file=dsnprfx, sep="\t", row.names=F, na=".") } else if (dwnldoptn == 'D02') { # SPLUS: (L.A.) for (i in 1:length(x)) { if (class(x[[i]]) == 'AsIs'){ x[[i]]<-as.character(x[[i]]); } } #attach(dtfrm) dump('x', file=dsnprfx) #dump(ls(dtfrm), file=dsnprfx) #detach(dtfrm) } else if (dwnldoptn == 'D03') { # STATA: (L.A.) ## -- replaced dtfrm with x in the following paragraph; - L.A. (?) # truncate over-sized string variables MaxLenStringVar <- 127 vt <- attr(x, 'var.type') for (i in 1:length(vt)){ if (vt[i] == 0){ #cat(paste(i, "-th var is char type", sep=""), "\n") maxlen <- max(unlist(lapply(x[[i]],nchar))) if (maxlen > MaxLenStringVar) { #cat(paste(i, "-th var is over-sized string var", sep=""), "\n") x[[i]] <- strtrim(x[[i]], MaxLenStringVar) } } } write.dta(x, file=dsnprfx, version=7) } else if (dwnldoptn == 'D04') { # SAVE AS R WORKSPACE: (L.A.) save(x,file=dsnprfx) } } # end of univarDataDwnld.R ########################################################### sumStatTabAll.DDI<-function(dtfrm, file="", flid=1, jobNo=0, startno=1, novars=0){ # sumStatTabAll.DDI(dtfrm=x,file="/tmp/mry/00001/00001.stat.1.tab", flid=1) # arguments # dtfrm data frame furnished with attributes # file outp file (tab-delimited) # flid file ID # ordrDDI == 0 if no division of a job # constants entref<-c("&", "<", ">", "'", "\"") nmstr<-c("&","<", ">", "'",""") sumStatset<-c("mean", "medn", "mode", "vald", "invd", "min", "max", "stdev") DEBUG<-FALSE fileid<-paste("file", flid, sep="") #varIDprfx<-paste("v", flid, ".", sep="") # open the connection if (file!="") { if (jobNo==0){ wxml<-file(file, "w") } else if (jobNo >0) { wxml<-file(file, "a") } on.exit(close(wxml)) } else { stop("output file name is not specified\n") } # An auxiliary function that replaces the above five characters with the named entities chr2xtrf<-function(lbl){ for (i in 1:length(entref)) { lbl<-gsub(entref[i], nmstr[i], lbl, perl=T, useBytes = T) } } # xml printing up to the section 3 #if (jobNo<= 1 ){ # if (jobNo){novars<-"" } else {novars<-dim(dtfrm)[2]} if (jobNo == 0){ if (novars==0){novar<-"";} else if (novars > 0) {novar <-novars} cat(file=wxml, sep="",paste(dim(dtfrm)[1],novar,fileid,sep="\t"),"\n" ) } VARTYPE<-attr(dtfrm, "var.type") NAMES<-names(dtfrm) for (i in 1: dim(dtfrm)[2]){ # sumStatset<-c("mean", "medn", "mode", "vald", "invd", "min", "max", "stdev") tmpvari<-dtfrm[[i]] if(DEBUG) {cat("variable name =",NAMES[i],"\n")} if (VARTYPE[i] ==0){ # set '' to NA tmpvari[ tmpvari == ""]<-NA; tmpvari[ sub('^\\s+', '',tmpvari, perl = T, useBytes=T)==''] <-NA } tbl1<-table(tmpvari, useNA='ifany') N<-sum(complete.cases(tmpvari)) if (VARTYPE[i]== 2) { svnm<-summary(tmpvari) if (N) { min.value <- svnm[[1]] median.value <- median(tmpvari, na.rm=TRUE) max.value <- svnm[[6]] mean.value <- svnm[[4]] } else { min.value <- NA median.value <- NA max.value <- NA mean.value <- NA } stdv.value <- sd(tmpvari, na.rm=T) # find the maximum frequency cell # index: which.max(table(dtfrm[[i]])) maxfreq<-NA if (length(tmpvari) > length(tbl1)) { maxfreq<- names(tbl1)[which.max(tbl1)] } statset<- list( Mean = mean.value, Median = median.value, Mode.Value = maxfreq, Vald = N, Invald = sum(is.na(tmpvari)), Minimum = min.value, Maximum = max.value, Stdev = stdv.value ) #if (length(attr(tmpvari, 'Univariate'))==0){attr(dtfrm[[i]],"Univariate") <- statset} } else if ( (VARTYPE[i] < 2) & (VARTYPE[i] >=0) ){ if(DEBUG) {cat("pass the point univStat(discrete)\n")} statset<- list( Vald = N, Invald = (length(tmpvari) - N), Total = length(tmpvari) ) if (DEBUG){cat("\ttable header:",paste(dimnames(tbl1)[[1]], collapse='|'), "\n")} if (DEBUG){cat("\ttable frequency:",paste(tbl1, collapse='|'), "\n")} } else { if(DEBUG) {cat("out-of-range value", i ,"-th var =", VARTYPE[i],"\n")} } # section 4 pt.vr.1<-paste(NAMES[i],VARTYPE[i],sep='\t') pt.vr.st<-"" #iadj <- (i+startno-1) if (VARTYPE[i] == 2) { # continuous variable case #pt.vr.st<- if (!is.na(statset[[1]])) {statset[[1]]} else {"."} pt.vr.st<-"8" for (j in 1:length(sumStatset)) { pt.vr.st<-paste(pt.vr.st,if (!is.na(statset[[j]])) {statset[[j]]} else {"."}, sep="\t") } # if: end of continuous variable part } else { # discrete variable case # actual value table if (dim(tbl1)[1] <= 100) { # integer: how many value-freq sets follw? pt.vr.st<-dim(tbl1)[1] for (j in 1:(dim(tbl1)[1])) { # each value-freq set tmpvalue<-dimnames(tbl1)[[1]][j] #if (VARTYPE[i] == 0) {tmpvalue<-chr2xtrf(tmpvalue)} if (DEBUG) { cat("i=", i, "\tj=", j,"\ttmpvalue:", tmpvalue, "\n", sep="")} pt.vr.st<-paste(pt.vr.st,if (!is.na(tmpvalue)) {tmpvalue} else {"."}, tbl1[[j]],sep="\t") } } else { # for more-than-100-category vars, print 0 pt.vr.st<-0 } # for all cases, valid, invalid and N are printed for (k in 1:length(statset)){ pt.vr.st<-paste(pt.vr.st,if (!is.na(statset[[k]])) {statset[[k]]} else {"."}, sep="\t") } if (DEBUG) {cat("\n");} } # else: end of discrete case u <- unf(tmpvari, version=3) uxml<-paste(as.character(u),"\n",sep="") # dump this var cat(file=wxml,paste(pt.vr.1,pt.vr.st,uxml,sep="\t"),sep="") } # end of the variable-wise loop } #end of the sumStatTabAll.DDIx.R checkBinaryResponse<-function(binx){ bnryVarTbl <-attr(table(binx), 'dimnames')[[1]]; if (length(bnryVarTbl) == 2){ if ((bnryVarTbl[1] == 0) && (bnryVarTbl[2]==1)){ #cat('this variable is already 0-1\n'); } else { #cat('this variable needs the conversion\n'); #cat(paste( bnryVarTbl[1],' is recoded to 1; ', bnryVarTbl[2],' is recoded to 0;\n', sep='')); binx<-as.integer(binx == bnryVarTbl[1]); } } invisible(binx) } ####################################################################### univarStatHtmlBody<-function(dtfrm, whtml, analysisoptn, standalone=F){ # Description # # arguments # dtfrm variable furnished with attributes # tmpimgfile temporary image file prefix: =$SRVRCGI=$SERVER$CGIDIR # analysisoptn analysis option # nrows local variable # tmphtmlfile temporary html file # file tmphtmlfile DBG<-TRUE DBG<-FALSE # open the connection #whtml<-file(tmphtmlfile, "w") #on.exit(close(whtml)) # color parameters # legend: c(1:background, 2:table header, 3: table body(o), 4: table body(e)) # clschm <-c("#FFFFFF", "#CCFFCC","#e0ffff","#f0fff0") # green-based palette # blue-based palette #clschm <-c("#FFFFFF", "#e6e6fa","#ffffff","#f5f5f5") clschm <-c("dvnUnvStatTbl", "dvnUnvStatTblHdr","dvnUnvStatTblRowO","dvnUnvStatTblRowE") # table parameters # legend: c(border, cellspacing) tblprm <-c(0, 2) #cat("\nEntered the function univarStatHtml\n") # values for local tests # set localtest 0 after local tests localtest<-TRUE localtest<-FALSE if (localtest){ tmpimgfile<-c("") imgprfx1<-c("<img src=\"") imgprfx2<-c("") univarstathdr<-c("Valid Cases", "Invalid Cases(NAs)", "Total", "Mean", "Standard deviation", "Skewness", "Kurtosis", "Coefficient of variation", "Mode", "Minimum","1st Quartile","Median","3rd Quartile","Maximum","Range","Interquartile Range","Normality Test:Shapiro-Wilk Statistic", "(Shapiro-Wilk Statistic: p value)") } if (standalone) { imgflprfx<-paste(imgprfx1,tmpimgfile,imgprfx2,sep="") } else { imgflprfx<-"<img src=\"" } # constant for rendering a table for univariate statistics(continuous vars only) uslstlen<-length(univarstathdr) nrows <-ceiling(uslstlen/2) blnkcell<-uslstlen%%2==TRUE nameset<-names(dtfrm) varlabelset<-attr(dtfrm,"var.labels") CHRTLST<-attr(dtfrm, "univarChart.lst") STATLST<-attr(dtfrm, "univarStat.lst") VARTYPE<-attr(dtfrm, "var.type") VALINDEX<-attr(dtfrm, "val.index") VALTABLE<-attr(dtfrm, "val.table") pt.varheader<-function(namesi, varlabelsi=NA) {h3<-paste("<h3>", namesi, if (!is.na(varlabelsi)) {paste(": ", varlabelsi, sep="")}, "</h3>\n",sep="");h3} ################### # continuous case univarStatHtml.cntn<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi){ # statlst STATLST[[as.character(i)]] # imgfllst imgfllst=CHRTLST[[as.character(i)]] # cmbntn analysisoptn # function definition sections # create the first tr tag: chart part pt.tr1<-function(imgfllst, cmbntn){ tr1<-"" if (cmbntn[2]) { if (cmbntn[1]) { colspan<-" colspan=\"2\"" } else { colspan<-""} # both if(!is.null(imgfllst[["hstbx"]])){ tr1.l<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["hstbx"]],imgsffx1,"</td>\n",sep="") } else { tr1.l<-paste("<td",colspan,">\n<p><B><font color=red>Histogram/Boxplot Not Available</font></B></p>\n</td>\n") } if(!is.null(imgfllst[["qqplt"]])) { tr1.r<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["qqplt"]],imgsffx1,"</td>\n",sep="") } else { tr1.r<-paste("<td",colspan,">\n<p><B><font color=red>Normal Q-Q plot Not Available</font></B></p>\n</td>\n",sep="") } tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") } tr1 } # create the 2nd and thereafter tr tags: statistics part pt.tr2<-function(statlst, cmbntn){ tr2<-"" if (cmbntn[1]) { # statistics on # table header tr2<-paste("<tr class=\"",clschm[2],"\">\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n</tr>\n",sep="") # statistical data # when # of statistics is not even if (blnkcell){ univarstathdr[length(statlst)+1]<-" "} # table body for (j in 1:nrows) { if (j%%2==FALSE) colorprm <- clschm[3] else colorprm <-clschm[4] tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n", "<td align=\"left\">",univarstathdr[j],"</td>\n", "<td align=\"right\">", prettyNum(statlst[[j]]),"</td>\n", "<td align=\"left\">",univarstathdr[j+nrows],"</td>\n", "<td align=\"right\">", if ( (j==nrows) & (blnkcell) ) {" "} else {prettyNum(statlst[[j+nrows]])},"</td>\n</tr>\n", sep="") } } tr2 } # create the chart/statistics table segment pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst){ tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn) tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") tbl } # create per variable html segment pt.varunit.cntn<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} ## end of function definitions ## # implementation pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn) ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) ptvarunitc<-pt.varunit.cntn(vhdr=ptvarheader, vcntnts=pttbl) ptvarunitc } # end of continuous case ###################### # discrete case univarStatHtml.dscrt<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi, vltbl) { # statlst STATLST[[as.character(i)]] # imgfllst imgfllst=CHRTLST[[as.character(i)]] # cmbntn analysisoptn # function definition sections #statlst[["freqtbl"]] # mode and median even if a freq table is not available nrw<-3 # add one for "total" row #if (!is.na(statlst$freqtbl)) {nrw<-length(statlst$freqtbl)+1+nrw} if (class(statlst$freqtbl)=="table") {nrw<-length(statlst$freqtbl)+nrw} # nrws: rowspan parameter value if the chart option is chosen nrws<-nrw+1 pt.tr1<-function(imgfllst, cmbntn){ try({ # tr1.l: chart part tr1.l<-"" sprsstr1r<-FALSE if (cmbntn[2]) { rowspan<-"" if (cmbntn[1]) { rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") } if(!is.na(imgfllst[["brchrt"]])){ tr1.l<-paste("<td",rowspan," valign=\"top\">\n",imgflprfx,imgfllst[["brchrt"]], imgsffx1, "</td>\n", sep="") } else { if (class(statlst$freqtbl)=="table"){ rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") tr1.l<-paste("<td",rowspan," valign=\"top\">\n<p><B><small>The number of categories is more than 10 or equal to 1.<br>Table substitutes for Bar plot</small></B></p>\n</td>\n",sep="") cmbntn[1]<-1 } else { tr1.lm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") tr1.lhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") tr1.l<-paste(tr1.lm,tr1.lhdr, sep="") sprsstr1r<-TRUE } } } # tr1.r: freq/pcnt table header part tr1.r<-"" if (cmbntn[1]) { if (class(statlst$freqtbl)=="table"){ tr1.r<-paste("<td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") } else if (!sprsstr1r){ tr1.rm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") tr1.rhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") tr1.r<-paste(tr1.rm,tr1.rhdr, sep="") } } tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") }) # end of try } # create the 2nd and thereafter tr tags: statistics part pt.tr2<-function(statlst, cmbntn, vltbl, imgfllst){ try({ tr2<-"" tableon<-FALSE if ( cmbntn[2]){ if (is.na(imgfllst[["brchrt"]])){ tableon<-TRUE } } if (cmbntn[1] | tableon) { if (class(statlst$freqtbl)=="table") {tblkey<-names(statlst$freqtbl)} # if freqtbl is NA, tblkey becomes NULL for (j in 1:nrw) { if (j%%2==FALSE) { colorprm <- clschm[3]} else {colorprm <-clschm[4]} if (j < (nrw -2)) { catgrylbl<-"" if (!is.null(vltbl)){ if(!is.null(vltbl[[tblkey[j]]])) { catgrylbl<-paste("(",vltbl[[tblkey[j]]],")",sep="") } } tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">",tblkey[j],catgrylbl,"</td>\n<td align=\"right\">",statlst$freqtbl[[j]],"</td>\n<td align=\"right\">", signif(statlst$pcnttbl[[j]],3),"</td>\n</tr>\n", sep="") } else if (j == (nrw -2)) { #cat("entering the total row\n") tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">Total</td>\n<td align=\"right\">",statlst$Vald+statlst$Invald,"</td>\n<td align=\"right\">100</td>\n</tr>\n", sep="") } else if (j == (nrw -1)) { # median #cat("entering the median\n") median.vl<- "Not Available" median.lbl<-"" if (!is.null(statlst$Median)) { median.vl<- as.character(statlst$Median) if (!is.null(vltbl) && (nrw>3)){ if (!is.null(vltbl[[median.vl]])) { median.lbl<-paste("(",vltbl[[median.vl]],")",sep="") } } } tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Median</td>\n<td align=\"right\">",median.vl,"</td>\n<td align=\"right\">",median.lbl,"</td>\n</tr>\n", sep="") } else if (j == nrw) { # mode #cat("entering the Mode\n") mode.vl<-"Not Available" mode.lbl<-"" if (!is.null(statlst$Mode)) { mode.vl<-statlst$Mode if (!is.null(vltbl) && (nrw>3) ) { if (!is.null(vltbl[[mode.vl]])) { mode.lbl<-paste("(",vltbl[[mode.vl]], ")", sep="") } } } tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Mode</td>\n<td align=\"right\">",mode.vl,"</td>\n<td align=\"right\">",mode.lbl,"</td>\n</tr>\n", sep="") } } } tr2 }) # end of try } # create the chart/statistics table segment pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst,vltbl=vltbl){ try({ tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn, vltbl=vltbl,imgfllst=imgfllst) tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") tbl }) } # create per variable html segment pt.varunit.dscrt<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} ## end of function definitions ## # implementation try({ #cat("enters the discrete html body function\n", sep="") pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn, vltbl=vltbl) ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) ptvarunitd<-pt.varunit.dscrt(vhdr=ptvarheader, vcntnts=pttbl) ptvarunitd }) } # end of discrete case # main # implementation rawVarName <- nameset if (length(attr(dtfrm, "Rsafe2raw"))>0){ Rsafe2raw <- attr(dtfrm, "Rsafe2raw") for (i in 1:length(nameset)){ if (!is.null(Rsafe2raw[[nameset[i]]])){ rawVarName[i] <- Rsafe2raw[[nameset[i]]]; } } } for (i in 1:dim(dtfrm)[2]){ try({ if (VARTYPE[i]==2) { varsgmnt.c<-univarStatHtml.cntn(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i]) #cat(file=whtml, varsgmnt.c, sep="") HTML(file=whtml, varsgmnt.c) } else { if (DBG) {cat(i,"-th var before entering the discrete html function\n", sep="")} #cat("check the value table=",VALTABLE[[VALINDEX[[i]]]],"\n", sep="") if (is.null(VALINDEX[[as.character(i)]])){valtable<-NULL} else {valtable<-VALTABLE[[VALINDEX[[as.character(i)]]]]} varsgmnt.d<-univarStatHtml.dscrt(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i], vltbl=valtable) #cat(file=whtml, varsgmnt.d, sep="") HTML(file=whtml, varsgmnt.d) } }) # end of try } # end of var-wise for-loop } #end of the function univarStatHtml