Mercurial > hg > LGDataverses
comparison DVN-web/installer/dvninstall/config/dvn_data_functions.R @ 6:1b2188262ae9
adding the installer.
author | "jurzua <jurzua@mpiwg-berlin.mpg.de>" |
---|---|
date | Wed, 13 May 2015 11:50:21 +0200 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:dd9adfc73390 | 6:1b2188262ae9 |
---|---|
1 library(foreign) | |
2 library(stats) | |
3 library(methods) | |
4 library(UNF) | |
5 library(R2HTML) | |
6 | |
7 options(digits.secs = 3) | |
8 | |
9 | |
10 ############ parameters ######################## | |
11 univarstathdr<-c("Valid Cases", "Missing Cases(NAs)", "Total", "Mean", "Standard deviation", "Skewness", "Kurtosis", "Coefficient of variation", "Mode", "Minimum","1st Quartile","Median","3rd Quartile","Maximum","Range","Interquartile Range","Normality Test(Shapiro-Wilk Statistic)", "Normality Test(Shapiro-Wilk Statistic: p value)") | |
12 | |
13 imgprfx1<-c("<img src=\"http://") | |
14 imgprfx2<-c("/nph-dmpJpg.pl?jpgfn=") | |
15 imgsffx1<-c("\" >\n") | |
16 imgsffx2<-c("\" >\n") | |
17 | |
18 ############# parameters ####################### | |
19 # Note: | |
20 # - The parameter na.strings is set to "NA", even though in the DVN tab files Missing Values are encoded as empty strings; | |
21 # this may be some sort of a legacy thing (may be older files still had "NA"s in them as this was written?). After calling | |
22 # this function, read.table141vdc, the DVN application classes (for ex., DvnRforeignFileConversionServiceImpl.java) make | |
23 # another call to reset all the empties to NA. Some functions further down in this file also do that explicitly. | |
24 # - I changed the strip.white parameter to FALSE (-- L.A., 05/07/2013); having it set to TRUE was resulting in the dropping | |
25 # the empty entries that were supposed to represent Missing Values, when the subset contained a single numeric column, | |
26 # no matter what the na.strings= was set to. | |
27 | |
28 read.table141vdc<-function (file, header = FALSE, sep = "\t", quote = "", dec = ".", col.names=NULL, na.strings = "NA",colClasses = NA, colClassesx = NA, nrows = -1, skip = 0, check.names = TRUE,fill = !blank.lines.skip, strip.white = FALSE, blank.lines.skip = FALSE, comment.char = "", varFormat=list()) | |
29 { | |
30 if (is.character(file)) { | |
31 file <- file(file, "r") | |
32 on.exit(close(file)) | |
33 } | |
34 if (!inherits(file, "connection")) stop("argument 'file' must be a character string or connection") | |
35 if (!isOpen(file)) { | |
36 open(file, "r") | |
37 on.exit(close(file)) | |
38 } | |
39 if (skip > 0) readLines(file, skip) | |
40 | |
41 cols<- length(colClassesx) | |
42 if (is.null(col.names)) col.names<-paste("V", 1:cols, sep = "") | |
43 if(check.names) col.names <- make.names(col.names, unique = TRUE) | |
44 what <- rep(list(""), cols) | |
45 names(what) <- col.names | |
46 known <- colClasses %in% c("logical", "integer", "numeric", "complex", "character") | |
47 what[known] <- sapply(colClasses[known], do.call, list(0)) | |
48 | |
49 data <- scan(file = file, what = what, sep = sep, quote = quote, dec = dec, nmax = nrows, skip = 0, na.strings = na.strings, quiet = TRUE, fill = fill, strip.white = strip.white, blank.lines.skip = blank.lines.skip, multi.line = FALSE, comment.char = comment.char) | |
50 | |
51 nlines <- length(data[[1]]) | |
52 | |
53 if (cols != length(data)) { | |
54 warning(paste("cols =", cols, " != length(data) =", length(data))) | |
55 cols <- length(data) | |
56 } | |
57 | |
58 #cat("colClassesx:\n") | |
59 #cat(paste(class(colClassesx),"\n")) | |
60 #cat(paste(colClassesx,"\n",sep=" ")) | |
61 #cat(paste(class(varFormat),"\n")) | |
62 #cat(paste(length(varFormat),"\n")) | |
63 #cat("varFormat:\n") | |
64 #cat(paste(varFormat,"\n",sep=" ")) | |
65 | |
66 saved.options <- options(digits.secs = 3) | |
67 | |
68 for (i in 1:cols) { | |
69 #if (known[i]) next | |
70 #data[[i]] <- as(data[[i]], colClasses[i]) | |
71 #cat(paste(class(data[[i]]),"\n")) | |
72 #cat(paste(mode(data[[i]]),"\n")) | |
73 if (colClassesx[i] == 0) { | |
74 | |
75 # Make sure the character values are handled as such: | |
76 #data[[i]]<-I(data[[i]]); | |
77 data[[i]]<-as.character(data[[i]]); | |
78 # And replace empty strings with NAs: | |
79 data[[i]][ data[[i]] == '' ]<-NA | |
80 # And remove the double quotes we had put around the non-missing | |
81 # string values as they were stored in the TAB files: | |
82 | |
83 data[[i]]<-sub("^\"", "", data[[i]]) | |
84 data[[i]]<-sub("\"$", "", data[[i]]) | |
85 | |
86 if (is.null(unlist(varFormat[col.names[i]]))){ | |
87 #cat("before-s=",i, "\n") | |
88 data[[i]] <- as(data[[i]], "character") | |
89 #cat("after-s=",i, "\n") | |
90 } else if (!is.null(unlist(varFormat[col.names[i]]))){ | |
91 if (varFormat[col.names[i]] == 'D'){ | |
92 #cat("before-d=",i, "\n") | |
93 #data[[i]]<-as.Date(data[[i]], "%Y-%m-%d") | |
94 data[[i]]<-as.Date(data[[i]]); | |
95 #cat("after-d=",i, "\n") | |
96 colClassesx[i]<-1 | |
97 } else if (varFormat[col.names[i]] == 'T'){ | |
98 #cat("before-t=",i,"\n") | |
99 data[[i]]<-as.POSIXct(strptime(data[[i]], "%T")) | |
100 #cat("after-t=", i,"\n") | |
101 colClassesx[i]<-1 | |
102 } else if (varFormat[col.names[i]] == 'DT'){ | |
103 data[[i]]<-as.POSIXct(strptime(data[[i]], "%F %H:%M:%OS")) | |
104 colClassesx[i]<-1 | |
105 } else if (varFormat[col.names[i]] == 'JT'){ | |
106 data[[i]]<-as.POSIXct(strptime(data[[i]], "%j %H:%M:%OS")) | |
107 colClassesx[i]<-1 | |
108 } | |
109 } | |
110 } else if (colClassesx[i] == 3) { | |
111 | |
112 # special case for Boolean/logical variables: | |
113 # (these will be passed from the application as vectors of 0s and 1s) | |
114 # also, note that this type will be used only when the subset is | |
115 # created as part of the "save-as" functionality. When it's for | |
116 # analysis, the DVN "boolean" variable will be of type 1, because | |
117 # they will be handled as regular integer categoricals with the labels | |
118 # "TRUE" and "FALSE". -- L.A. | |
119 #print(data[[i]]) | |
120 | |
121 for (j in 1:length(data[[i]])) { | |
122 if (!is.na(data[[i]][j]) && data[[i]][j] == "") { | |
123 data[[i]][j]<-NA | |
124 } | |
125 } | |
126 | |
127 #print(data[[i]]) | |
128 | |
129 data[[i]]<-as.logical(as.numeric(data[[i]])) | |
130 #print(data[[i]]) | |
131 | |
132 | |
133 } else { | |
134 data[[i]]<-type.convert(data[[i]], dec = dec) | |
135 #cat("data[[", i, "]]:", class(data[[i]]), "\n", sep="") | |
136 #if ( (class(data[[i]]) == "numeric") & (colClassesx[i]==1) ) { | |
137 # colClassesx[i]<-2 | |
138 #} | |
139 } | |
140 } | |
141 | |
142 options(saved.options) | |
143 | |
144 class(data) <- "data.frame" | |
145 row.names(data) <- as.character(seq(len = nlines)) | |
146 attr(data, "var.type")<-colClassesx | |
147 #cat("end of read.table141vdc\n") | |
148 data | |
149 } # end of read.table141vdc | |
150 | |
151 transformrecoded <-function(x, recodedvarsindx = 2, dec = ".", col.names = NULL, colClassesx = undef, varFormat = list()){ | |
152 | |
153 #cat("inside transformrecoded\n") | |
154 #cat(paste(col.names,"\n",sep="")) | |
155 | |
156 for (i in recodedvarsindx:length(x)) { | |
157 | |
158 #i = recodedindx[j] | |
159 #cat("index: ") | |
160 #cat(i) | |
161 #cat("\n") | |
162 | |
163 #cat(paste(class(x[[i]]),"\n")) | |
164 #cat(paste(mode(x[[i]]),"\n")) | |
165 | |
166 #cat(paste(varFormat[col.names[i]],"\n")) | |
167 #cat(paste(unlist(varFormat[col.names[i]]),"\n")) | |
168 | |
169 testbool<-is.null(unlist(varFormat[col.names[i]])) | |
170 #cat(as.character(testbool)) | |
171 | |
172 | |
173 if (!is.null(unlist(varFormat[col.names[i]]))){ | |
174 #cat("inside the if loop.\n") | |
175 if (varFormat[col.names[i]] == 'D'){ | |
176 x[[i]]<-as.Date(x[[i]]) | |
177 #cat("x[[i]] is a Date;\n") | |
178 colClassesx[i]<-1 | |
179 } else if (varFormat[col.names[i]] == 'T'){ | |
180 x[[i]]<-as.POSIXct(strptime(x[[i]], "%T")) | |
181 colClassesx[i]<-1 | |
182 } else if (varFormat[col.names[i]] == 'DT'){ | |
183 x[[i]]<-as.POSIXct(strptime(x[[i]], "%F %H:%M:%OS")) | |
184 colClassesx[i]<-1 | |
185 } else if (varFormat[col.names[i]] == 'JT'){ | |
186 x[[i]]<-as.POSIXct(strptime(x[[i]], "%j %H:%M:%OS")) | |
187 colClassesx[i]<-1 | |
188 } | |
189 } | |
190 } | |
191 x | |
192 } | |
193 | |
194 ########################################################### | |
195 createvalindex <-function(dtfrm, attrname=NULL){ | |
196 # this version relies on the list-based approach | |
197 # completely new final [without old cod block] | |
198 if (is.null(dtfrm)) { | |
199 stop("dataframe is not specified\n") | |
200 } else if (is.null(attrname)){ | |
201 stop("attrname is is not specified\n") | |
202 } else if (!exists('dtfrm')) { | |
203 stop("dataframe is not found\n") | |
204 } else if (!is.data.frame(dtfrm) ) { | |
205 stop("Specified object is not a data.frame\n") | |
206 } | |
207 | |
208 #DBG<-TRUE | |
209 DBG<-FALSE | |
210 try ( { | |
211 if (attrname == 'val.index') { | |
212 tabletype<-'val.table' | |
213 valtable<-attr(dtfrm, 'val.table') | |
214 } else if (attrname == 'missval.index') { | |
215 tabletype<-'missval.table' | |
216 valtable<-attr(dtfrm, 'missval.table') | |
217 } else stop ("Specified attrname must be either val.index or missval.index\n") | |
218 | |
219 if (DBG) {cat("\nattribute name=",attrname,"\n")} | |
220 if (length(valtable)) { | |
221 vlindex <- list(); | |
222 vlst <- list(); | |
223 lstall<-list() | |
224 vltbl<-list() | |
225 if (DBG) { | |
226 cat("length(",attrname,")=",length(valtable),"\n") | |
227 cat("varidset(",attrname,")=",names(valtable),"\n") | |
228 } | |
229 nameset<-names(valtable) | |
230 if (DBG) { | |
231 str(nameset) | |
232 cat("\nnameset:", paste(nameset,collapse="|"), "\n",sep="") | |
233 } | |
234 for (i in 1:(length(valtable))){ | |
235 if (DBG) { | |
236 cat("var=",i,"\n", sep="") | |
237 cat("\tlstall:", paste(if (length(lstall)) {as.vector(lstall,mode="integer")} else {"empty"}, collapse=","), "\n",sep="") | |
238 } | |
239 nameseti<-nameset[i] | |
240 if (!is.null(lstall[[as.character(i)]])){next} | |
241 lsti<-list() | |
242 | |
243 # set i to the new list | |
244 lsti[[as.character(i)]]<-i | |
245 lstall[[as.character(i)]]<-i | |
246 vlindex[[as.character(nameseti)]]<-nameset[i] | |
247 vltbl[[as.character(nameseti)]]<-valtable[[i]] | |
248 | |
249 if (DBG) {cat("\tlsti:", paste(as.vector(lsti, mode="integer"),collapse=","), "\n",sep="")} | |
250 for (j in i:length(valtable)){ | |
251 if (!is.null(lstall[[as.character(j)]])){next} | |
252 if (attrname == 'val.index') { | |
253 if ( identical( names(valtable[[i]]), names(valtable[[j]]) ) & identical(valtable[[i]], valtable[[j]]) ) { | |
254 if (DBG) {cat("\tVL:new duplicate (var#) to be added:", j,"\n",sep="")} | |
255 lsti[[as.character(j)]]<-j | |
256 vlindex[[as.character(nameset[j])]]<-nameseti | |
257 lstall[[as.character(j)]]<-j | |
258 } | |
259 } else if (attrname == 'missval.index') { | |
260 if ( identical(valtable[[i]], valtable[[j]]) ) { | |
261 if (DBG) {cat("\tMSVL: new duplicate (var#) to be added:", j,"\n",sep="")} | |
262 lsti[[as.character(j)]]<-j | |
263 vlindex[[as.character(nameset[j])]]<-nameseti | |
264 lstall[[as.character(j)]]<-j | |
265 } | |
266 } | |
267 } | |
268 if (DBG) {cat("\tlsti to be attached to vlst:", paste(as.vector(lsti, mode="integer"),collapse=","), "\n",sep="")} | |
269 if (length(lsti)){ | |
270 vlst[[nameseti]]<-nameset[as.vector(lsti, mode="integer")] | |
271 } | |
272 } | |
273 if (DBG) { | |
274 cat("\nvlst=attr(dtfrm,'val.list') <- vlst\n") | |
275 str(vlst) | |
276 cat("\nvlindex=attr(dtfrm,'val.index') <- vlindex\n") | |
277 str(vlindex) | |
278 cat("\nvltbl=attr(dtfrm,'val.table')<- valtablex\n") | |
279 str(vltbl) | |
280 cat("\nnames(vltbl): equivalent to tmpunique\n") | |
281 cat("unique var IDs:", paste(names(vltbl),collapse="|"), "\n",sep="") | |
282 } | |
283 attr(dtfrm, attrname)<-vlindex | |
284 | |
285 if (attrname == 'val.index') { | |
286 attr(dtfrm, 'val.list') <- vlst | |
287 attr(dtfrm, 'val.table') <- vltbl | |
288 } else if (attrname == 'missval.index') { | |
289 attr(dtfrm, 'missval.list') <- vlst | |
290 attr(dtfrm, 'missval.table')<-vltbl | |
291 } | |
292 | |
293 } else { | |
294 # no value labels | |
295 #vlindex<-rep(NA, dim(dtfrm)[2]) | |
296 attr(dtfrm, attrname)<-NULL | |
297 if (attrname == 'val.index') { | |
298 attr(dtfrm, 'val.list')<- NA | |
299 } else if (attrname == 'missval.index') { | |
300 attr(dtfrm, 'missval.list') <- NA | |
301 } | |
302 } | |
303 | |
304 invisible(dtfrm) | |
305 }) # end try | |
306 } # end of createvalindex | |
307 | |
308 ########################################################### | |
309 # 2 table functions that return univariate statistics | |
310 # continuous case | |
311 | |
312 frqtbl.ctn<-function(x){ | |
313 frqtbl<-list() | |
314 tbl1<-table(x, useNA='ifany') | |
315 frqtbl[['Mode']]<-NA | |
316 if (length(x) > length(tbl1)) { | |
317 frqtbl[['Mode']]<- names(tbl1)[which.max(tbl1)] | |
318 } | |
319 frqtbl | |
320 } | |
321 | |
322 frqtbl.dsc<-function(x){ | |
323 frqtbl<-list() | |
324 DBG<-FALSE | |
325 | |
326 # ftbl: frequency table | |
327 ftbl<-table(x, useNA='ifany') | |
328 | |
329 # get the mode | |
330 frqtbl[['Mode']]<-NA | |
331 frqtbl[['freqtbl']]<-NA | |
332 frqtbl[['pcnttbl']]<- NA | |
333 if (length(x) > length(ftbl)){ | |
334 frqtbl[['Mode']]<-names(ftbl[which.max(ftbl)]) | |
335 if ((length(ftbl)<=50)){ | |
336 # ptbl: percentage table | |
337 ptbl<-100*(ftbl/sum(ftbl)) | |
338 # set up the return list | |
339 frqtbl[['freqtbl']]<- ftbl | |
340 frqtbl[['pcnttbl']]<- ptbl | |
341 if (DBG){ | |
342 cat("\ttable header:",paste(dimnames(ftbl)[[1]], collapse='|'), "\n") | |
343 cat("\ttable frequency:",paste(ftbl, collapse='|'), "\n") | |
344 cat("\tstatistical mode:", frqtbl[['Mode']], "\n") | |
345 cat("\tstatistical mode(freq):", tbl1[which.max(ftbl)], "\n") | |
346 } | |
347 } | |
348 } | |
349 | |
350 frqtbl | |
351 } | |
352 | |
353 sw.stat<-function(x,N){ | |
354 DBG<-TRUE | |
355 DBG<-FALSE | |
356 SW<-list() | |
357 SW$value <- NA | |
358 SW$Pvalue <- NA | |
359 if ((N >= 3) & (N <= 5000)) { | |
360 shpr <- try(shapiro.test(x)) | |
361 if (attr(shpr, "class") == 'htest') { | |
362 if(DBG) {cat("sw statistics assigned\n")} | |
363 SW$value <- shpr[[1]][[1]] | |
364 SW$Pvalue <- shpr[[2]] | |
365 } | |
366 if(DBG) {cat("sw statistics end\n")} | |
367 } | |
368 SW | |
369 } | |
370 | |
371 univarStat.cntn<-function(varseti){ | |
372 options(digits=3) | |
373 DBG<-TRUE | |
374 DBG<-FALSE | |
375 if(DBG) {cat("pass the point univStat(continuous)\n")} | |
376 | |
377 N<-sum(complete.cases(varseti)) | |
378 svnm<-summary(varseti) | |
379 | |
380 if (N) { | |
381 min.value <- svnm[[1]] | |
382 q1.value <- svnm[[2]] | |
383 #median.value <- median(varseti) | |
384 median.value <- svnm[[3]] | |
385 q3.value <- svnm[[5]] | |
386 max.value <- svnm[[6]] | |
387 range.value <- svnm[[6]]-svnm[[1]] | |
388 iqr.value <- svnm[[5]]-svnm[[1]] | |
389 mean.value <- svnm[[4]] | |
390 } else { | |
391 min.value <- NA | |
392 q1.value <- NA | |
393 median.value <- NA | |
394 q3.value <- NA | |
395 max.value <- NA | |
396 range.value <- NA | |
397 iqr.value <- NA | |
398 mean.value <- NA | |
399 } | |
400 | |
401 stdv.value <- sd(varseti, na.rm=T) | |
402 z0 <- scale(varseti) | |
403 if (N >= 2) {cv.value <- stdv.value/svnm[[4]] } else {cv.value <- NA} | |
404 if (N >= 3) {skewness.value <- (N/(N-1)/(N-2))*sum((z0)^3, na.rm=T)} else {skewness.value <- NA} | |
405 if (N >= 4) {kurtosis.value <- ((N*(N+1)/(N-1))*sum((z0)^4, na.rm=T) - 3*(N-1)^2)/(N-2)/(N-3)} else {kurtosis.value <-NA} | |
406 # find the maximum frequency cell | |
407 # index: which.max(table(dtfrm[[i]])) | |
408 | |
409 maxfreq<-frqtbl.ctn(x=varseti)[["Mode"]] | |
410 SW<-sw.stat(x=varseti,N=N) | |
411 statset<- list( | |
412 Vald = N, | |
413 Invald = sum(is.na(varseti)), | |
414 Total = length(varseti), | |
415 Mean = mean.value, | |
416 Stdev = stdv.value, | |
417 Skewness = skewness.value, | |
418 Kurtosis = kurtosis.value, | |
419 CV = cv.value, | |
420 Mode = maxfreq, | |
421 Minimum = min.value, | |
422 Q1 = q1.value, | |
423 Median = median.value, | |
424 Q3 = q3.value, | |
425 Maximum = max.value, | |
426 Range = range.value, | |
427 I.Q.R = iqr.value, | |
428 S.W.statistic = SW$value, | |
429 S.W.P.value = SW$Pvalue | |
430 ) | |
431 statset | |
432 } | |
433 | |
434 univarStat.dscrt<-function(varseti, ordnl=TRUE){ | |
435 DBG<-TRUE | |
436 DBG<-FALSE | |
437 | |
438 if(DBG) {cat("pass the point univStat(discrete)\n")} | |
439 N<-sum(complete.cases(varseti)) | |
440 if (ordnl){ | |
441 median.value <-NULL | |
442 if (N) {median.value <- median(varseti, na.rm=TRUE) } | |
443 } | |
444 tmpfrq<-frqtbl.dsc(x=varseti) | |
445 | |
446 statset<- list( | |
447 Vald = N, | |
448 Invald = sum(is.na(varseti)), | |
449 Total = length(varseti), | |
450 Mode = tmpfrq[["Mode"]], | |
451 freqtbl = tmpfrq[["freqtbl"]], | |
452 pcnttbl = tmpfrq[["pcnttbl"]] | |
453 ) | |
454 if (ordnl){ | |
455 statset$Median<-median.value | |
456 } | |
457 statset | |
458 } | |
459 | |
460 | |
461 univarStat<-function(dtfrm){ | |
462 DBG<-TRUE | |
463 DBG<-FALSE | |
464 if(DBG) { | |
465 cat("\n\nEntered the function univarStat\n") | |
466 NAMESET<-names(dtfrm) | |
467 } | |
468 | |
469 STATLST<-list() | |
470 | |
471 # create temp vars | |
472 VARTYPE<-attr(dtfrm, "var.type") | |
473 for (i in 1: dim(dtfrm)[2]) { | |
474 try ({ | |
475 varseti<-dtfrm[[i]] | |
476 | |
477 if(DBG) {cat("variable name =",NAMESET[i],"\n")} | |
478 | |
479 N<-sum(complete.cases(varseti)) | |
480 | |
481 if (VARTYPE[i]== 2) { | |
482 | |
483 STATLST[[as.character(i)]]<-univarStat.cntn(varseti=varseti) | |
484 | |
485 } else if (VARTYPE[i] == 1) { | |
486 | |
487 STATLST[[as.character(i)]]<-univarStat.dscrt(varseti=varseti) | |
488 | |
489 } else if (VARTYPE[i] == 0) { | |
490 | |
491 STATLST[[as.character(i)]]<-univarStat.dscrt(varseti=varseti,ordnl=FALSE) | |
492 | |
493 } else { | |
494 | |
495 STATLST[[as.character(i)]]<-NULL | |
496 | |
497 } | |
498 | |
499 }) # end of try | |
500 } # end of the loop | |
501 | |
502 attr(dtfrm, "univarStat.lst")<-STATLST | |
503 | |
504 invisible(dtfrm) | |
505 } # end of univarStat | |
506 ########################################################### | |
507 univarChart<-function(dtfrm, analysisoptn=NULL, imgflprfx=NULL, standalone=T){ | |
508 # description | |
509 # to print univariate charts | |
510 # | |
511 # arguments | |
512 # dtfrm[[i]] variable name | |
513 # analysisoptn Analysis option | |
514 # imgflprfx temporary image file prefix | |
515 | |
516 # local variable | |
517 # varlabel variable label (local variable) | |
518 # No return value; each image file is written in /tmp | |
519 # $RvlsPrfx = "$TMPDIR/Rvls.$PRCSSID"; | |
520 # note: value labels will be printed in html tables | |
521 # unvlst[[as.character(i)]]<-statset | |
522 | |
523 # new list-based notations | |
524 # USL<-attr(dtfrm,"univarStat.lst") | |
525 # chartset[["hstbx"]]<-hstgrmfile | |
526 # chartset[["qqplt"]]<-qqpltfile | |
527 # chartset[["brchrt"]]<-barpltfile | |
528 # USL[[as.character(i)]][["freqtbl"]] | |
529 # chrtlst[[as.character(i)]]<-chartset | |
530 | |
531 | |
532 DBG<-FALSE | |
533 #DBG<-TRUE | |
534 if (is.null(analysisoptn)){ | |
535 analysisoptn<-c(1,1,0) | |
536 } | |
537 | |
538 if (is.null(imgflprfx)) { | |
539 PRCID<-format(Sys.time(), "R%Y%m%d_%H%M%S") | |
540 #imgflprfx<-paste("c:/asone/R/temp/",PRCID,sep="") | |
541 imgflprfx<-PRCID | |
542 if (DBG) {cat("\nprocessID=",imgflprfx,"\n", sep="")} | |
543 } | |
544 | |
545 | |
546 | |
547 # function defintions | |
548 | |
549 | |
550 | |
551 varlabel.chrt<-function(lblset){ | |
552 DBG<-FALSE | |
553 #DBG<-TRUE | |
554 # variable label processing | |
555 if (DBG) {cat("\nEntered varlabel.chrt\n")} | |
556 | |
557 if (nchar(lblset[["varlabel"]])>45) { | |
558 varlabel<- paste(substr(lblset[["varlabel"]], 1, 45), "...") | |
559 } else { | |
560 varlabel<-lblset[["varlabel"]] | |
561 } | |
562 lblset[["varlabel"]]<-paste(lblset[["varname"]], ": ", varlabel, sep="") | |
563 lblset | |
564 } | |
565 | |
566 | |
567 | |
568 univarChart.cntn<-function(varseti, imgflprfx, labelset) { | |
569 DBG<-FALSE | |
570 #DBG<-TRUE | |
571 chartset<-list() | |
572 | |
573 if (DBG) {cat ("univarChart.cntn:varname:", labelset[["varname"]], "\n")} | |
574 | |
575 #histgram/boxplot | |
576 hstgrmfile<-paste(imgflprfx, labelset[["varname"]],"hs.jpg", sep=".") | |
577 bitmap(hstgrmfile, type = "jpeg", height = 3.5, width = 3, res=100, pointsize=9) | |
578 | |
579 layout(matrix(c(1,2),nrow=2,ncol=1), widths=c(1), heights=c(5,1)) | |
580 par(mar=c(4,4,1,1), mgp=c(2, 0.5, 0), tcl=-0.25, cex.axis=0.9, cex.lab=0.9) | |
581 | |
582 hist(varseti, main="", xlab=labelset[["varlabel"]], col="lightgrey") | |
583 | |
584 par(mar=c(2,4,0,1)) | |
585 boxplot(varseti, main="", xlab="", ylab="", col="lightgrey", horizontal=T) | |
586 | |
587 dev.off() | |
588 #par(def.par) | |
589 | |
590 if (!standalone){ | |
591 tmpvsldirhs<-unlist(strsplit(hstgrmfile,"/")) | |
592 hstgrmfile<-paste(tmpvsldirhs[(length(tmpvsldirhs)-1):length(tmpvsldirhs)],collapse="/") | |
593 } | |
594 chartset[["hstbx"]]<-hstgrmfile | |
595 | |
596 #qq-plot | |
597 qqpltfile<-paste(imgflprfx, labelset[["varname"]],"qq.jpg", sep=".") | |
598 bitmap(qqpltfile, type = "jpeg", height = 3, width = 3, res= 100, pointsize=8.5) | |
599 | |
600 par(tcl=-0.25, cex.axis=0.9, cex.lab=1.0) | |
601 qqnorm(varseti, main="Normal Q-Q Plot", ylab=labelset[["varlabel"]], pch=15) | |
602 qqline(varseti) | |
603 dev.off() | |
604 #par(def.par) | |
605 if (!standalone){ | |
606 tmpvsldirqq<-unlist(strsplit(qqpltfile,"/")) | |
607 qqpltfile<-paste(tmpvsldirqq[(length(tmpvsldirqq)-1):length(tmpvsldirqq)],collapse="/") | |
608 } | |
609 chartset[["qqplt"]]<-qqpltfile | |
610 chartset | |
611 } | |
612 | |
613 univarChart.dscrt<-function(frqtbl, imgflprfx, labelset){ | |
614 DBG<-FALSE | |
615 #DBG<-TRUE | |
616 chartset<-list() | |
617 if (DBG) {cat ("univarChart.dscrt:varname:", labelset[["varname"]], "\n")} | |
618 | |
619 barpltfile<-paste(imgflprfx, labelset[["varname"]], "bp.jpg", sep=".") | |
620 bitmap(barpltfile, type = "jpeg", height = 3, width = 3, res= 100, pointsize=8.5) | |
621 par(tcl=-0.25, cex.axis=0.9, cex.lab=1.0) | |
622 barplot(frqtbl, col="lightgrey", main="", xlab=labelset[["varlabel"]], ylab="Frequency") | |
623 dev.off() | |
624 #par(def.par) | |
625 | |
626 if (!standalone){ | |
627 tmpvsldirbp<-unlist(strsplit(barpltfile,"/")) | |
628 barpltfile<-paste(tmpvsldirbp[(length(tmpvsldirbp)-1):length(tmpvsldirbp)],collapse="/") | |
629 } | |
630 | |
631 chartset[["brchrt"]]<-barpltfile | |
632 chartset | |
633 } | |
634 | |
635 ############################ | |
636 # implementation | |
637 | |
638 | |
639 varlabels<-attr(dtfrm,"var.labels") | |
640 varnames<-names(dtfrm) | |
641 vartypes<-attr(dtfrm,"var.type") | |
642 | |
643 | |
644 STATLST<-NULL | |
645 if (!is.null(attr(dtfrm,"univarStat.lst"))) { | |
646 STATLST<-attr(dtfrm,"univarStat.lst") | |
647 } | |
648 | |
649 chrtlst<-list() | |
650 for (i in 1: dim(dtfrm)[2]){ | |
651 try( { | |
652 if (DBG) {cat("univarChart:",i,"-th var\n")} | |
653 chrtlbl<-list(varname=varnames[i], varlabel=varlabels[i]) | |
654 labelset<-varlabel.chrt(lblset=chrtlbl) | |
655 | |
656 varseti<-dtfrm[[i]] | |
657 | |
658 if (is.null(STATLST[[as.character(i)]])) { | |
659 tmpvald<-sum(complete.cases(varseti)) | |
660 } else { | |
661 tmpvald<-STATLST[[as.character(i)]][["Vald"]] | |
662 } | |
663 if (DBG) {cat("tmpvald=",tmpvald,"\n")} | |
664 | |
665 chartset<-list() | |
666 | |
667 if (vartypes[i]==2) { | |
668 #Continuous Variable | |
669 if (analysisoptn[2] & tmpvald) { | |
670 chrtlst[[as.character(i)]]<-univarChart.cntn(varseti=varseti, imgflprfx=imgflprfx, labelset=labelset) | |
671 } | |
672 } else { | |
673 #Discrete Variable | |
674 #bar plot | |
675 if (analysisoptn[2] & tmpvald ) { | |
676 | |
677 # chart option is chosen | |
678 if (analysisoptn[1]){ | |
679 # univariate statistics option is chosen -> freq table is available | |
680 # note: univariate statistics option is not chosen, tmpfrqtbl is NA | |
681 tmpfrqtbl<-STATLST[[as.character(i)]][["freqtbl"]] | |
682 } else { | |
683 # calculate statistics | |
684 if (vartypes[i]==1) { | |
685 statlst<-univarStat.dscrt(varseti=varseti) | |
686 } else { | |
687 statlst<-univarStat.dscrt(varseti=varseti,ordnl=FALSE) | |
688 } | |
689 tmpfrqtbl<-statlst[["freqtbl"]] | |
690 } | |
691 | |
692 chartset[["brchrt"]]<-NA | |
693 if( (length(tmpfrqtbl)<=10) & (length(tmpfrqtbl)>1) ) { | |
694 chartset<-univarChart.dscrt(frqtbl=tmpfrqtbl, imgflprfx=imgflprfx, labelset=labelset) | |
695 } else if (class(tmpfrqtbl)=="table") { | |
696 # number of categories <= 50 | |
697 # no chart but table | |
698 if (!analysisoptn[1]){ | |
699 STATLST[[as.character(i)]]<-statlst | |
700 } | |
701 } else if (is.na(tmpfrqtbl)) { | |
702 # no table available | |
703 if (!analysisoptn[1]){ | |
704 STATLST[[as.character(i)]]<-statlst | |
705 } | |
706 } | |
707 chrtlst[[as.character(i)]]<-chartset | |
708 } | |
709 } # end of D case | |
710 }) # end of try | |
711 } # end of var-wise-loop | |
712 attr(dtfrm, "univarChart.lst")<-chrtlst | |
713 | |
714 if (is.null(attr(dtfrm,"univarStat.lst")) ) { | |
715 attr(dtfrm,"univarStat.lst")<-STATLST | |
716 } | |
717 | |
718 invisible(dtfrm) | |
719 } # end of univarChart | |
720 ####################################################################### | |
721 univarStatHtml<-function(dtfrm, tmpimgfile, analysisoptn, tmphtmlfile, standalone=T){ | |
722 # Description | |
723 # | |
724 # arguments | |
725 # dtfrm variable furnished with attributes | |
726 # tmpimgfile temporary image file prefix: =$SRVRCGI=$SERVER$CGIDIR | |
727 # analysisoptn analysis option | |
728 # nrows local variable | |
729 # tmphtmlfile temporary html file | |
730 # file tmphtmlfile | |
731 | |
732 DBG<-TRUE | |
733 DBG<-FALSE | |
734 | |
735 # open the connection | |
736 whtml<-file(tmphtmlfile, "w") | |
737 on.exit(close(whtml)) | |
738 | |
739 # color parameters | |
740 # legend: c(1:background, 2:table header, 3: table body(o), 4: table body(e)) | |
741 # clschm <-c("#FFFFFF", "#CCFFCC","#e0ffff","#f0fff0") # green-based palette | |
742 # blue-based palette | |
743 #clschm <-c("#FFFFFF", "#e6e6fa","#ffffff","#f5f5f5") | |
744 clschm <-c("dvnUnvStatTbl", "dvnUnvStatTblHdr","dvnUnvStatTblRowO","dvnUnvStatTblRowE") | |
745 | |
746 # table parameters | |
747 # legend: c(border, cellspacing) | |
748 tblprm <-c(0, 2) | |
749 | |
750 #cat("\nEntered the function univarStatHtml\n") | |
751 | |
752 # values for local tests | |
753 # set localtest 0 after local tests | |
754 localtest<-TRUE | |
755 localtest<-FALSE | |
756 if (localtest){ | |
757 tmpimgfile<-c("") | |
758 imgprfx1<-c("<img src=\"") | |
759 imgprfx2<-c("") | |
760 univarstathdr<-c("Valid Cases", "Invalid Cases(NAs)", "Total", "Mean", "Standard deviation", "Skewness", "Kurtosis", "Coefficient of variation", "Mode", "Minimum","1st Quartile","Median","3rd Quartile","Maximum","Range","Interquartile Range","Normality Test:Shapiro-Wilk Statistic", "(Shapiro-Wilk Statistic: p value)") | |
761 } | |
762 if (standalone) { | |
763 imgflprfx<-paste(imgprfx1,tmpimgfile,imgprfx2,sep="") | |
764 } else { | |
765 imgflprfx<-"<img src=\"" | |
766 } | |
767 # constant for rendering a table for univariate statistics(continuous vars only) | |
768 uslstlen<-length(univarstathdr) | |
769 nrows <-ceiling(uslstlen/2) | |
770 blnkcell<-uslstlen%%2==TRUE | |
771 | |
772 | |
773 nameset<-names(dtfrm) | |
774 varlabelset<-attr(dtfrm,"var.labels") | |
775 CHRTLST<-attr(dtfrm, "univarChart.lst") | |
776 STATLST<-attr(dtfrm, "univarStat.lst") | |
777 VARTYPE<-attr(dtfrm, "var.type") | |
778 VALINDEX<-attr(dtfrm, "val.index") | |
779 VALTABLE<-attr(dtfrm, "val.table") | |
780 | |
781 | |
782 pt.varheader<-function(namesi, varlabelsi=NA) {h3<-paste("<h3>", namesi, if (!is.na(varlabelsi)) {paste(": ", varlabelsi, sep="")}, "</h3>\n",sep="");h3} | |
783 | |
784 ################### | |
785 # continuous case | |
786 univarStatHtml.cntn<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi){ | |
787 | |
788 # statlst STATLST[[as.character(i)]] | |
789 # imgfllst imgfllst=CHRTLST[[as.character(i)]] | |
790 # cmbntn analysisoptn | |
791 # function definition sections | |
792 | |
793 # create the first tr tag: chart part | |
794 pt.tr1<-function(imgfllst, cmbntn){ | |
795 tr1<-"" | |
796 if (cmbntn[2]) { | |
797 | |
798 if (cmbntn[1]) { colspan<-" colspan=\"2\"" } else { colspan<-""} | |
799 | |
800 # both | |
801 | |
802 if(!is.null(imgfllst[["hstbx"]])){ | |
803 tr1.l<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["hstbx"]],imgsffx1,"</td>\n",sep="") | |
804 } else { | |
805 tr1.l<-paste("<td",colspan,">\n<p><B><font color=red>Histogram/Boxplot Not Available</font></B></p>\n</td>\n") | |
806 } | |
807 | |
808 if(!is.null(imgfllst[["qqplt"]])) { | |
809 tr1.r<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["qqplt"]],imgsffx1,"</td>\n",sep="") | |
810 } else { | |
811 tr1.r<-paste("<td",colspan,">\n<p><B><font color=red>Normal Q-Q plot Not Available</font></B></p>\n</td>\n",sep="") | |
812 } | |
813 | |
814 tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") | |
815 } | |
816 tr1 | |
817 } | |
818 | |
819 # create the 2nd and thereafter tr tags: statistics part | |
820 pt.tr2<-function(statlst, cmbntn){ | |
821 tr2<-"" | |
822 if (cmbntn[1]) { | |
823 # statistics on | |
824 # table header | |
825 tr2<-paste("<tr class=\"",clschm[2],"\">\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n</tr>\n",sep="") | |
826 | |
827 # statistical data | |
828 # when # of statistics is not even | |
829 if (blnkcell){ univarstathdr[length(statlst)+1]<-" "} | |
830 | |
831 # table body | |
832 for (j in 1:nrows) { | |
833 if (j%%2==FALSE) colorprm <- clschm[3] else colorprm <-clschm[4] | |
834 | |
835 tr2<-paste(tr2, | |
836 "<tr class=\"",colorprm,"\">\n", | |
837 "<td align=\"left\">",univarstathdr[j],"</td>\n", | |
838 "<td align=\"right\">", prettyNum(statlst[[j]]),"</td>\n", | |
839 "<td align=\"left\">",univarstathdr[j+nrows],"</td>\n", | |
840 "<td align=\"right\">", if ( (j==nrows) & (blnkcell) ) {" "} else {prettyNum(statlst[[j+nrows]])},"</td>\n</tr>\n", sep="") | |
841 } | |
842 } | |
843 tr2 | |
844 } | |
845 | |
846 # create the chart/statistics table segment | |
847 pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst){ | |
848 tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) | |
849 tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn) | |
850 tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") | |
851 tbl | |
852 } | |
853 | |
854 # create per variable html segment | |
855 pt.varunit.cntn<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} | |
856 ## end of function definitions ## | |
857 | |
858 # implementation | |
859 | |
860 pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn) | |
861 ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) | |
862 ptvarunitc<-pt.varunit.cntn(vhdr=ptvarheader, vcntnts=pttbl) | |
863 | |
864 ptvarunitc | |
865 } # end of continuous case | |
866 | |
867 | |
868 ###################### | |
869 # discrete case | |
870 | |
871 univarStatHtml.dscrt<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi, vltbl) { | |
872 # statlst STATLST[[as.character(i)]] | |
873 # imgfllst imgfllst=CHRTLST[[as.character(i)]] | |
874 # cmbntn analysisoptn | |
875 # function definition sections | |
876 | |
877 #statlst[["freqtbl"]] | |
878 # mode and median even if a freq table is not available | |
879 nrw<-3 | |
880 # add one for "total" row | |
881 #if (!is.na(statlst$freqtbl)) {nrw<-length(statlst$freqtbl)+1+nrw} | |
882 | |
883 if (class(statlst$freqtbl)=="table") {nrw<-length(statlst$freqtbl)+nrw} | |
884 # nrws: rowspan parameter value if the chart option is chosen | |
885 nrws<-nrw+1 | |
886 | |
887 pt.tr1<-function(imgfllst, cmbntn){ | |
888 try({ | |
889 # tr1.l: chart part | |
890 tr1.l<-"" | |
891 sprsstr1r<-FALSE | |
892 if (cmbntn[2]) { | |
893 rowspan<-"" | |
894 if (cmbntn[1]) { rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") } | |
895 | |
896 if(!is.na(imgfllst[["brchrt"]])){ | |
897 tr1.l<-paste("<td",rowspan," valign=\"top\">\n",imgflprfx,imgfllst[["brchrt"]], imgsffx1, "</td>\n", sep="") | |
898 } else { | |
899 if (class(statlst$freqtbl)=="table"){ | |
900 rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") | |
901 tr1.l<-paste("<td",rowspan," valign=\"top\">\n<p><B><small>The number of categories is more than 10 or equal to 1.<br>Table substitutes for Bar plot</small></B></p>\n</td>\n",sep="") | |
902 cmbntn[1]<-1 | |
903 } else { | |
904 tr1.lm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") | |
905 | |
906 tr1.lhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") | |
907 | |
908 tr1.l<-paste(tr1.lm,tr1.lhdr, sep="") | |
909 | |
910 sprsstr1r<-TRUE | |
911 } | |
912 } | |
913 } | |
914 # tr1.r: freq/pcnt table header part | |
915 tr1.r<-"" | |
916 if (cmbntn[1]) { | |
917 if (class(statlst$freqtbl)=="table"){ | |
918 tr1.r<-paste("<td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") | |
919 } else if (!sprsstr1r){ | |
920 tr1.rm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") | |
921 | |
922 tr1.rhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") | |
923 | |
924 tr1.r<-paste(tr1.rm,tr1.rhdr, sep="") | |
925 } | |
926 } | |
927 tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") | |
928 }) # end of try | |
929 } | |
930 | |
931 # create the 2nd and thereafter tr tags: statistics part | |
932 pt.tr2<-function(statlst, cmbntn, vltbl, imgfllst){ | |
933 try({ | |
934 tr2<-"" | |
935 tableon<-FALSE | |
936 if ( cmbntn[2]){ | |
937 if (is.na(imgfllst[["brchrt"]])){ | |
938 tableon<-TRUE | |
939 } | |
940 } | |
941 if (cmbntn[1] | tableon) { | |
942 | |
943 if (class(statlst$freqtbl)=="table") {tblkey<-names(statlst$freqtbl)} | |
944 # if freqtbl is NA, tblkey becomes NULL | |
945 for (j in 1:nrw) { | |
946 if (j%%2==FALSE) { colorprm <- clschm[3]} else {colorprm <-clschm[4]} | |
947 if (j < (nrw -2)) { | |
948 | |
949 catgrylbl<-"" | |
950 if (!is.null(vltbl)){ | |
951 if(!is.null(vltbl[[tblkey[j]]])) { | |
952 catgrylbl<-paste("(",vltbl[[tblkey[j]]],")",sep="") | |
953 } | |
954 } | |
955 tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">",tblkey[j],catgrylbl,"</td>\n<td align=\"right\">",statlst$freqtbl[[j]],"</td>\n<td align=\"right\">", signif(statlst$pcnttbl[[j]],3),"</td>\n</tr>\n", sep="") | |
956 | |
957 } else if (j == (nrw -2)) { | |
958 #cat("entering the total row\n") | |
959 tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">Total</td>\n<td align=\"right\">",statlst$Vald+statlst$Invald,"</td>\n<td align=\"right\">100</td>\n</tr>\n", sep="") | |
960 | |
961 } else if (j == (nrw -1)) { | |
962 # median | |
963 #cat("entering the median\n") | |
964 median.vl<- "Not Available" | |
965 median.lbl<-"" | |
966 if (!is.null(statlst$Median)) { | |
967 median.vl<- as.character(statlst$Median) | |
968 if (!is.null(vltbl) && (nrw>3)){ | |
969 if (!is.null(vltbl[[median.vl]])) { | |
970 median.lbl<-paste("(",vltbl[[median.vl]],")",sep="") | |
971 } | |
972 } | |
973 } | |
974 | |
975 tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Median</td>\n<td align=\"right\">",median.vl,"</td>\n<td align=\"right\">",median.lbl,"</td>\n</tr>\n", sep="") | |
976 | |
977 } else if (j == nrw) { | |
978 # mode | |
979 #cat("entering the Mode\n") | |
980 mode.vl<-"Not Available" | |
981 mode.lbl<-"" | |
982 if (!is.null(statlst$Mode)) { | |
983 mode.vl<-statlst$Mode | |
984 if (!is.null(vltbl) && (nrw>3) ) { | |
985 if (!is.null(vltbl[[mode.vl]])) { | |
986 mode.lbl<-paste("(",vltbl[[mode.vl]], ")", sep="") | |
987 } | |
988 } | |
989 } | |
990 | |
991 tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Mode</td>\n<td align=\"right\">",mode.vl,"</td>\n<td align=\"right\">",mode.lbl,"</td>\n</tr>\n", sep="") | |
992 } | |
993 } | |
994 } | |
995 tr2 | |
996 }) # end of try | |
997 } | |
998 | |
999 # create the chart/statistics table segment | |
1000 pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst,vltbl=vltbl){ | |
1001 try({ | |
1002 tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) | |
1003 tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn, vltbl=vltbl,imgfllst=imgfllst) | |
1004 tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") | |
1005 tbl | |
1006 }) | |
1007 } | |
1008 | |
1009 # create per variable html segment | |
1010 pt.varunit.dscrt<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} | |
1011 | |
1012 ## end of function definitions ## | |
1013 | |
1014 | |
1015 # implementation | |
1016 try({ | |
1017 #cat("enters the discrete html body function\n", sep="") | |
1018 pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn, vltbl=vltbl) | |
1019 | |
1020 ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) | |
1021 ptvarunitd<-pt.varunit.dscrt(vhdr=ptvarheader, vcntnts=pttbl) | |
1022 | |
1023 ptvarunitd | |
1024 }) | |
1025 } # end of discrete case | |
1026 | |
1027 | |
1028 | |
1029 # main | |
1030 # implementation | |
1031 rawVarName <- nameset | |
1032 if (length(attr(dtfrm, "Rsafe2raw"))>0){ | |
1033 Rsafe2raw <- attr(dtfrm, "Rsafe2raw") | |
1034 for (i in 1:length(nameset)){ | |
1035 if (!is.null(Rsafe2raw[[nameset[i]]])){ | |
1036 rawVarName[i] <- Rsafe2raw[[nameset[i]]]; | |
1037 } | |
1038 } | |
1039 } | |
1040 | |
1041 for (i in 1:dim(dtfrm)[2]){ | |
1042 try({ | |
1043 if (VARTYPE[i]==2) { | |
1044 varsgmnt.c<-univarStatHtml.cntn(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i]) | |
1045 cat(file=whtml, varsgmnt.c, sep="") | |
1046 } else { | |
1047 if (DBG) {cat(i,"-th var before entering the discrete html function\n", sep="")} | |
1048 #cat("check the value table=",VALTABLE[[VALINDEX[[i]]]],"\n", sep="") | |
1049 if (is.null(VALINDEX[[as.character(i)]])){valtable<-NULL} else {valtable<-VALTABLE[[VALINDEX[[as.character(i)]]]]} | |
1050 varsgmnt.d<-univarStatHtml.dscrt(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i], vltbl=valtable) | |
1051 cat(file=whtml, varsgmnt.d, sep="") | |
1052 } | |
1053 }) # end of try | |
1054 } # end of var-wise for-loop | |
1055 | |
1056 | |
1057 } #end of the function univarStatHtml | |
1058 | |
1059 | |
1060 ########################################################### | |
1061 univarDataDwnld<-function(dtfrm, dwnldoptn, dsnprfx) { | |
1062 # dtfrm(=z1) dataset to be downloaded | |
1063 # dwnldoptn(=z2) data download option | |
1064 # dsnprfx(=z3) dataset name prefix | |
1065 | |
1066 # The portion of code immediately below has been added to | |
1067 # convert extra metadata, such as value labels, supplied in | |
1068 # the proprietary attributes (below) into standard R notations, | |
1069 # such as "comments" for variable labels and "factors" for | |
1070 # value labels. | |
1071 # | |
1072 # This is still work in progress! -- L.A. | |
1073 | |
1074 NAMESET<-names(dtfrm) | |
1075 VARLABELS<-attr(dtfrm,"var.labels") | |
1076 | |
1077 attr(x,"orig.names")<-attr(dtfrm,"var.labels") | |
1078 | |
1079 CHRTLST<-attr(dtfrm, "univarChart.lst") | |
1080 STATLST<-attr(dtfrm, "univarStat.lst") | |
1081 VARTYPE<-attr(dtfrm, "var.type") | |
1082 VALINDEX<-attr(dtfrm, "val.index") | |
1083 VALTABLE<-attr(dtfrm, "val.table") | |
1084 | |
1085 MISSVALINDEX <- attr(x,"missval.index") | |
1086 MISSVALTABLE <- attr(x,"missval.table") | |
1087 | |
1088 | |
1089 recodemiss<-TRUE | |
1090 recodefactors<-TRUE | |
1091 dropfactorlevels<-FALSE | |
1092 orderfactors<-TRUE | |
1093 | |
1094 | |
1095 for (i in 1:length(x)) { | |
1096 cat("inside the for loop\n") | |
1097 cat("class: ") | |
1098 cat(class(x[[i]])) | |
1099 cat("\n") | |
1100 # Recoding discrete, categorical variables as R factors; | |
1101 # But, (experimental...) only if there are value labels supplied. | |
1102 # This means, among other things, that an ingested R character, | |
1103 # or integer vector would stay a vector, and not a factor, | |
1104 # in a saved-as-R subset. | |
1105 | |
1106 | |
1107 # -- L.A. | |
1108 | |
1109 if (!is.null(VARTYPE) && VARTYPE[i]<2 && recodefactors) { | |
1110 | |
1111 # Additionally, if we are saving as Stata, we're only | |
1112 # recoding discrete numeric values (vartype 1), but not Strings. | |
1113 # This is because of the nature of factors in R. | |
1114 # TODO: add documentation for all of this! | |
1115 # -- L.A. | |
1116 | |
1117 #if ((dwnldoptn != 'D03') || (VARTYPE[i] == 1)) { | |
1118 if (((dwnldoptn != 'D03') || (VARTYPE[i] == 1)) && !(is.null(VALINDEX[[as.character(i)]]))) { | |
1119 | |
1120 if (is.null(VALINDEX[[as.character(i)]])) { | |
1121 vti <- NULL | |
1122 } else { | |
1123 vti <- VALTABLE[[VALINDEX[[as.character(i)]]]] | |
1124 cat(paste(class(vti),"\n")) | |
1125 cat(paste(length(vti),"\n")) | |
1126 cat(paste("VTI", vti, "\n", sep=" : ")) | |
1127 } | |
1128 | |
1129 if (dropfactorlevels) { | |
1130 vtilevels<-NULL | |
1131 } else { | |
1132 if (is.numeric(x[[i]])) { | |
1133 vtilevels<-as.numeric(names(vti)) | |
1134 } else { | |
1135 vtilevels<-names(vti) | |
1136 } | |
1137 } | |
1138 | |
1139 # save / re-attach date/time-related class name | |
1140 classToken <- class(x[[i]]) | |
1141 vlevsi <- as.list(sort(unique.default(c(x[[i]],vtilevels)))) | |
1142 if ((classToken[1] == "Date") || (classToken[1] == "POSIXt")) { | |
1143 class(vlevsi)<- classToken | |
1144 } | |
1145 names(vlevsi)<-vlevsi | |
1146 tmatch<-na.omit(match(names(vti),names(vlevsi))) | |
1147 if (length(tmatch)>0) { | |
1148 names(vlevsi)[tmatch] <- vti | |
1149 } | |
1150 | |
1151 mti<-integer(0); | |
1152 mti<-integer(0); | |
1153 if (recodemiss && !is.null(MISSVALINDEX[[as.character(i)]])) { | |
1154 mti<-MISSVALTABLE[[MISSVALINDEX[[as.character(i)]]]] | |
1155 tmatch<-na.omit(match(mti,vlevsi)) | |
1156 if (length(tmatch)>0) { | |
1157 vlevsi[tmatch]<-NULL | |
1158 } | |
1159 } | |
1160 | |
1161 # TODO: Add explicit nominal/ordinal/dichotomous information to meta data, instead | |
1162 # of assuming non-character vars are ordered | |
1163 | |
1164 if ((dwnldoptn == 'D04') && !(is.null(VALORDER[[as.character(i)]]))) { | |
1165 cat("ordered value labels supplied") | |
1166 x[[i]] <- factor(x[[i]], | |
1167 levels=VALORDER[[as.character(i)]], | |
1168 ordered=TRUE) | |
1169 } else { | |
1170 cat("no ordered value labels supplied\n") | |
1171 cat(paste(VARTYPE[i],"\n",sep="")) | |
1172 cat(paste(length(vlevsi),"\n",sep="")) | |
1173 orderedfct<-(orderfactors && | |
1174 VARTYPE[i]>0 && ((length(vlevsi)-length(mti)>2))) | |
1175 cat(paste(as.character(orderedfct),"\n", sep="")) | |
1176 paste("MTI", mti,"\n",sep=" : ") | |
1177 paste("VLEVSI", vlevsi,"\n",sep=" : ") | |
1178 | |
1179 x[[i]] <- factor(x[[i]], | |
1180 levels=vlevsi, | |
1181 labels=names(vlevsi), | |
1182 ordered=(orderfactors && | |
1183 VARTYPE[i]>0 && ((length(vlevsi)-length(mti)>2)))) | |
1184 } | |
1185 | |
1186 attr(x,"vlevsi")<-vlevsi; | |
1187 attr(x,"namesvlevsi")<-names(vlevsi); | |
1188 | |
1189 } | |
1190 } | |
1191 | |
1192 # try to add variable labels as R comments: (L.A. -- ?) | |
1193 | |
1194 comment(x[[i]]) <- VARLABELS[i] | |
1195 } | |
1196 | |
1197 # end of added recoding -- L.A. | |
1198 | |
1199 | |
1200 | |
1201 if (dwnldoptn == 'D01') { | |
1202 # In the future when a sample program file is attached to | |
1203 # a text file, col.names should be set to F to avoid | |
1204 # printing a variable list | |
1205 write.table(dtfrm, file=dsnprfx, sep="\t", row.names=F, na=".") | |
1206 } else if (dwnldoptn == 'D02') { | |
1207 # SPLUS: (L.A.) | |
1208 for (i in 1:length(x)) { | |
1209 if (class(x[[i]]) == 'AsIs'){ | |
1210 x[[i]]<-as.character(x[[i]]); | |
1211 } | |
1212 } | |
1213 #attach(dtfrm) | |
1214 dump('x', file=dsnprfx) | |
1215 #dump(ls(dtfrm), file=dsnprfx) | |
1216 #detach(dtfrm) | |
1217 } else if (dwnldoptn == 'D03') { | |
1218 # STATA: (L.A.) | |
1219 ## -- replaced dtfrm with x in the following paragraph; - L.A. (?) | |
1220 # truncate over-sized string variables | |
1221 MaxLenStringVar <- 127 | |
1222 vt <- attr(x, 'var.type') | |
1223 for (i in 1:length(vt)){ | |
1224 if (vt[i] == 0){ | |
1225 #cat(paste(i, "-th var is char type", sep=""), "\n") | |
1226 maxlen <- max(unlist(lapply(x[[i]],nchar))) | |
1227 if (maxlen > MaxLenStringVar) { | |
1228 #cat(paste(i, "-th var is over-sized string var", sep=""), "\n") | |
1229 x[[i]] <- strtrim(x[[i]], MaxLenStringVar) | |
1230 } | |
1231 } | |
1232 } | |
1233 write.dta(x, file=dsnprfx, version=7) | |
1234 } else if (dwnldoptn == 'D04') { | |
1235 # SAVE AS R WORKSPACE: (L.A.) | |
1236 save(x,file=dsnprfx) | |
1237 } | |
1238 } # end of univarDataDwnld.R | |
1239 | |
1240 ########################################################### | |
1241 sumStatTabAll.DDI<-function(dtfrm, file="", flid=1, jobNo=0, startno=1, novars=0){ | |
1242 | |
1243 # sumStatTabAll.DDI(dtfrm=x,file="/tmp/mry/00001/00001.stat.1.tab", flid=1) | |
1244 # arguments | |
1245 # dtfrm data frame furnished with attributes | |
1246 # file outp file (tab-delimited) | |
1247 # flid file ID | |
1248 # ordrDDI == 0 if no division of a job | |
1249 # constants | |
1250 entref<-c("&", "<", ">", "'", "\"") | |
1251 nmstr<-c("&","<", ">", "'",""") | |
1252 sumStatset<-c("mean", "medn", "mode", "vald", "invd", "min", "max", "stdev") | |
1253 DEBUG<-FALSE | |
1254 | |
1255 fileid<-paste("file", flid, sep="") | |
1256 #varIDprfx<-paste("v", flid, ".", sep="") | |
1257 | |
1258 # open the connection | |
1259 if (file!="") { | |
1260 if (jobNo==0){ | |
1261 wxml<-file(file, "w") | |
1262 } else if (jobNo >0) { | |
1263 wxml<-file(file, "a") | |
1264 } | |
1265 on.exit(close(wxml)) | |
1266 | |
1267 } else { | |
1268 stop("output file name is not specified\n") | |
1269 } | |
1270 | |
1271 # An auxiliary function that replaces the above five characters with the named entities | |
1272 chr2xtrf<-function(lbl){ | |
1273 for (i in 1:length(entref)) { | |
1274 lbl<-gsub(entref[i], nmstr[i], lbl, perl=T, useBytes = T) | |
1275 } | |
1276 } | |
1277 | |
1278 # xml printing up to the section 3 | |
1279 #if (jobNo<= 1 ){ | |
1280 # if (jobNo){novars<-"" } else {novars<-dim(dtfrm)[2]} | |
1281 if (jobNo == 0){ | |
1282 if (novars==0){novar<-"";} else if (novars > 0) {novar <-novars} | |
1283 cat(file=wxml, sep="",paste(dim(dtfrm)[1],novar,fileid,sep="\t"),"\n" ) | |
1284 } | |
1285 | |
1286 | |
1287 VARTYPE<-attr(dtfrm, "var.type") | |
1288 NAMES<-names(dtfrm) | |
1289 for (i in 1: dim(dtfrm)[2]){ | |
1290 | |
1291 # sumStatset<-c("mean", "medn", "mode", "vald", "invd", "min", "max", "stdev") | |
1292 | |
1293 | |
1294 tmpvari<-dtfrm[[i]] | |
1295 if(DEBUG) {cat("variable name =",NAMES[i],"\n")} | |
1296 | |
1297 if (VARTYPE[i] ==0){ | |
1298 # set '' to NA tmpvari[ tmpvari == ""]<-NA; | |
1299 tmpvari[ sub('^\\s+', '',tmpvari, perl = T, useBytes=T)==''] <-NA | |
1300 } | |
1301 | |
1302 | |
1303 tbl1<-table(tmpvari, useNA='ifany') | |
1304 | |
1305 N<-sum(complete.cases(tmpvari)) | |
1306 | |
1307 if (VARTYPE[i]== 2) { | |
1308 | |
1309 svnm<-summary(tmpvari) | |
1310 if (N) { | |
1311 min.value <- svnm[[1]] | |
1312 median.value <- median(tmpvari, na.rm=TRUE) | |
1313 max.value <- svnm[[6]] | |
1314 mean.value <- svnm[[4]] | |
1315 } | |
1316 else { | |
1317 min.value <- NA | |
1318 median.value <- NA | |
1319 max.value <- NA | |
1320 mean.value <- NA | |
1321 } | |
1322 stdv.value <- sd(tmpvari, na.rm=T) | |
1323 # find the maximum frequency cell | |
1324 # index: which.max(table(dtfrm[[i]])) | |
1325 maxfreq<-NA | |
1326 if (length(tmpvari) > length(tbl1)) { | |
1327 maxfreq<- names(tbl1)[which.max(tbl1)] | |
1328 } | |
1329 statset<- list( | |
1330 Mean = mean.value, | |
1331 Median = median.value, | |
1332 Mode.Value = maxfreq, | |
1333 Vald = N, | |
1334 Invald = sum(is.na(tmpvari)), | |
1335 Minimum = min.value, | |
1336 Maximum = max.value, | |
1337 Stdev = stdv.value | |
1338 ) | |
1339 #if (length(attr(tmpvari, 'Univariate'))==0){attr(dtfrm[[i]],"Univariate") <- statset} | |
1340 } else if ( (VARTYPE[i] < 2) & (VARTYPE[i] >=0) ){ | |
1341 | |
1342 if(DEBUG) {cat("pass the point univStat(discrete)\n")} | |
1343 statset<- list( | |
1344 Vald = N, | |
1345 Invald = (length(tmpvari) - N), | |
1346 Total = length(tmpvari) | |
1347 ) | |
1348 | |
1349 if (DEBUG){cat("\ttable header:",paste(dimnames(tbl1)[[1]], collapse='|'), "\n")} | |
1350 if (DEBUG){cat("\ttable frequency:",paste(tbl1, collapse='|'), "\n")} | |
1351 } else { | |
1352 if(DEBUG) {cat("out-of-range value", i ,"-th var =", VARTYPE[i],"\n")} | |
1353 } | |
1354 | |
1355 # section 4 | |
1356 pt.vr.1<-paste(NAMES[i],VARTYPE[i],sep='\t') | |
1357 pt.vr.st<-"" | |
1358 #iadj <- (i+startno-1) | |
1359 if (VARTYPE[i] == 2) { | |
1360 # continuous variable case | |
1361 #pt.vr.st<- if (!is.na(statset[[1]])) {statset[[1]]} else {"."} | |
1362 pt.vr.st<-"8" | |
1363 for (j in 1:length(sumStatset)) { | |
1364 pt.vr.st<-paste(pt.vr.st,if (!is.na(statset[[j]])) {statset[[j]]} else {"."}, sep="\t") | |
1365 } | |
1366 | |
1367 # if: end of continuous variable part | |
1368 } else { | |
1369 | |
1370 # discrete variable case | |
1371 | |
1372 # actual value table | |
1373 if (dim(tbl1)[1] <= 100) { | |
1374 # integer: how many value-freq sets follw? | |
1375 pt.vr.st<-dim(tbl1)[1] | |
1376 for (j in 1:(dim(tbl1)[1])) { | |
1377 # each value-freq set | |
1378 tmpvalue<-dimnames(tbl1)[[1]][j] | |
1379 #if (VARTYPE[i] == 0) {tmpvalue<-chr2xtrf(tmpvalue)} | |
1380 if (DEBUG) { cat("i=", i, "\tj=", j,"\ttmpvalue:", tmpvalue, "\n", sep="")} | |
1381 | |
1382 pt.vr.st<-paste(pt.vr.st,if (!is.na(tmpvalue)) {tmpvalue} else {"."}, tbl1[[j]],sep="\t") | |
1383 | |
1384 } | |
1385 | |
1386 } else { | |
1387 # for more-than-100-category vars, print 0 | |
1388 pt.vr.st<-0 | |
1389 } | |
1390 | |
1391 # for all cases, valid, invalid and N are printed | |
1392 for (k in 1:length(statset)){ | |
1393 pt.vr.st<-paste(pt.vr.st,if (!is.na(statset[[k]])) {statset[[k]]} else {"."}, sep="\t") | |
1394 } | |
1395 | |
1396 | |
1397 if (DEBUG) {cat("\n");} | |
1398 } # else: end of discrete case | |
1399 | |
1400 u <- unf(tmpvari, version=3) | |
1401 uxml<-paste(as.character(u),"\n",sep="") | |
1402 # dump this var | |
1403 cat(file=wxml,paste(pt.vr.1,pt.vr.st,uxml,sep="\t"),sep="") | |
1404 | |
1405 } # end of the variable-wise loop | |
1406 | |
1407 } #end of the sumStatTabAll.DDIx.R | |
1408 | |
1409 | |
1410 checkBinaryResponse<-function(binx){ | |
1411 bnryVarTbl <-attr(table(binx), 'dimnames')[[1]]; | |
1412 if (length(bnryVarTbl) == 2){ | |
1413 if ((bnryVarTbl[1] == 0) && (bnryVarTbl[2]==1)){ | |
1414 #cat('this variable is already 0-1\n'); | |
1415 } else { | |
1416 #cat('this variable needs the conversion\n'); | |
1417 #cat(paste( bnryVarTbl[1],' is recoded to 1; ', bnryVarTbl[2],' is recoded to 0;\n', sep='')); | |
1418 binx<-as.integer(binx == bnryVarTbl[1]); | |
1419 } | |
1420 } | |
1421 invisible(binx) | |
1422 } | |
1423 | |
1424 | |
1425 ####################################################################### | |
1426 univarStatHtmlBody<-function(dtfrm, whtml, analysisoptn, standalone=F){ | |
1427 # Description | |
1428 # | |
1429 # arguments | |
1430 # dtfrm variable furnished with attributes | |
1431 # tmpimgfile temporary image file prefix: =$SRVRCGI=$SERVER$CGIDIR | |
1432 # analysisoptn analysis option | |
1433 # nrows local variable | |
1434 # tmphtmlfile temporary html file | |
1435 # file tmphtmlfile | |
1436 | |
1437 DBG<-TRUE | |
1438 DBG<-FALSE | |
1439 | |
1440 # open the connection | |
1441 #whtml<-file(tmphtmlfile, "w") | |
1442 #on.exit(close(whtml)) | |
1443 | |
1444 # color parameters | |
1445 # legend: c(1:background, 2:table header, 3: table body(o), 4: table body(e)) | |
1446 # clschm <-c("#FFFFFF", "#CCFFCC","#e0ffff","#f0fff0") # green-based palette | |
1447 # blue-based palette | |
1448 #clschm <-c("#FFFFFF", "#e6e6fa","#ffffff","#f5f5f5") | |
1449 clschm <-c("dvnUnvStatTbl", "dvnUnvStatTblHdr","dvnUnvStatTblRowO","dvnUnvStatTblRowE") | |
1450 | |
1451 # table parameters | |
1452 # legend: c(border, cellspacing) | |
1453 tblprm <-c(0, 2) | |
1454 | |
1455 #cat("\nEntered the function univarStatHtml\n") | |
1456 | |
1457 # values for local tests | |
1458 # set localtest 0 after local tests | |
1459 localtest<-TRUE | |
1460 localtest<-FALSE | |
1461 if (localtest){ | |
1462 tmpimgfile<-c("") | |
1463 imgprfx1<-c("<img src=\"") | |
1464 imgprfx2<-c("") | |
1465 univarstathdr<-c("Valid Cases", "Invalid Cases(NAs)", "Total", "Mean", "Standard deviation", "Skewness", "Kurtosis", "Coefficient of variation", "Mode", "Minimum","1st Quartile","Median","3rd Quartile","Maximum","Range","Interquartile Range","Normality Test:Shapiro-Wilk Statistic", "(Shapiro-Wilk Statistic: p value)") | |
1466 } | |
1467 if (standalone) { | |
1468 imgflprfx<-paste(imgprfx1,tmpimgfile,imgprfx2,sep="") | |
1469 } else { | |
1470 imgflprfx<-"<img src=\"" | |
1471 } | |
1472 # constant for rendering a table for univariate statistics(continuous vars only) | |
1473 uslstlen<-length(univarstathdr) | |
1474 nrows <-ceiling(uslstlen/2) | |
1475 blnkcell<-uslstlen%%2==TRUE | |
1476 | |
1477 | |
1478 nameset<-names(dtfrm) | |
1479 varlabelset<-attr(dtfrm,"var.labels") | |
1480 CHRTLST<-attr(dtfrm, "univarChart.lst") | |
1481 STATLST<-attr(dtfrm, "univarStat.lst") | |
1482 VARTYPE<-attr(dtfrm, "var.type") | |
1483 VALINDEX<-attr(dtfrm, "val.index") | |
1484 VALTABLE<-attr(dtfrm, "val.table") | |
1485 | |
1486 | |
1487 pt.varheader<-function(namesi, varlabelsi=NA) {h3<-paste("<h3>", namesi, if (!is.na(varlabelsi)) {paste(": ", varlabelsi, sep="")}, "</h3>\n",sep="");h3} | |
1488 | |
1489 ################### | |
1490 # continuous case | |
1491 univarStatHtml.cntn<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi){ | |
1492 | |
1493 # statlst STATLST[[as.character(i)]] | |
1494 # imgfllst imgfllst=CHRTLST[[as.character(i)]] | |
1495 # cmbntn analysisoptn | |
1496 # function definition sections | |
1497 | |
1498 # create the first tr tag: chart part | |
1499 pt.tr1<-function(imgfllst, cmbntn){ | |
1500 tr1<-"" | |
1501 if (cmbntn[2]) { | |
1502 | |
1503 if (cmbntn[1]) { colspan<-" colspan=\"2\"" } else { colspan<-""} | |
1504 | |
1505 # both | |
1506 | |
1507 if(!is.null(imgfllst[["hstbx"]])){ | |
1508 tr1.l<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["hstbx"]],imgsffx1,"</td>\n",sep="") | |
1509 } else { | |
1510 tr1.l<-paste("<td",colspan,">\n<p><B><font color=red>Histogram/Boxplot Not Available</font></B></p>\n</td>\n") | |
1511 } | |
1512 | |
1513 if(!is.null(imgfllst[["qqplt"]])) { | |
1514 tr1.r<-paste("<td",colspan,">\n",imgflprfx,imgfllst[["qqplt"]],imgsffx1,"</td>\n",sep="") | |
1515 } else { | |
1516 tr1.r<-paste("<td",colspan,">\n<p><B><font color=red>Normal Q-Q plot Not Available</font></B></p>\n</td>\n",sep="") | |
1517 } | |
1518 | |
1519 tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") | |
1520 } | |
1521 tr1 | |
1522 } | |
1523 | |
1524 # create the 2nd and thereafter tr tags: statistics part | |
1525 pt.tr2<-function(statlst, cmbntn){ | |
1526 tr2<-"" | |
1527 if (cmbntn[1]) { | |
1528 # statistics on | |
1529 # table header | |
1530 tr2<-paste("<tr class=\"",clschm[2],"\">\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n<td align=\"left\"><b>Statistic</b></td><td align=\"right\"><b>Value</b></td>\n</tr>\n",sep="") | |
1531 | |
1532 # statistical data | |
1533 # when # of statistics is not even | |
1534 if (blnkcell){ univarstathdr[length(statlst)+1]<-" "} | |
1535 | |
1536 # table body | |
1537 for (j in 1:nrows) { | |
1538 if (j%%2==FALSE) colorprm <- clschm[3] else colorprm <-clschm[4] | |
1539 | |
1540 tr2<-paste(tr2, | |
1541 "<tr class=\"",colorprm,"\">\n", | |
1542 "<td align=\"left\">",univarstathdr[j],"</td>\n", | |
1543 "<td align=\"right\">", prettyNum(statlst[[j]]),"</td>\n", | |
1544 "<td align=\"left\">",univarstathdr[j+nrows],"</td>\n", | |
1545 "<td align=\"right\">", if ( (j==nrows) & (blnkcell) ) {" "} else {prettyNum(statlst[[j+nrows]])},"</td>\n</tr>\n", sep="") | |
1546 } | |
1547 } | |
1548 tr2 | |
1549 } | |
1550 | |
1551 # create the chart/statistics table segment | |
1552 pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst){ | |
1553 tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) | |
1554 tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn) | |
1555 tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") | |
1556 tbl | |
1557 } | |
1558 | |
1559 # create per variable html segment | |
1560 pt.varunit.cntn<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} | |
1561 ## end of function definitions ## | |
1562 | |
1563 # implementation | |
1564 | |
1565 pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn) | |
1566 ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) | |
1567 ptvarunitc<-pt.varunit.cntn(vhdr=ptvarheader, vcntnts=pttbl) | |
1568 | |
1569 ptvarunitc | |
1570 } # end of continuous case | |
1571 | |
1572 | |
1573 ###################### | |
1574 # discrete case | |
1575 | |
1576 univarStatHtml.dscrt<-function(statlst, imgfllst, cmbntn, namesi, varlabelsi, vltbl) { | |
1577 # statlst STATLST[[as.character(i)]] | |
1578 # imgfllst imgfllst=CHRTLST[[as.character(i)]] | |
1579 # cmbntn analysisoptn | |
1580 # function definition sections | |
1581 | |
1582 #statlst[["freqtbl"]] | |
1583 # mode and median even if a freq table is not available | |
1584 nrw<-3 | |
1585 # add one for "total" row | |
1586 #if (!is.na(statlst$freqtbl)) {nrw<-length(statlst$freqtbl)+1+nrw} | |
1587 | |
1588 if (class(statlst$freqtbl)=="table") {nrw<-length(statlst$freqtbl)+nrw} | |
1589 # nrws: rowspan parameter value if the chart option is chosen | |
1590 nrws<-nrw+1 | |
1591 | |
1592 pt.tr1<-function(imgfllst, cmbntn){ | |
1593 try({ | |
1594 # tr1.l: chart part | |
1595 tr1.l<-"" | |
1596 sprsstr1r<-FALSE | |
1597 if (cmbntn[2]) { | |
1598 rowspan<-"" | |
1599 if (cmbntn[1]) { rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") } | |
1600 | |
1601 if(!is.na(imgfllst[["brchrt"]])){ | |
1602 tr1.l<-paste("<td",rowspan," valign=\"top\">\n",imgflprfx,imgfllst[["brchrt"]], imgsffx1, "</td>\n", sep="") | |
1603 } else { | |
1604 if (class(statlst$freqtbl)=="table"){ | |
1605 rowspan<-paste(" rowspan=\"",nrws,"\"",sep="") | |
1606 tr1.l<-paste("<td",rowspan," valign=\"top\">\n<p><B><small>The number of categories is more than 10 or equal to 1.<br>Table substitutes for Bar plot</small></B></p>\n</td>\n",sep="") | |
1607 cmbntn[1]<-1 | |
1608 } else { | |
1609 | |
1610 tr1.lm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") | |
1611 | |
1612 tr1.lhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") | |
1613 tr1.l<-paste(tr1.lm,tr1.lhdr, sep="") | |
1614 | |
1615 sprsstr1r<-TRUE | |
1616 } | |
1617 } | |
1618 } | |
1619 # tr1.r: freq/pcnt table header part | |
1620 tr1.r<-"" | |
1621 if (cmbntn[1]) { | |
1622 if (class(statlst$freqtbl)=="table"){ | |
1623 tr1.r<-paste("<td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") | |
1624 } else if (!sprsstr1r){ | |
1625 | |
1626 tr1.rm<-paste("<td align=\"left\" colspan=\"3\" valign=\"top\">\n<p><B><small>The number of categories is more than 50. Frequency/Percentage tables are not shown here</small></B></p>\n</td></tr>\n",sep="") | |
1627 | |
1628 tr1.rhdr<-paste("<tr><td align=\"left\" class=\"",clschm[2],"\" ><b>Value: Value Label</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Freq</b></td><td align=\"right\" class=\"",clschm[2],"\" ><b>Percent</b></td>\n",sep="") | |
1629 | |
1630 tr1.r<-paste(tr1.rm,tr1.rhdr, sep="") | |
1631 } | |
1632 } | |
1633 tr1<-paste("<tr>\n",tr1.l,tr1.r,"</tr>\n",sep="") | |
1634 }) # end of try | |
1635 } | |
1636 | |
1637 # create the 2nd and thereafter tr tags: statistics part | |
1638 pt.tr2<-function(statlst, cmbntn, vltbl, imgfllst){ | |
1639 try({ | |
1640 tr2<-"" | |
1641 tableon<-FALSE | |
1642 if ( cmbntn[2]){ | |
1643 if (is.na(imgfllst[["brchrt"]])){ | |
1644 tableon<-TRUE | |
1645 } | |
1646 } | |
1647 if (cmbntn[1] | tableon) { | |
1648 | |
1649 if (class(statlst$freqtbl)=="table") {tblkey<-names(statlst$freqtbl)} | |
1650 # if freqtbl is NA, tblkey becomes NULL | |
1651 for (j in 1:nrw) { | |
1652 if (j%%2==FALSE) { colorprm <- clschm[3]} else {colorprm <-clschm[4]} | |
1653 if (j < (nrw -2)) { | |
1654 | |
1655 catgrylbl<-"" | |
1656 if (!is.null(vltbl)){ | |
1657 if(!is.null(vltbl[[tblkey[j]]])) { | |
1658 catgrylbl<-paste("(",vltbl[[tblkey[j]]],")",sep="") | |
1659 } | |
1660 } | |
1661 tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">",tblkey[j],catgrylbl,"</td>\n<td align=\"right\">",statlst$freqtbl[[j]],"</td>\n<td align=\"right\">", signif(statlst$pcnttbl[[j]],3),"</td>\n</tr>\n", sep="") | |
1662 | |
1663 } else if (j == (nrw -2)) { | |
1664 #cat("entering the total row\n") | |
1665 tr2<-paste(tr2, "<tr class=\"",colorprm,"\">\n<td align=\"left\">Total</td>\n<td align=\"right\">",statlst$Vald+statlst$Invald,"</td>\n<td align=\"right\">100</td>\n</tr>\n", sep="") | |
1666 | |
1667 } else if (j == (nrw -1)) { | |
1668 # median | |
1669 #cat("entering the median\n") | |
1670 median.vl<- "Not Available" | |
1671 median.lbl<-"" | |
1672 if (!is.null(statlst$Median)) { | |
1673 median.vl<- as.character(statlst$Median) | |
1674 if (!is.null(vltbl) && (nrw>3)){ | |
1675 if (!is.null(vltbl[[median.vl]])) { | |
1676 median.lbl<-paste("(",vltbl[[median.vl]],")",sep="") | |
1677 } | |
1678 } | |
1679 } | |
1680 | |
1681 tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Median</td>\n<td align=\"right\">",median.vl,"</td>\n<td align=\"right\">",median.lbl,"</td>\n</tr>\n", sep="") | |
1682 | |
1683 } else if (j == nrw) { | |
1684 # mode | |
1685 #cat("entering the Mode\n") | |
1686 mode.vl<-"Not Available" | |
1687 mode.lbl<-"" | |
1688 if (!is.null(statlst$Mode)) { | |
1689 mode.vl<-statlst$Mode | |
1690 if (!is.null(vltbl) && (nrw>3) ) { | |
1691 if (!is.null(vltbl[[mode.vl]])) { | |
1692 mode.lbl<-paste("(",vltbl[[mode.vl]], ")", sep="") | |
1693 } | |
1694 } | |
1695 } | |
1696 | |
1697 tr2<-paste(tr2,"<tr class=\"",colorprm,"\">\n<td align=\"left\">Mode</td>\n<td align=\"right\">",mode.vl,"</td>\n<td align=\"right\">",mode.lbl,"</td>\n</tr>\n", sep="") | |
1698 } | |
1699 } | |
1700 } | |
1701 tr2 | |
1702 }) # end of try | |
1703 } | |
1704 | |
1705 # create the chart/statistics table segment | |
1706 pt.tbl<-function(statlst=statlst,cmbntn=cmbntn,imgfllst=imgfllst,vltbl=vltbl){ | |
1707 try({ | |
1708 tr1<-pt.tr1(imgfllst=imgfllst, cmbntn=cmbntn) | |
1709 tr2<-pt.tr2(statlst=statlst, cmbntn=cmbntn, vltbl=vltbl,imgfllst=imgfllst) | |
1710 tbl<-paste("<center>\n<table border=\"",tblprm[1],"\" class=\"",clschm[1],"\" cellspacing=\"",tblprm[1],"\" >\n",tr1,tr2,"</table>\n</center>\n",sep="") | |
1711 tbl | |
1712 }) | |
1713 } | |
1714 | |
1715 # create per variable html segment | |
1716 pt.varunit.dscrt<-function(vhdr,vcntnts){varunit<-paste(vhdr,vcntnts,"<hr/>", sep="");varunit} | |
1717 | |
1718 ## end of function definitions ## | |
1719 | |
1720 | |
1721 # implementation | |
1722 try({ | |
1723 #cat("enters the discrete html body function\n", sep="") | |
1724 pttbl<-pt.tbl(statlst=statlst, imgfllst=imgfllst, cmbntn=cmbntn, vltbl=vltbl) | |
1725 | |
1726 ptvarheader<-pt.varheader(namesi=namesi, varlabelsi=varlabelsi) | |
1727 ptvarunitd<-pt.varunit.dscrt(vhdr=ptvarheader, vcntnts=pttbl) | |
1728 | |
1729 ptvarunitd | |
1730 }) | |
1731 } # end of discrete case | |
1732 | |
1733 | |
1734 | |
1735 # main | |
1736 # implementation | |
1737 rawVarName <- nameset | |
1738 if (length(attr(dtfrm, "Rsafe2raw"))>0){ | |
1739 Rsafe2raw <- attr(dtfrm, "Rsafe2raw") | |
1740 for (i in 1:length(nameset)){ | |
1741 if (!is.null(Rsafe2raw[[nameset[i]]])){ | |
1742 rawVarName[i] <- Rsafe2raw[[nameset[i]]]; | |
1743 } | |
1744 } | |
1745 } | |
1746 | |
1747 for (i in 1:dim(dtfrm)[2]){ | |
1748 try({ | |
1749 if (VARTYPE[i]==2) { | |
1750 varsgmnt.c<-univarStatHtml.cntn(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i]) | |
1751 #cat(file=whtml, varsgmnt.c, sep="") | |
1752 HTML(file=whtml, varsgmnt.c) | |
1753 } else { | |
1754 if (DBG) {cat(i,"-th var before entering the discrete html function\n", sep="")} | |
1755 #cat("check the value table=",VALTABLE[[VALINDEX[[i]]]],"\n", sep="") | |
1756 if (is.null(VALINDEX[[as.character(i)]])){valtable<-NULL} else {valtable<-VALTABLE[[VALINDEX[[as.character(i)]]]]} | |
1757 varsgmnt.d<-univarStatHtml.dscrt(statlst=STATLST[[as.character(i)]], imgfllst=CHRTLST[[as.character(i)]], cmbntn=analysisoptn, namesi=rawVarName[i], varlabelsi=varlabelset[i], vltbl=valtable) | |
1758 #cat(file=whtml, varsgmnt.d, sep="") | |
1759 HTML(file=whtml, varsgmnt.d) | |
1760 } | |
1761 }) # end of try | |
1762 } # end of var-wise for-loop | |
1763 | |
1764 | |
1765 } #end of the function univarStatHtml |