comparison src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @ 10:a50cf11e5178

Rewrite LGDataverse completely upgrading to dataverse4.0
author Zoe Hong <zhong@mpiwg-berlin.mpg.de>
date Tue, 08 Sep 2015 17:00:21 +0200
parents
children
comparison
equal deleted inserted replaced
9:5926d6419569 10:a50cf11e5178
1 /*
2 Copyright (C) 2005-2012, by the President and Fellows of Harvard College.
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15
16 Dataverse Network - A web application to share, preserve and analyze research data.
17 Developed at the Institute for Quantitative Social Science, Harvard University.
18 Version 3.0.
19 */
20
21 package edu.harvard.iq.dataverse.dataaccess;
22
23 import edu.harvard.iq.dataverse.DataFile;
24 import edu.harvard.iq.dataverse.datavariable.DataVariable;
25 import java.util.*;
26 import java.util.Scanner;
27 import java.util.logging.*;
28 import java.io.*;
29 import java.io.FileNotFoundException;
30 import java.math.BigDecimal;
31 import java.math.MathContext;
32 import java.math.RoundingMode;
33 import java.nio.ByteBuffer;
34 import java.nio.channels.FileChannel;
35 import java.nio.file.Paths;
36 import java.nio.file.StandardOpenOption;
37 import java.util.regex.Matcher;
38
39
40 import org.apache.commons.lang.*;
41
42
43 /**
44 *
45 * @author Leonid Andreev
46 * original author:
47 * @author a.sone
48 */
49
public class TabularSubsetGenerator implements SubsetGenerator {

    private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName());

    // Element-type tags used by the subset*Vector()/subsetObjectVector() methods:
    private static int COLUMN_TYPE_STRING = 1;
    private static int COLUMN_TYPE_LONG = 2;
    private static int COLUMN_TYPE_DOUBLE = 3;
    private static int COLUMN_TYPE_FLOAT = 4;

    // Size, in bytes, of the read buffer allocated per subset column:
    private static int MAX_COLUMN_BUFFER = 8192;

    // Channel open on the "rotated" (column-major) image of the tab file:
    private FileChannel fileChannel = null;

    private int varcount;    // number of variable columns in the tab file
    private int casecount;   // number of observation rows in the tab file
    private int subsetcount; // number of columns in this subset request

    // Most recently read entry from each subset column (see readSubsetLineBytes()):
    private byte[][] columnEntries = null;


    // Per-subset-column read state, indexed by position in the subset request
    // (not by physical column number):
    private ByteBuffer[] columnByteBuffers;
    private int[] columnBufferSizes;   // bytes currently held in each buffer
    private int[] columnBufferOffsets; // read position within each buffer

    private long[] columnStartOffsets; // absolute start of each column in the rotated file
    private long[] columnTotalOffsets; // bytes of each column consumed so far
    private long[] columnTotalLengths; // total byte length of each column

    /**
     * No-op constructor, for use with the static and by-file subsetting
     * methods that do not need per-instance read state.
     */
    public TabularSubsetGenerator() {

    }
81
    /**
     * Prepares a subsetting session for the given tabular DataFile: builds
     * (or reuses) the "rotated" column-major image of the tab file, opens a
     * read channel on it, and initializes the per-column read state for the
     * requested variables.
     * @param datafile a tabular DataFile.
     * @param variables the variables (columns) to include in the subset;
     *        each must belong to the datafile's DataTable.
     * @throws IOException if the file is not tabular, if the variable list
     *         is null/empty/larger than the file's column count, or if a
     *         variable belongs to a different datafile.
     */
    public TabularSubsetGenerator (DataFile datafile, List<DataVariable> variables) throws IOException {
        if (!datafile.isTabularData()) {
            throw new IOException("DataFile is not tabular data.");
        }

        setVarCount(datafile.getDataTable().getVarQuantity().intValue());
        setCaseCount(datafile.getDataTable().getCaseQuantity().intValue());

        File tabfile = datafile.getFileSystemLocation().toFile();
        // The rotated image stores the data column-by-column, so each column
        // can be read as one contiguous byte range:
        File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount());
        long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount());

        fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ));

        if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) {
            throw new IOException("Illegal number of variables in the subset request");
        }

        subsetcount = variables.size();
        columnTotalOffsets = new long[subsetcount];
        columnTotalLengths = new long[subsetcount];
        columnByteBuffers = new ByteBuffer[subsetcount];



        if (subsetcount == 1) {
            // Single-column subset: no per-column buffering needed; just
            // position the shared channel at the start of the column
            // (readSingleColumnSubset() streams it from there).
            if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) {
                throw new IOException("Variable in the subset request does not belong to the datafile.");
            }
            dbgLog.fine("single variable subset; setting fileChannel position to "+extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder()));
            fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder()));
            columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder());
            columnTotalOffsets[0] = 0;
        } else {
            // Multi-column subset: allocate a read buffer per column and
            // pre-fill each one with the first chunk of its column.
            columnEntries = new byte[subsetcount][];

            columnBufferSizes = new int[subsetcount];
            columnBufferOffsets = new int[subsetcount];
            columnStartOffsets = new long[subsetcount];

            int i = 0;
            for (DataVariable var : variables) {
                if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) {
                    throw new IOException("Variable in the subset request does not belong to the datafile.");
                }
                columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER);
                columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder());
                columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder());
                // Don't read past the end of a short column:
                if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) {
                    columnByteBuffers[i].limit((int)columnTotalLengths[i]);
                }
                fileChannel.position(columnStartOffsets[i]);
                columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]);
                columnBufferOffsets[i] = 0;
                columnTotalOffsets[i] = columnBufferSizes[i];
                i++;
            }
        }
    }
141
142 private int getVarCount() {
143 return varcount;
144 }
145
146 private void setVarCount(int varcount) {
147 this.varcount = varcount;
148 }
149
150 private int getCaseCount() {
151 return casecount;
152 }
153
154 private void setCaseCount(int casecount) {
155 this.casecount = casecount;
156 }
157
158
159 /*
160 * Note that this method operates on the *absolute* column number, i.e.
161 * the number of the physical column in the tabular file. This is stored
162 * in DataVariable.FileOrder.
163 * This "column number" should not be confused with the number of column
164 * in the subset request; a user can request any number of variable
165 * columns, in an order that doesn't have to follow the physical order
166 * of the columns in the file.
167 */
168 private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException {
169 if (columnEndOffsets == null || columnEndOffsets.length <= column) {
170 throw new IOException("Offsets table not initialized; or column out of bounds.");
171 }
172 long columnOffset;
173
174 if (column > 0) {
175 columnOffset = columnEndOffsets[column - 1];
176 } else {
177 columnOffset = getVarCount() * 8;
178 }
179 return columnOffset;
180 }
181
182 /*
183 * See the comment for the method above.
184 */
185 private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException {
186 if (columnEndOffsets == null || columnEndOffsets.length <= column) {
187 throw new IOException("Offsets table not initialized; or column out of bounds.");
188 }
189 long columnLength;
190
191 if (column > 0) {
192 columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1];
193 } else {
194 columnLength = columnEndOffsets[0] - varcount * 8;
195 }
196
197 return columnLength;
198 }
199
200
    /**
     * Refills the read buffer of the given subset column from the rotated
     * image file, starting at the first byte of the column not yet consumed.
     * @param column index of the column within the subset request (NOT the
     *        physical file column).
     * @throws IOException if the column has already been fully consumed.
     */
    private void bufferMoreColumnBytes(int column) throws IOException {
        if (columnTotalOffsets[column] >= columnTotalLengths[column]) {
            throw new IOException("attempt to buffer bytes past the column boundary");
        }
        // Re-position the (shared) channel at the next unread byte of this
        // column; other columns move it between calls.
        fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]);

        columnByteBuffers[column].clear();
        // Cap the read so we never pull in bytes belonging to the next column:
        if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) {
            dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes");
            columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column]));
        }
        columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]);
        dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column);
        columnBufferOffsets[column] = 0;
        columnTotalOffsets[column] += columnBufferSizes[column];
    }
217
218 /*
219 do not use this method!
220 there's a high potential for the "UTF8 character split between buffers" error!
221 public String readColumnEntry(int column) {
222 String ret = null;
223 int currentbyte;
224
225 if (columnBufferOffsets[column] >= columnBufferSizes[column]) {
226 try {
227 bufferMoreColumnBytes(column);
228 } catch (IOException ioe) {
229 return null;
230 }
231 }
232
233 currentbyte = columnBufferOffsets[column];
234 try {
235 while (columnByteBuffers[column].array()[currentbyte] != '\n') {
236 currentbyte++;
237 if (currentbyte == columnBufferSizes[column]) {
238 // save the leftover:
239 if (ret == null) {
240 ret = new String(columnByteBuffers[column].array(), columnBufferOffsets[column], columnBufferSizes[column] - columnBufferOffsets[column], "UTF8");
241 } else {
242 ret = ret.concat(new String(columnByteBuffers[column].array(), columnBufferOffsets[column], columnBufferSizes[column] - columnBufferOffsets[column], "UTF8"));
243 }
244 // read more bytes:
245 bufferMoreColumnBytes(column);
246 currentbyte = 0;
247 }
248 }
249
250 // presumably, we have found our '\n':
251 if (ret == null) {
252 ret = new String(columnByteBuffers[column].array(), columnBufferOffsets[column], currentbyte - columnBufferOffsets[column], "UTF8");
253 } else {
254 ret = ret.concat(new String(columnByteBuffers[column].array(), columnBufferOffsets[column], currentbyte - columnBufferOffsets[column], "UTF8"));
255 }
256
257 } catch (IOException ioe) {
258 return null;
259 }
260
261 columnBufferOffsets[column] += (currentbyte + 1);
262
263 return ret;
264 }
265 */
266
    /**
     * Reads the next entry from the given subset column as raw bytes, with
     * tab substitution enabled (the trailing newline becomes a tab for all
     * but the last column — see the two-argument overload).
     */
    public byte[] readColumnEntryBytes(int column) {
        return readColumnEntryBytes(column, true);
    }
270
271
272 public byte[] readColumnEntryBytes(int column, boolean addTabs) {
273 byte[] leftover = null;
274 byte[] ret = null;
275
276 if (columnBufferOffsets[column] >= columnBufferSizes[column]) {
277 try {
278 bufferMoreColumnBytes(column);
279 if (columnBufferSizes[column] < 1) {
280 return null;
281 }
282 } catch (IOException ioe) {
283 return null;
284 }
285 }
286
287 int byteindex = columnBufferOffsets[column];
288 try {
289 while (columnByteBuffers[column].array()[byteindex] != '\n') {
290 byteindex++;
291 if (byteindex == columnBufferSizes[column]) {
292 // save the leftover:
293 if (leftover == null) {
294 leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]];
295 System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]);
296 } else {
297 byte[] merged = new byte[leftover.length + columnBufferSizes[column]];
298
299 System.arraycopy(leftover, 0, merged, 0, leftover.length);
300 System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]);
301 leftover = merged;
302 merged = null;
303 }
304 // read more bytes:
305 bufferMoreColumnBytes(column);
306 if (columnBufferSizes[column] < 1) {
307 return null;
308 }
309 byteindex = 0;
310 }
311 }
312
313 // presumably, we have found our '\n':
314 if (leftover == null) {
315 ret = new byte[byteindex - columnBufferOffsets[column] + 1];
316 System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1);
317 } else {
318 ret = new byte[leftover.length + byteindex + 1];
319 System.arraycopy(leftover, 0, ret, 0, leftover.length);
320 System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1);
321 }
322
323 } catch (IOException ioe) {
324 return null;
325 }
326
327 columnBufferOffsets[column] = (byteindex + 1);
328
329 if (column < columnBufferOffsets.length - 1) {
330 ret[ret.length - 1] = '\t';
331 }
332 return ret;
333 }
334
335 public int readSingleColumnSubset(byte[] buffer) throws IOException {
336 if (columnTotalOffsets[0] == columnTotalLengths[0]) {
337 return -1;
338 }
339
340 if (columnByteBuffers[0] == null) {
341 dbgLog.fine("allocating single column subset buffer.");
342 columnByteBuffers[0] = ByteBuffer.allocate(buffer.length);
343 }
344
345 int bytesread = fileChannel.read(columnByteBuffers[0]);
346 dbgLog.fine("single column subset: read "+bytesread+" bytes.");
347 if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) {
348 bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]);
349 }
350 System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread);
351
352 columnTotalOffsets[0] += bytesread;
353 columnByteBuffers[0].clear();
354 return bytesread > 0 ? bytesread : -1;
355 }
356
357
358 public byte[] readSubsetLineBytes() throws IOException {
359 byte[] ret = null;
360 int total = 0;
361
362 for (int i = 0; i < subsetcount; i++) {
363 columnEntries[i] = readColumnEntryBytes(i);
364 if (columnEntries[i] == null) {
365 throw new IOException("Failed to read subset line entry");
366 }
367 total += columnEntries[i].length;
368 }
369
370 ret = new byte[total];
371 int offset = 0;
372 for (int i = 0; i < subsetcount; i++) {
373 System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length);
374 offset += columnEntries[i].length;
375 }
376 dbgLog.fine("line: "+new String(ret));
377 return ret;
378 }
379
380
381 public void close() {
382 if (fileChannel != null) {
383 try {
384 fileChannel.close();
385 } catch (IOException ioe) {
386 // don't care.
387 }
388 }
389 }
390
    /**
     * Subsets the given tab-delimited file, writing the requested columns
     * to outfile; convenience overload that assumes a tab delimiter.
     */
    public void subsetFile(String infile, String outfile, Set<Integer> columns, Long numCases) {
        subsetFile(infile, outfile, columns, numCases, "\t");
    }
394
395 public void subsetFile(String infile, String outfile, Set<Integer> columns, Long numCases,
396 String delimiter) {
397 try {
398 subsetFile(new FileInputStream(new File(infile)), outfile, columns, numCases, delimiter);
399 } catch (IOException ex) {
400 throw new RuntimeException("Could not open file "+infile);
401 }
402 }
403
404
405 public void subsetFile(InputStream in, String outfile, Set<Integer> columns, Long numCases,
406 String delimiter) {
407 try {
408 Scanner scanner = new Scanner(in);
409 scanner.useDelimiter("\\n");
410
411 BufferedWriter out = new BufferedWriter(new FileWriter(outfile));
412 for (long caseIndex = 0; caseIndex < numCases; caseIndex++) {
413 if (scanner.hasNext()) {
414 String[] line = (scanner.next()).split(delimiter,-1);
415 List<String> ln = new ArrayList<String>();
416 for (Integer i : columns) {
417 ln.add(line[i]);
418 }
419 out.write(StringUtils.join(ln,"\t")+"\n");
420 } else {
421 throw new RuntimeException("Tab file has fewer rows than the determined number of cases.");
422 }
423 }
424
425 while (scanner.hasNext()) {
426 if (!"".equals(scanner.next()) ) {
427 throw new RuntimeException("Tab file has extra nonempty rows than the determined number of cases.");
428
429 }
430 }
431
432 scanner.close();
433 out.close();
434
435 } catch (FileNotFoundException e) {
436 e.printStackTrace();
437 } catch (IOException e) {
438 e.printStackTrace();
439 }
440
441 }
442
443 /*
444 * Straightforward method for subsetting a column; inefficient on large
445 * files, OK to use on small files:
446 */
447
448 public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) {
449 Double[] retVector = new Double[numCases];
450 Scanner scanner = new Scanner(in);
451 scanner.useDelimiter("\\n");
452
453 for (int caseIndex = 0; caseIndex < numCases; caseIndex++) {
454 if (scanner.hasNext()) {
455 String[] line = (scanner.next()).split("\t", -1);
456 try {
457 retVector[caseIndex] = new Double(line[column]);
458 } catch (NumberFormatException ex) {
459 retVector[caseIndex] = null; // missing value
460 }
461 } else {
462 scanner.close();
463 throw new RuntimeException("Tab file has fewer rows than the stored number of cases!");
464 }
465 }
466
467 int tailIndex = numCases;
468 while (scanner.hasNext()) {
469 String nextLine = scanner.next();
470 if (!"".equals(nextLine)) {
471 scanner.close();
472 throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine);
473 }
474 tailIndex++;
475 }
476
477 scanner.close();
478 return retVector;
479
480 }
481
482 /*
483 * Straightforward method for subsetting a tab-delimited data file, extracting
484 * all the columns representing continuous variables and returning them as
485 * a 2-dimensional array of Doubles;
486 * Inefficient on large files, OK to use on small ones.
487 */
488 public static Double[][] subsetDoubleVectors(InputStream in, Set<Integer> columns, int numCases) throws IOException {
489 Double[][] retVector = new Double[columns.size()][numCases];
490 Scanner scanner = new Scanner(in);
491 scanner.useDelimiter("\\n");
492
493 for (int caseIndex = 0; caseIndex < numCases; caseIndex++) {
494 if (scanner.hasNext()) {
495 String[] line = (scanner.next()).split("\t", -1);
496 int j = 0;
497 for (Integer i : columns) {
498 try {
499 // TODO: verify that NaN and +-Inf are going to be
500 // handled correctly here! -- L.A.
501 // NO, "+-Inf" is not handled correctly; see the
502 // comment further down below.
503 retVector[j][caseIndex] = new Double(line[i]);
504 } catch (NumberFormatException ex) {
505 retVector[j][caseIndex] = null; // missing value
506 }
507 j++;
508 }
509 } else {
510 scanner.close();
511 throw new IOException("Tab file has fewer rows than the stored number of cases!");
512 }
513 }
514
515 int tailIndex = numCases;
516 while (scanner.hasNext()) {
517 String nextLine = scanner.next();
518 if (!"".equals(nextLine)) {
519 scanner.close();
520 throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine);
521 }
522 tailIndex++;
523 }
524
525 scanner.close();
526 return retVector;
527
528 }
529
    /**
     * Typed convenience wrappers around subsetObjectVector(): each subsets
     * one column of a tabular DataFile and casts the resulting vector to
     * the requested element type.
     */
    public String[] subsetStringVector(DataFile datafile, int column) throws IOException {
        return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING);
    }

    public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException {
        return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE);
    }

    public Long[] subsetLongVector(DataFile datafile, int column) throws IOException {
        return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG);
    }

    // Float methods are temporary;
    // In normal operations we'll be treating all the floating point types as
    // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest
    // tests. -- L.A.

    public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException {
        return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT);
    }

    // The File-based overloads below operate directly on a raw tab file,
    // for callers that already know the variable and case counts:

    public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException {
        return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING);
    }

    public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException {
        return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE);
    }

    public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException {
        return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG);
    }

    public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException {
        return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT);
    }
566
    /**
     * Subsets one column of a tabular DataFile as a vector of the requested
     * type. For character vectors only, a hard-coded "compatibility mode"
     * is enabled for certain legacy files, to reproduce the behavior of the
     * old unf5 implementation (see subsetObjectVector(File, ...)).
     * @param dataFile a tabular DataFile.
     * @param column the *absolute* (physical) column number, 0-based.
     * @param columntype one of the COLUMN_TYPE_* constants.
     * @throws IOException if the file is not tabular or the column is out
     *         of bounds.
     */
    public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException {
        if (!dataFile.isTabularData()) {
            throw new IOException("DataFile is not tabular data.");
        }

        int varcount = dataFile.getDataTable().getVarQuantity().intValue();
        int casecount = dataFile.getDataTable().getCaseQuantity().intValue();

        if (column >= varcount) {
            throw new IOException("Column "+column+" is out of bounds.");
        }

        File tabfile = dataFile.getFileSystemLocation().toFile();

        if (columntype == COLUMN_TYPE_STRING) {
            String filename = dataFile.getFileMetadata().getLabel();
            if (filename != null) {
                filename = filename.replaceFirst("^_", "");
                // Try to interpret the (underscore-stripped) label as a number:
                Integer fnumvalue = null;
                try {
                    fnumvalue = new Integer(filename);
                } catch (Exception ex){
                    fnumvalue = null;
                }
                if (fnumvalue != null) {
                    // NOTE(review): the numeric cutoff and the exclusion list
                    // below are presumably ids of specific legacy (3.*) files
                    // that need the unf5-compatible treatment — TODO confirm;
                    // they look installation-specific and, per the comment in
                    // subsetObjectVector(File,...), compat mode was meant to
                    // be removed before production 4.0.
                    //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) {
                    if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) {
                        if (!(fnumvalue.intValue() == 60007
                                || fnumvalue.intValue() == 59997
                                || fnumvalue.intValue() == 60015
                                || fnumvalue.intValue() == 59948
                                || fnumvalue.intValue() == 60012
                                || fnumvalue.intValue() == 52585
                                || fnumvalue.intValue() == 60005
                                || fnumvalue.intValue() == 60002
                                || fnumvalue.intValue() == 59954
                                || fnumvalue.intValue() == 60008
                                || fnumvalue.intValue() == 54972
                                || fnumvalue.intValue() == 55010
                                || fnumvalue.intValue() == 54996
                                || fnumvalue.intValue() == 53527
                                || fnumvalue.intValue() == 53546
                                || fnumvalue.intValue() == 55002
                                || fnumvalue.intValue() == 55006
                                || fnumvalue.intValue() == 54998
                                || fnumvalue.intValue() == 52552
                                // SPSS/SAV cases with similar issue - compat mode must be disabled
                                //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16...
                                || fnumvalue.intValue() == 54618 // another SAV file, with long strings...
                                || fnumvalue.intValue() == 54619 // [same]
                                || fnumvalue.intValue() == 57983
                                || fnumvalue.intValue() == 58262
                                || fnumvalue.intValue() == 58288
                                || fnumvalue.intValue() == 58656
                                || fnumvalue.intValue() == 59144
                                // || fnumvalue.intValue() == 69626 [nope!]
                                )) {
                            dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;");
                            return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true);
                        }
                    }
                }
            }
        }

        return subsetObjectVector(tabfile, column, varcount, casecount, columntype);
    }
634
    /**
     * Subsets one column of a raw tab file, with "compatibility mode"
     * disabled (see the six-argument overload).
     */
    public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException {
        return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false);
    }
638
639
640
    /**
     * Subsets one column of a raw tab file into a vector of the requested
     * type (COLUMN_TYPE_STRING/LONG/DOUBLE/FLOAT). Works off the "rotated"
     * (column-major) image of the file, so the whole column is read as one
     * contiguous byte range, split on newlines into per-case tokens.
     * @param tabfile the tab-delimited data file.
     * @param column the *absolute* (physical) column number, 0-based.
     * @param varcount total number of columns in the file.
     * @param casecount total number of rows in the file.
     * @param columntype one of the COLUMN_TYPE_* constants.
     * @param compatmode when true, string values are additionally
     *        truncated/trimmed/null-substituted to replicate the behavior
     *        of the legacy "unf5" implementation (see inline comments).
     * @return an array of casecount values; empty/unparseable tokens are null.
     * @throws IOException on read problems, or if the number of tokens
     *         found does not match casecount.
     */
    public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException {

        Object[] retVector = null;

        boolean isString = false;
        boolean isDouble = false;
        boolean isLong = false;
        boolean isFloat = false;

        //Locale loc = new Locale("en", "US");

        // Allocate the return vector of the requested element type:
        if (columntype == COLUMN_TYPE_STRING) {
            isString = true;
            retVector = new String[casecount];
        } else if (columntype == COLUMN_TYPE_DOUBLE) {
            isDouble = true;
            retVector = new Double[casecount];
        } else if (columntype == COLUMN_TYPE_LONG) {
            isLong = true;
            retVector = new Long[casecount];
        } else if (columntype == COLUMN_TYPE_FLOAT){
            isFloat = true;
            retVector = new Float[casecount];
        } else {
            throw new IOException("Unsupported column type: "+columntype);
        }

        // Locate the column's byte range inside the rotated image; the
        // first varcount*8 bytes of that image are the offsets header:
        File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount);
        long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount);
        long columnOffset = 0;
        long columnLength = 0;

        if (column > 0) {
            columnOffset = columnEndOffsets[column - 1];
            columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1];
        } else {
            columnOffset = varcount * 8;
            columnLength = columnEndOffsets[0] - varcount * 8;
        }

        // NOTE(review): fc is not closed if an exception is thrown below —
        // potential channel leak; consider a try-finally.
        FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ));
        fc.position(columnOffset);
        int MAX_COLUMN_BUFFER = 8192;

        ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER);

        if (columnLength < MAX_COLUMN_BUFFER) {
            in.limit((int)(columnLength));
        }

        long bytesRead = 0;
        long bytesReadTotal = 0;
        int caseindex = 0;      // index of the next value to store in retVector
        int byteoffset = 0;     // start of the current token within the buffer
        byte[] leftover = null; // partial token carried over between buffer refills

        while (bytesReadTotal < columnLength) {
            bytesRead = fc.read(in);
            byte[] columnBytes = in.array();
            int bytecount = 0;


            // Walk the buffer, splitting it into newline-terminated tokens:
            while (bytecount < bytesRead) {
                if (columnBytes[bytecount] == '\n') {
                    /*
                    String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8");

                    if (leftover != null) {
                        String leftoverString = new String (leftover, "UTF8");
                        token = leftoverString + token;
                        leftover = null;
                    }
                    */
                    /*
                     * Note that the way I was doing it at first - above -
                     * was not quite the correct way - because I was creating UTF8
                     * strings from the leftover bytes, and the bytes in the
                     * current buffer *separately*; which means, if a multi-byte
                     * UTF8 character got split in the middle between one buffer
                     * and the next, both chunks of it would become junk
                     * characters, on each side!
                     * The correct way of doing it, of course, is to create a
                     * merged byte buffer, and then turn it into a UTF8 string.
                     * -- L.A. 4.0
                     */
                    String token = null;

                    if (leftover == null) {
                        token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8");
                    } else {
                        byte[] merged = new byte[leftover.length + bytecount-byteoffset];

                        System.arraycopy(leftover, 0, merged, 0, leftover.length);
                        System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount-byteoffset);
                        token = new String (merged, "UTF8");
                        leftover = null;
                        merged = null;
                    }

                    if (isString) {
                        if ("".equals(token)) {
                            // An empty string is a string missing value!
                            // An empty string in quotes is an empty string!
                            retVector[caseindex] = null;
                        } else {
                            // Strip the outer quotes:
                            token = token.replaceFirst("^\\\"", "");
                            token = token.replaceFirst("\\\"$", "");

                            // We need to restore the special characters that
                            // are stored in tab files escaped - quotes, new lines
                            // and tabs. Before we do that however, we need to
                            // take care of any escaped backslashes stored in
                            // the tab file. I.e., "foo\t" should be transformed
                            // to "foo<TAB>"; but "foo\\t" should be transformed
                            // to "foo\t". This way new lines and tabs that were
                            // already escaped in the original data are not
                            // going to be transformed to unescaped tab and
                            // new line characters!

                            String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2);

                            // (note that it's important to use the 2-argument version
                            // of String.split(), and set the limit argument to a
                            // negative value; otherwise any trailing backslashes
                            // are lost.)

                            for (int i = 0; i < splitTokens.length; i++) {
                                splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\"");
                                splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t");
                                splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n");
                                splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r");
                            }
                            // TODO:
                            // Make (some of?) the above optional; for ex., we
                            // do need to restore the newlines when calculating UNFs;
                            // But if we are subsetting these vectors in order to
                            // create a new tab-delimited file, they will
                            // actually break things! -- L.A. Jul. 28 2014

                            token = StringUtils.join(splitTokens, '\\');

                            // "compatibility mode" - a hack, to be able to produce
                            // unfs identical to those produced by the "early"
                            // unf5 jar; will be removed in production 4.0.
                            // -- L.A. (TODO: ...)
                            if (compatmode && !"".equals(token)) {
                                if (token.length() > 128) {
                                    if ("".equals(token.trim())) {
                                        // don't ask...
                                        token = token.substring(0, 129);
                                    } else {
                                        token = token.substring(0, 128);
                                        //token = String.format(loc, "%.128s", token);
                                        token = token.trim();
                                        //dbgLog.info("formatted and trimmed: "+token);
                                    }
                                } else {
                                    if ("".equals(token.trim())) {
                                        // again, don't ask;
                                        // - this replicates some bugginness
                                        // that happens inside unf5;
                                        token = "null";
                                    } else {
                                        token = token.trim();
                                    }
                                }
                            }

                            retVector[caseindex] = token;
                        }
                    } else if (isDouble) {
                        try {
                            // TODO: verify that NaN and +-Inf are
                            // handled correctly here! -- L.A.
                            // Verified: new Double("nan") works correctly,
                            // resulting in Double.NaN;
                            // Double("[+-]Inf") doesn't work however;
                            // (the constructor appears to be expecting it
                            // to be spelled as "Infinity", "-Infinity", etc.
                            if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) {
                                retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY;
                            } else if ("-inf".equalsIgnoreCase(token)) {
                                retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY;
                            } else if (token == null || token.equals("")) {
                                // missing value:
                                retVector[caseindex] = null;
                            } else {
                                retVector[caseindex] = new Double(token);
                            }
                        } catch (NumberFormatException ex) {
                            dbgLog.warning("NumberFormatException thrown for "+token+" as Double");

                            retVector[caseindex] = null; // missing value
                            // TODO: ?
                        }
                    } else if (isLong) {
                        try {
                            retVector[caseindex] = new Long(token);
                        } catch (NumberFormatException ex) {
                            retVector[caseindex] = null; // assume missing value
                        }
                    } else if (isFloat) {
                        try {
                            if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) {
                                retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY;
                            } else if ("-inf".equalsIgnoreCase(token)) {
                                retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY;
                            } else if (token == null || token.equals("")) {
                                // missing value:
                                retVector[caseindex] = null;
                            } else {
                                retVector[caseindex] = new Float(token);
                            }
                        } catch (NumberFormatException ex) {
                            dbgLog.warning("NumberFormatException thrown for "+token+" as Float");
                            retVector[caseindex] = null; // assume missing value (TODO: ?)
                        }
                    }
                    caseindex++;

                    if (bytecount == bytesRead - 1) {
                        byteoffset = 0;
                    } else {
                        byteoffset = bytecount + 1;
                    }
                } else {
                    if (bytecount == bytesRead - 1) {
                        // We've reached the end of the buffer;
                        // This means we'll save whatever unused bytes left in
                        // it - i.e., the bytes between the last new line
                        // encountered and the end - in the leftover buffer.

                        // *EXCEPT*, there may be a case of a very long String
                        // that is actually longer than MAX_COLUMN_BUFFER, in
                        // which case it is possible that we've read through
                        // an entire buffer of bytes without finding any
                        // new lines... in this case we may need to add this
                        // entire byte buffer to an already existing leftover
                        // buffer!
                        if (leftover == null) {
                            leftover = new byte[(int)bytesRead - byteoffset];
                            System.arraycopy(columnBytes, byteoffset, leftover, 0, (int)bytesRead - byteoffset);
                        } else {
                            if (byteoffset != 0) {
                                throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!");
                            }
                            byte[] merged = new byte[leftover.length + (int)bytesRead];

                            System.arraycopy(leftover, 0, merged, 0, leftover.length);
                            System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int)bytesRead);
                            //leftover = null;
                            leftover = merged;
                            merged = null;
                        }
                        byteoffset = 0;

                    }
                }
                bytecount++;
            }

            bytesReadTotal += bytesRead;
            in.clear();
            // Cap the next read so we never pull in the next column's bytes:
            if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) {
                in.limit((int)(columnLength - bytesReadTotal));
            }
        }

        fc.close();

        if (caseindex != casecount) {
            // NOTE(review): "Faile" typo in the message below (left as-is here).
            throw new IOException("Faile to read "+casecount+" tokens for column "+column);
            //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+".");
        }

        return retVector;
    }
919
920 private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException {
921 BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile));
922
923 byte[] offsetHeader = new byte[varcount * 8];
924 long[] byteOffsets = new long[varcount];
925
926
927 int readlen = rotfileStream.read(offsetHeader);
928
929 if (readlen != varcount * 8) {
930 throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file.");
931 }
932
933 for (int varindex = 0; varindex < varcount; varindex++) {
934 byte[] offsetBytes = new byte[8];
935 System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8);
936
937 ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes);
938 byteOffsets[varindex] = offsetByteBuffer.getLong();
939
940 //System.out.println(byteOffsets[varindex]);
941 }
942
943 rotfileStream.close();
944
945 return byteOffsets;
946 }
947
948 private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException {
949 String fileName = tabfile.getAbsolutePath();
950 String rotatedImageFileName = fileName + ".90d";
951 File rotatedImageFile = new File(rotatedImageFileName);
952 if (rotatedImageFile.exists()) {
953 //System.out.println("Image already exists!");
954 return rotatedImageFile;
955 }
956
957 return generateRotatedImage(tabfile, varcount, casecount);
958
959 }
960
    /**
     * Generates the column-"rotated" (transposed) image of a tab-delimited
     * data file, saved next to the original as "&lt;tabfile&gt;.90d".
     *
     * Layout of the rotated image file:
     *   - header: varcount big-endian 8-byte longs; the value at position
     *     varindex is the byte offset of the END of that column's vector
     *     (the first vector starts right after the header, at varcount * 8);
     *   - body: the column vectors back to back, each consisting of
     *     casecount newline-terminated value tokens.
     *
     * Columns are accumulated in fixed-size in-memory buffers; whenever a
     * buffer fills up it is flushed (appended) to a per-column temp file.
     * At the end the temp files and the in-memory remainders are
     * concatenated into the final image.
     *
     * @param tabfile   the tab-delimited source file
     * @param varcount  number of variables (columns) per line
     * @param casecount number of cases (lines) expected in the file
     * @return the newly created ".90d" rotated image file
     * @throws IOException if the tab file has fewer than casecount lines,
     *         if a column temp file could not be read back completely, or
     *         on any underlying read/write failure
     */
    private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException {
        // TODO: throw exceptions if bad file, zero varcount, etc. ...

        String fileName = tabfile.getAbsolutePath();
        String rotatedImageFileName = fileName + ".90d";

        // NOTE(review): MAX_OUTPUT_STREAMS and MAX_BUFFERED_BYTES are never
        // used below; only MAX_COLUMN_BUFFER (the per-column in-memory
        // buffer size) actually takes effect.
        int MAX_OUTPUT_STREAMS = 32;
        int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now?
        int MAX_COLUMN_BUFFER = 8 * 1024;

        // offsetHeader will contain the byte offsets of the individual column
        // vectors in the final rotated image file
        byte[] offsetHeader = new byte[varcount * 8];
        int[] bufferedSizes = new int[varcount];     // bytes currently buffered in memory, per column
        long[] cachedfileSizes = new long[varcount]; // bytes already flushed to the temp file, per column
        File[] columnTempFiles = new File[varcount]; // lazily-created overflow temp files, per column

        for (int i = 0; i < varcount; i++) {
            bufferedSizes[i] = 0;
            cachedfileSizes[i] = 0;
        }

        // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is
        // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?)

        byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER];

        // read the tab-delimited file:
        // NOTE(review): tabfileStream/scanner are not closed on the normal
        // path, and none of the streams opened in this method are closed if
        // an exception is thrown part-way through - consider try/finally.

        FileInputStream tabfileStream = new FileInputStream(tabfile);

        Scanner scanner = new Scanner(tabfileStream);
        scanner.useDelimiter("\\n");

        for (int caseindex = 0; caseindex < casecount; caseindex++) {
            if (scanner.hasNext()) {
                // split with limit -1 so trailing empty values are preserved:
                String[] line = (scanner.next()).split("\t", -1);
                // TODO: throw an exception if there are fewer tab-delimited
                // tokens than the number of variables specified.
                String token = "";
                int tokensize = 0;
                for (int varindex = 0; varindex < varcount; varindex++) {
                    // TODO: figure out the safest way to convert strings to
                    // bytes here. Is it going to be safer to use getBytes("UTF8")?
                    // we are already making the assumption that the values
                    // in the tab file are in UTF8. -- L.A.
                    // NOTE(review): getBytes() with no argument uses the
                    // platform default charset; on a non-UTF-8 JVM this can
                    // corrupt non-ASCII values - confirm before changing,
                    // since existing .90d files were written this way.
                    token = line[varindex] + "\n";
                    tokensize = token.getBytes().length;
                    if (bufferedSizes[varindex]+tokensize > MAX_COLUMN_BUFFER) {
                        // fill the buffer and dump its contents into the temp file:
                        // (do note that there may be *several* MAX_COLUMN_BUFFERs
                        // worth of bytes in the token!)

                        int tokenoffset = 0;

                        if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) {
                            tokenoffset = MAX_COLUMN_BUFFER-bufferedSizes[varindex];
                            System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset);
                        } // (otherwise the buffer is already full, and we should
                        // simply dump it into the temp file, without adding any
                        // extra bytes to it)

                        // lazily create the overflow temp file for this column:
                        File bufferTempFile = columnTempFiles[varindex];
                        if (bufferTempFile == null) {
                            bufferTempFile = File.createTempFile("columnBufferFile", "bytes");
                            columnTempFiles[varindex] = bufferTempFile;
                        }

                        // *append* the contents of the buffer to the end of the
                        // temp file, if already exists:
                        BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream (bufferTempFile, true));
                        outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER);
                        cachedfileSizes[varindex] += MAX_COLUMN_BUFFER;

                        // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into
                        // the temp file, for as long as there's more than MAX_COLUMN_BUFFER
                        // bytes left in the token:

                        while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) {
                            outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER);
                            cachedfileSizes[varindex] += MAX_COLUMN_BUFFER;
                            tokenoffset += MAX_COLUMN_BUFFER;
                        }

                        outputStream.close();

                        // buffer the remaining bytes and reset the buffered
                        // byte counter:

                        System.arraycopy(token.getBytes(),
                                         tokenoffset,
                                         bufferedColumns[varindex],
                                         0,
                                         tokensize - tokenoffset);

                        bufferedSizes[varindex] = tokensize - tokenoffset;

                    } else {
                        // continue buffering
                        System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize);
                        bufferedSizes[varindex] += tokensize;
                    }
                }
            } else {
                scanner.close();
                throw new IOException("Tab file has fewer rows than the stored number of cases!");
            }

        }

        // OK, we've created the individual byte vectors of the tab file columns;
        // they may be partially saved in temp files and/or in memory.
        // We now need to go through all these buffers and create the final
        // rotated image file.

        BufferedOutputStream finalOut = new BufferedOutputStream(new FileOutputStream (new File(rotatedImageFileName)));

        // but first we should create the offset header and write it out into
        // the final file; because it should be at the head, doh!

        long columnOffset = varcount * 8;
        // (this is the offset of the first column vector; it is equal to the
        // size of the offset header, i.e. varcount * 8 bytes)

        for (int varindex = 0; varindex < varcount; varindex++) {
            long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex];
            columnOffset+=totalColumnBytes;
            // the stored value is the *end* offset of this column's vector
            // (equivalently, the start of the next column's):
            byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array();
            System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8);
        }

        finalOut.write(offsetHeader, 0, varcount * 8);

        for (int varindex = 0; varindex < varcount; varindex++) {
            long cachedBytesRead = 0;

            // check if there is a cached temp file:

            File cachedTempFile = columnTempFiles[varindex];
            if (cachedTempFile != null) {
                byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER];
                BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile));
                int readlen = 0;
                while ((readlen = cachedIn.read(cachedBytes)) > -1) {
                    finalOut.write(cachedBytes, 0, readlen);
                    cachedBytesRead += readlen;
                }
                cachedIn.close();
                // delete the temp file:
                cachedTempFile.delete();

            }

            // sanity check: everything we flushed must have been read back
            if (cachedBytesRead != cachedfileSizes[varindex]) {
                finalOut.close();
                throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+
                        cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read.");
            }

            // then check if there are any bytes buffered for this column:

            if (bufferedSizes[varindex] > 0) {
                finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]);
            }

        }

        finalOut.close();
        return new File(rotatedImageFileName);

    }
1133
1134 /*
1135 * Test method for taking a "rotated" image, and reversing it, reassembling
1136 * all the columns in the original order. Which should result in a file
1137 * byte-for-byte identical file to the original tab-delimited version.
1138 *
1139 * (do note that this method is not efficiently implemented; it's only
1140 * being used for experiments so far, to confirm the accuracy of the
1141 * accuracy of generateRotatedImage(). It should not be used for any
1142 * practical means in the application!)
1143 */
1144 private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException {
1145 // open the file, read in the offset header:
1146 BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile));
1147
1148 byte[] offsetHeader = new byte[varcount * 8];
1149 long[] byteOffsets = new long[varcount];
1150
1151 int readlen = rotfileStream.read(offsetHeader);
1152
1153 if (readlen != varcount * 8) {
1154 throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file.");
1155 }
1156
1157 for (int varindex = 0; varindex < varcount; varindex++) {
1158 byte[] offsetBytes = new byte[8];
1159 System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8);
1160
1161 ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes);
1162 byteOffsets[varindex] = offsetByteBuffer.getLong();
1163
1164 //System.out.println(byteOffsets[varindex]);
1165 }
1166
1167 String [][] reversedMatrix = new String[casecount][varcount];
1168
1169 long offset = varcount * 8;
1170 byte[] columnBytes;
1171
1172 for (int varindex = 0; varindex < varcount; varindex++) {
1173 long columnLength = byteOffsets[varindex] - offset;
1174
1175
1176
1177 columnBytes = new byte[(int)columnLength];
1178 readlen = rotfileStream.read(columnBytes);
1179
1180 if (readlen != columnLength) {
1181 throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex);
1182 }
1183 /*
1184 String columnString = new String(columnBytes);
1185 //System.out.print(columnString);
1186 String[] values = columnString.split("\n", -1);
1187
1188 if (values.length < casecount) {
1189 throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex);
1190 }
1191
1192 for (int caseindex = 0; caseindex < casecount; caseindex++) {
1193 reversedMatrix[caseindex][varindex] = values[caseindex];
1194 }*/
1195
1196 int bytecount = 0;
1197 int byteoffset = 0;
1198 int caseindex = 0;
1199 //System.out.println("generating value vector for column "+varindex);
1200 while (bytecount < columnLength) {
1201 if (columnBytes[bytecount] == '\n') {
1202 String token = new String(columnBytes, byteoffset, bytecount-byteoffset);
1203 reversedMatrix[caseindex++][varindex] = token;
1204 byteoffset = bytecount + 1;
1205 }
1206 bytecount++;
1207 }
1208
1209 if (caseindex != casecount) {
1210 throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex);
1211 }
1212 offset = byteOffsets[varindex];
1213 }
1214
1215 for (int caseindex = 0; caseindex < casecount; caseindex++) {
1216 for (int varindex = 0; varindex < varcount; varindex++) {
1217 System.out.print(reversedMatrix[caseindex][varindex]);
1218 if (varindex < varcount-1) {
1219 System.out.print("\t");
1220 } else {
1221 System.out.print("\n");
1222 }
1223 }
1224 }
1225
1226 rotfileStream.close();
1227
1228
1229 }
1230
1231 /**
1232 * main() method, for testing
1233 * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type
1234 * make sure the CLASSPATH contains ...
1235 *
1236 */
1237
1238 public static void main(String[] args) {
1239
1240 String tabFileName = args[0];
1241 int varcount = new Integer(args[1]).intValue();
1242 int casecount = new Integer(args[2]).intValue();
1243 int column = new Integer(args[3]).intValue();
1244 String type = args[4];
1245
1246 File tabFile = new File(tabFileName);
1247 File rotatedImageFile = null;
1248
1249 TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator();
1250
1251 /*
1252 try {
1253 rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount);
1254 } catch (IOException ex) {
1255 System.out.println(ex.getMessage());
1256 }
1257 */
1258
1259 //System.out.println("\nFinished generating \"rotated\" column image file.");
1260
1261 //System.out.println("\nOffsets:");
1262
1263 MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN);
1264 String FORMAT_IEEE754 = "%+#.15e";
1265
1266 try {
1267 //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount);
1268 //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount);
1269 if ("string".equals(type)) {
1270 String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount);
1271 for (int i = 0; i < casecount; i++) {
1272 System.out.println(columns[i]);
1273 }
1274 } else {
1275
1276 Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount);
1277 for (int i = 0; i < casecount; i++) {
1278 if (columns[i] != null) {
1279 BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext);
1280 System.out.println(String.format(FORMAT_IEEE754, outBigDecimal));
1281 } else {
1282 System.out.println("NA");
1283 }
1284 //System.out.println(columns[i]);
1285 }
1286 }
1287 } catch (IOException ex) {
1288 System.out.println(ex.getMessage());
1289 }
1290 }
1291 }
1292
1293