Mercurial > hg > LGDataverses
annotate src/main/java/edu/harvard/iq/dataverse/dataaccess/TabularSubsetGenerator.java @ 10:a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
| author | Zoe Hong <zhong@mpiwg-berlin.mpg.de> |
|---|---|
| date | Tue, 08 Sep 2015 17:00:21 +0200 |
| parents | |
| children |
| rev | line source |
|---|---|
|
10
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
2 Copyright (C) 2005-2012, by the President and Fellows of Harvard College. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
3 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
4 Licensed under the Apache License, Version 2.0 (the "License"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
5 you may not use this file except in compliance with the License. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
6 You may obtain a copy of the License at |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
7 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
8 http://www.apache.org/licenses/LICENSE-2.0 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
9 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
10 Unless required by applicable law or agreed to in writing, software |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
11 distributed under the License is distributed on an "AS IS" BASIS, |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
13 See the License for the specific language governing permissions and |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
14 limitations under the License. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
15 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
16 Dataverse Network - A web application to share, preserve and analyze research data. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
17 Developed at the Institute for Quantitative Social Science, Harvard University. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
18 Version 3.0. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
19 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
20 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
21 package edu.harvard.iq.dataverse.dataaccess; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
22 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
23 import edu.harvard.iq.dataverse.DataFile; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
24 import edu.harvard.iq.dataverse.datavariable.DataVariable; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
25 import java.util.*; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
26 import java.util.Scanner; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
27 import java.util.logging.*; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
28 import java.io.*; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
29 import java.io.FileNotFoundException; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
30 import java.math.BigDecimal; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
31 import java.math.MathContext; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
32 import java.math.RoundingMode; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
33 import java.nio.ByteBuffer; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
34 import java.nio.channels.FileChannel; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
35 import java.nio.file.Paths; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
36 import java.nio.file.StandardOpenOption; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
37 import java.util.regex.Matcher; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
38 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
39 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
40 import org.apache.commons.lang.*; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
41 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
42 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
43 /** |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
44 * |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
45 * @author Leonid Andreev |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
46 * original author: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
47 * @author a.sone |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
48 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
49 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
50 public class TabularSubsetGenerator implements SubsetGenerator { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
51 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
52 private static Logger dbgLog = Logger.getLogger(TabularSubsetGenerator.class.getPackage().getName()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
53 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
54 private static int COLUMN_TYPE_STRING = 1; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
55 private static int COLUMN_TYPE_LONG = 2; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
56 private static int COLUMN_TYPE_DOUBLE = 3; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
57 private static int COLUMN_TYPE_FLOAT = 4; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
58 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
59 private static int MAX_COLUMN_BUFFER = 8192; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
60 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
61 private FileChannel fileChannel = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
62 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
63 private int varcount; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
64 private int casecount; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
65 private int subsetcount; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
66 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
67 private byte[][] columnEntries = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
68 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
69 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
70 private ByteBuffer[] columnByteBuffers; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
71 private int[] columnBufferSizes; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
72 private int[] columnBufferOffsets; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
73 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
74 private long[] columnStartOffsets; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
75 private long[] columnTotalOffsets; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
76 private long[] columnTotalLengths; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
77 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
78 public TabularSubsetGenerator() { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
79 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
80 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
81 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
82 public TabularSubsetGenerator (DataFile datafile, List<DataVariable> variables) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
83 if (!datafile.isTabularData()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
84 throw new IOException("DataFile is not tabular data."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
85 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
86 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
87 setVarCount(datafile.getDataTable().getVarQuantity().intValue()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
88 setCaseCount(datafile.getDataTable().getCaseQuantity().intValue()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
89 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
90 File tabfile = datafile.getFileSystemLocation().toFile(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
91 File rotatedImageFile = getRotatedImage(tabfile, getVarCount(), getCaseCount()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
92 long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, getVarCount(), getCaseCount()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
93 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
94 fileChannel = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
95 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
96 if (variables == null || variables.size() < 1 || variables.size() > getVarCount()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
97 throw new IOException("Illegal number of variables in the subset request"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
98 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
99 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
100 subsetcount = variables.size(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
101 columnTotalOffsets = new long[subsetcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
102 columnTotalLengths = new long[subsetcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
103 columnByteBuffers = new ByteBuffer[subsetcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
104 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
105 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
106 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
107 if (subsetcount == 1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
108 if (!datafile.getDataTable().getId().equals(variables.get(0).getDataTable().getId())) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
109 throw new IOException("Variable in the subset request does not belong to the datafile."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
110 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
111 dbgLog.fine("single variable subset; setting fileChannel position to "+extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
112 fileChannel.position(extractColumnOffset(columnEndOffsets, variables.get(0).getFileOrder())); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
113 columnTotalLengths[0] = extractColumnLength(columnEndOffsets, variables.get(0).getFileOrder()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
114 columnTotalOffsets[0] = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
115 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
116 columnEntries = new byte[subsetcount][]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
117 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
118 columnBufferSizes = new int[subsetcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
119 columnBufferOffsets = new int[subsetcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
120 columnStartOffsets = new long[subsetcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
121 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
122 int i = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
123 for (DataVariable var : variables) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
124 if (!datafile.getDataTable().getId().equals(var.getDataTable().getId())) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
125 throw new IOException("Variable in the subset request does not belong to the datafile."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
126 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
127 columnByteBuffers[i] = ByteBuffer.allocate(MAX_COLUMN_BUFFER); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
128 columnTotalLengths[i] = extractColumnLength(columnEndOffsets, var.getFileOrder()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
129 columnStartOffsets[i] = extractColumnOffset(columnEndOffsets, var.getFileOrder()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
130 if (columnTotalLengths[i] < MAX_COLUMN_BUFFER) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
131 columnByteBuffers[i].limit((int)columnTotalLengths[i]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
132 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
133 fileChannel.position(columnStartOffsets[i]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
134 columnBufferSizes[i] = fileChannel.read(columnByteBuffers[i]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
135 columnBufferOffsets[i] = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
136 columnTotalOffsets[i] = columnBufferSizes[i]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
137 i++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
138 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
139 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
140 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
141 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
142 private int getVarCount() { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
143 return varcount; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
144 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
145 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
146 private void setVarCount(int varcount) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
147 this.varcount = varcount; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
148 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
149 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
150 private int getCaseCount() { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
151 return casecount; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
152 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
153 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
154 private void setCaseCount(int casecount) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
155 this.casecount = casecount; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
156 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
157 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
158 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
159 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
160 * Note that this method operates on the *absolute* column number, i.e. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
161 * the number of the physical column in the tabular file. This is stored |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
162 * in DataVariable.FileOrder. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
163 * This "column number" should not be confused with the number of column |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
164 * in the subset request; a user can request any number of variable |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
165 * columns, in an order that doesn't have to follow the physical order |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
166 * of the columns in the file. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
167 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
168 private long extractColumnOffset(long[] columnEndOffsets, int column) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
169 if (columnEndOffsets == null || columnEndOffsets.length <= column) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
170 throw new IOException("Offsets table not initialized; or column out of bounds."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
171 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
172 long columnOffset; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
173 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
174 if (column > 0) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
175 columnOffset = columnEndOffsets[column - 1]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
176 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
177 columnOffset = getVarCount() * 8; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
178 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
179 return columnOffset; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
180 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
181 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
182 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
183 * See the comment for the method above. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
184 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
185 private long extractColumnLength(long[] columnEndOffsets, int column) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
186 if (columnEndOffsets == null || columnEndOffsets.length <= column) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
187 throw new IOException("Offsets table not initialized; or column out of bounds."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
188 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
189 long columnLength; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
190 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
191 if (column > 0) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
192 columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
193 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
194 columnLength = columnEndOffsets[0] - varcount * 8; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
195 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
196 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
197 return columnLength; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
198 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
199 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
200 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
201 private void bufferMoreColumnBytes(int column) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
202 if (columnTotalOffsets[column] >= columnTotalLengths[column]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
203 throw new IOException("attempt to buffer bytes past the column boundary"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
204 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
205 fileChannel.position(columnStartOffsets[column] + columnTotalOffsets[column]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
206 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
207 columnByteBuffers[column].clear(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
208 if (columnTotalLengths[column] < columnTotalOffsets[column] + MAX_COLUMN_BUFFER) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
209 dbgLog.fine("Limiting the buffer to "+(columnTotalLengths[column] - columnTotalOffsets[column])+" bytes"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
210 columnByteBuffers[column].limit((int) (columnTotalLengths[column] - columnTotalOffsets[column])); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
211 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
212 columnBufferSizes[column] = fileChannel.read(columnByteBuffers[column]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
213 dbgLog.fine("Read "+columnBufferSizes[column]+" bytes for subset column "+column); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
214 columnBufferOffsets[column] = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
215 columnTotalOffsets[column] += columnBufferSizes[column]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
216 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
217 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
218 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
219 do not use this method! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
220 there's a high potential for the "UTF8 character split between buffers" error! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
221 public String readColumnEntry(int column) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
222 String ret = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
223 int currentbyte; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
224 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
225 if (columnBufferOffsets[column] >= columnBufferSizes[column]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
226 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
227 bufferMoreColumnBytes(column); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
228 } catch (IOException ioe) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
229 return null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
230 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
231 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
232 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
233 currentbyte = columnBufferOffsets[column]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
234 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
235 while (columnByteBuffers[column].array()[currentbyte] != '\n') { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
236 currentbyte++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
237 if (currentbyte == columnBufferSizes[column]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
238 // save the leftover: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
239 if (ret == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
240 ret = new String(columnByteBuffers[column].array(), columnBufferOffsets[column], columnBufferSizes[column] - columnBufferOffsets[column], "UTF8"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
241 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
242 ret = ret.concat(new String(columnByteBuffers[column].array(), columnBufferOffsets[column], columnBufferSizes[column] - columnBufferOffsets[column], "UTF8")); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
243 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
244 // read more bytes: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
245 bufferMoreColumnBytes(column); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
246 currentbyte = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
247 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
248 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
249 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
250 // presumably, we have found our '\n': |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
251 if (ret == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
252 ret = new String(columnByteBuffers[column].array(), columnBufferOffsets[column], currentbyte - columnBufferOffsets[column], "UTF8"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
253 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
254 ret = ret.concat(new String(columnByteBuffers[column].array(), columnBufferOffsets[column], currentbyte - columnBufferOffsets[column], "UTF8")); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
255 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
256 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
257 } catch (IOException ioe) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
258 return null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
259 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
260 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
261 columnBufferOffsets[column] += (currentbyte + 1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
262 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
263 return ret; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
264 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
265 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
266 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
267 public byte[] readColumnEntryBytes(int column) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
268 return readColumnEntryBytes(column, true); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
269 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
270 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
271 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
272 public byte[] readColumnEntryBytes(int column, boolean addTabs) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
273 byte[] leftover = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
274 byte[] ret = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
275 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
276 if (columnBufferOffsets[column] >= columnBufferSizes[column]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
277 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
278 bufferMoreColumnBytes(column); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
279 if (columnBufferSizes[column] < 1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
280 return null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
281 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
282 } catch (IOException ioe) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
283 return null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
284 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
285 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
286 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
287 int byteindex = columnBufferOffsets[column]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
288 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
289 while (columnByteBuffers[column].array()[byteindex] != '\n') { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
290 byteindex++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
291 if (byteindex == columnBufferSizes[column]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
292 // save the leftover: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
293 if (leftover == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
294 leftover = new byte[columnBufferSizes[column] - columnBufferOffsets[column]]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
295 System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], leftover, 0, columnBufferSizes[column] - columnBufferOffsets[column]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
296 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
297 byte[] merged = new byte[leftover.length + columnBufferSizes[column]]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
298 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
299 System.arraycopy(leftover, 0, merged, 0, leftover.length); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
300 System.arraycopy(columnByteBuffers[column].array(), 0, merged, leftover.length, columnBufferSizes[column]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
301 leftover = merged; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
302 merged = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
303 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
304 // read more bytes: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
305 bufferMoreColumnBytes(column); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
306 if (columnBufferSizes[column] < 1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
307 return null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
308 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
309 byteindex = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
310 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
311 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
312 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
313 // presumably, we have found our '\n': |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
314 if (leftover == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
315 ret = new byte[byteindex - columnBufferOffsets[column] + 1]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
316 System.arraycopy(columnByteBuffers[column].array(), columnBufferOffsets[column], ret, 0, byteindex - columnBufferOffsets[column] + 1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
317 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
318 ret = new byte[leftover.length + byteindex + 1]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
319 System.arraycopy(leftover, 0, ret, 0, leftover.length); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
320 System.arraycopy(columnByteBuffers[column].array(), 0, ret, leftover.length, byteindex + 1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
321 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
322 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
323 } catch (IOException ioe) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
324 return null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
325 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
326 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
327 columnBufferOffsets[column] = (byteindex + 1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
328 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
329 if (column < columnBufferOffsets.length - 1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
330 ret[ret.length - 1] = '\t'; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
331 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
332 return ret; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
333 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
334 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
335 public int readSingleColumnSubset(byte[] buffer) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
336 if (columnTotalOffsets[0] == columnTotalLengths[0]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
337 return -1; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
338 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
339 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
340 if (columnByteBuffers[0] == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
341 dbgLog.fine("allocating single column subset buffer."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
342 columnByteBuffers[0] = ByteBuffer.allocate(buffer.length); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
343 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
344 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
345 int bytesread = fileChannel.read(columnByteBuffers[0]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
346 dbgLog.fine("single column subset: read "+bytesread+" bytes."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
347 if (columnTotalOffsets[0] + bytesread > columnTotalLengths[0]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
348 bytesread = (int)(columnTotalLengths[0] - columnTotalOffsets[0]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
349 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
350 System.arraycopy(columnByteBuffers[0].array(), 0, buffer, 0, bytesread); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
351 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
352 columnTotalOffsets[0] += bytesread; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
353 columnByteBuffers[0].clear(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
354 return bytesread > 0 ? bytesread : -1; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
355 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
356 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
357 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
358 public byte[] readSubsetLineBytes() throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
359 byte[] ret = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
360 int total = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
361 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
362 for (int i = 0; i < subsetcount; i++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
363 columnEntries[i] = readColumnEntryBytes(i); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
364 if (columnEntries[i] == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
365 throw new IOException("Failed to read subset line entry"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
366 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
367 total += columnEntries[i].length; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
368 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
369 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
370 ret = new byte[total]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
371 int offset = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
372 for (int i = 0; i < subsetcount; i++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
373 System.arraycopy(columnEntries[i], 0, ret, offset, columnEntries[i].length); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
374 offset += columnEntries[i].length; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
375 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
376 dbgLog.fine("line: "+new String(ret)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
377 return ret; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
378 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
379 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
380 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
381 public void close() { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
382 if (fileChannel != null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
383 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
384 fileChannel.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
385 } catch (IOException ioe) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
386 // don't care. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
387 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
388 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
389 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
390 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
391 public void subsetFile(String infile, String outfile, Set<Integer> columns, Long numCases) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
392 subsetFile(infile, outfile, columns, numCases, "\t"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
393 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
394 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
395 public void subsetFile(String infile, String outfile, Set<Integer> columns, Long numCases, |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
396 String delimiter) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
397 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
398 subsetFile(new FileInputStream(new File(infile)), outfile, columns, numCases, delimiter); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
399 } catch (IOException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
400 throw new RuntimeException("Could not open file "+infile); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
401 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
402 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
403 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
404 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
405 public void subsetFile(InputStream in, String outfile, Set<Integer> columns, Long numCases, |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
406 String delimiter) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
407 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
408 Scanner scanner = new Scanner(in); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
409 scanner.useDelimiter("\\n"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
410 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
411 BufferedWriter out = new BufferedWriter(new FileWriter(outfile)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
412 for (long caseIndex = 0; caseIndex < numCases; caseIndex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
413 if (scanner.hasNext()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
414 String[] line = (scanner.next()).split(delimiter,-1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
415 List<String> ln = new ArrayList<String>(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
416 for (Integer i : columns) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
417 ln.add(line[i]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
418 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
419 out.write(StringUtils.join(ln,"\t")+"\n"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
420 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
421 throw new RuntimeException("Tab file has fewer rows than the determined number of cases."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
422 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
423 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
424 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
425 while (scanner.hasNext()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
426 if (!"".equals(scanner.next()) ) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
427 throw new RuntimeException("Tab file has extra nonempty rows than the determined number of cases."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
428 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
429 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
430 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
431 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
432 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
433 out.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
434 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
435 } catch (FileNotFoundException e) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
436 e.printStackTrace(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
437 } catch (IOException e) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
438 e.printStackTrace(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
439 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
440 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
441 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
442 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
443 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
444 * Straightforward method for subsetting a column; inefficient on large |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
445 * files, OK to use on small files: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
446 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
447 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
448 public static Double[] subsetDoubleVector(InputStream in, int column, int numCases) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
449 Double[] retVector = new Double[numCases]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
450 Scanner scanner = new Scanner(in); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
451 scanner.useDelimiter("\\n"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
452 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
453 for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
454 if (scanner.hasNext()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
455 String[] line = (scanner.next()).split("\t", -1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
456 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
457 retVector[caseIndex] = new Double(line[column]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
458 } catch (NumberFormatException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
459 retVector[caseIndex] = null; // missing value |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
460 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
461 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
462 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
463 throw new RuntimeException("Tab file has fewer rows than the stored number of cases!"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
464 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
465 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
466 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
467 int tailIndex = numCases; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
468 while (scanner.hasNext()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
469 String nextLine = scanner.next(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
470 if (!"".equals(nextLine)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
471 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
472 throw new RuntimeException("Column "+column+": tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
473 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
474 tailIndex++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
475 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
476 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
477 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
478 return retVector; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
479 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
480 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
481 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
482 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
483 * Straightforward method for subsetting a tab-delimited data file, extracting |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
484 * all the columns representing continuous variables and returning them as |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
485 * a 2-dimensional array of Doubles; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
486 * Inefficient on large files, OK to use on small ones. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
487 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
488 public static Double[][] subsetDoubleVectors(InputStream in, Set<Integer> columns, int numCases) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
489 Double[][] retVector = new Double[columns.size()][numCases]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
490 Scanner scanner = new Scanner(in); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
491 scanner.useDelimiter("\\n"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
492 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
493 for (int caseIndex = 0; caseIndex < numCases; caseIndex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
494 if (scanner.hasNext()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
495 String[] line = (scanner.next()).split("\t", -1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
496 int j = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
497 for (Integer i : columns) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
498 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
499 // TODO: verify that NaN and +-Inf are going to be |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
500 // handled correctly here! -- L.A. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
501 // NO, "+-Inf" is not handled correctly; see the |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
502 // comment further down below. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
503 retVector[j][caseIndex] = new Double(line[i]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
504 } catch (NumberFormatException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
505 retVector[j][caseIndex] = null; // missing value |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
506 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
507 j++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
508 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
509 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
510 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
511 throw new IOException("Tab file has fewer rows than the stored number of cases!"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
512 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
513 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
514 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
515 int tailIndex = numCases; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
516 while (scanner.hasNext()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
517 String nextLine = scanner.next(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
518 if (!"".equals(nextLine)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
519 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
520 throw new IOException("Tab file has more nonempty rows than the stored number of cases ("+numCases+")! current index: "+tailIndex+", line: "+nextLine); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
521 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
522 tailIndex++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
523 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
524 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
525 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
526 return retVector; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
527 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
528 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
529 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
530 public String[] subsetStringVector(DataFile datafile, int column) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
531 return (String[])subsetObjectVector(datafile, column, COLUMN_TYPE_STRING); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
532 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
533 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
534 public Double[] subsetDoubleVector(DataFile datafile, int column) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
535 return (Double[])subsetObjectVector(datafile, column, COLUMN_TYPE_DOUBLE); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
536 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
537 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
538 public Long[] subsetLongVector(DataFile datafile, int column) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
539 return (Long[])subsetObjectVector(datafile, column, COLUMN_TYPE_LONG); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
540 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
541 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
542 // Float methods are temporary; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
543 // In normal operations we'll be treating all the floating point types as |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
544 // doubles. I need to be able to handle floats for some 4.0 vs 3.* ingest |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
545 // tests. -- L.A. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
546 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
547 public Float[] subsetFloatVector(DataFile datafile, int column) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
548 return (Float[])subsetObjectVector(datafile, column, COLUMN_TYPE_FLOAT); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
549 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
550 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
551 public String[] subsetStringVector(File tabfile, int column, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
552 return (String[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_STRING); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
553 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
554 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
555 public Double[] subsetDoubleVector(File tabfile, int column, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
556 return (Double[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_DOUBLE); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
557 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
558 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
559 public Long[] subsetLongVector(File tabfile, int column, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
560 return (Long[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_LONG); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
561 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
562 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
563 public Float[] subsetFloatVector(File tabfile, int column, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
564 return (Float[])subsetObjectVector(tabfile, column, varcount, casecount, COLUMN_TYPE_FLOAT); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
565 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
566 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
567 public Object[] subsetObjectVector(DataFile dataFile, int column, int columntype) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
568 if (!dataFile.isTabularData()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
569 throw new IOException("DataFile is not tabular data."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
570 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
571 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
572 int varcount = dataFile.getDataTable().getVarQuantity().intValue(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
573 int casecount = dataFile.getDataTable().getCaseQuantity().intValue(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
574 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
575 if (column >= varcount) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
576 throw new IOException("Column "+column+" is out of bounds."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
577 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
578 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
579 File tabfile = dataFile.getFileSystemLocation().toFile(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
580 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
581 if (columntype == COLUMN_TYPE_STRING) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
582 String filename = dataFile.getFileMetadata().getLabel(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
583 if (filename != null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
584 filename = filename.replaceFirst("^_", ""); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
585 Integer fnumvalue = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
586 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
587 fnumvalue = new Integer(filename); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
588 } catch (Exception ex){ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
589 fnumvalue = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
590 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
591 if (fnumvalue != null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
592 //if ((fnumvalue.intValue() < 112497)) { // && (fnumvalue.intValue() > 60015)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
593 if ((fnumvalue.intValue() < 111931)) { // && (fnumvalue.intValue() > 60015)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
594 if (!(fnumvalue.intValue() == 60007 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
595 || fnumvalue.intValue() == 59997 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
596 || fnumvalue.intValue() == 60015 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
597 || fnumvalue.intValue() == 59948 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
598 || fnumvalue.intValue() == 60012 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
599 || fnumvalue.intValue() == 52585 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
600 || fnumvalue.intValue() == 60005 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
601 || fnumvalue.intValue() == 60002 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
602 || fnumvalue.intValue() == 59954 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
603 || fnumvalue.intValue() == 60008 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
604 || fnumvalue.intValue() == 54972 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
605 || fnumvalue.intValue() == 55010 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
606 || fnumvalue.intValue() == 54996 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
607 || fnumvalue.intValue() == 53527 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
608 || fnumvalue.intValue() == 53546 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
609 || fnumvalue.intValue() == 55002 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
610 || fnumvalue.intValue() == 55006 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
611 || fnumvalue.intValue() == 54998 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
612 || fnumvalue.intValue() == 52552 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
613 // SPSS/SAV cases with similar issue - compat mode must be disabled |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
614 //|| fnumvalue.intValue() == 101826 // temporary - tricky file with accents and v. 16... |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
615 || fnumvalue.intValue() == 54618 // another SAV file, with long strings... |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
616 || fnumvalue.intValue() == 54619 // [same] |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
617 || fnumvalue.intValue() == 57983 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
618 || fnumvalue.intValue() == 58262 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
619 || fnumvalue.intValue() == 58288 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
620 || fnumvalue.intValue() == 58656 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
621 || fnumvalue.intValue() == 59144 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
622 // || fnumvalue.intValue() == 69626 [nope!] |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
623 )) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
624 dbgLog.info("\"Old\" file name detected; using \"compatibility mode\" for a character vector subset;"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
625 return subsetObjectVector(tabfile, column, varcount, casecount, columntype, true); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
626 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
627 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
628 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
629 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
630 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
631 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
632 return subsetObjectVector(tabfile, column, varcount, casecount, columntype); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
633 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
634 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
635 public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
636 return subsetObjectVector(tabfile, column, varcount, casecount, columntype, false); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
637 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
638 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
639 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
640 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
641 public Object[] subsetObjectVector(File tabfile, int column, int varcount, int casecount, int columntype, boolean compatmode) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
642 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
643 Object[] retVector = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
644 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
645 boolean isString = false; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
646 boolean isDouble = false; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
647 boolean isLong = false; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
648 boolean isFloat = false; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
649 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
650 //Locale loc = new Locale("en", "US"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
651 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
652 if (columntype == COLUMN_TYPE_STRING) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
653 isString = true; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
654 retVector = new String[casecount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
655 } else if (columntype == COLUMN_TYPE_DOUBLE) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
656 isDouble = true; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
657 retVector = new Double[casecount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
658 } else if (columntype == COLUMN_TYPE_LONG) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
659 isLong = true; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
660 retVector = new Long[casecount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
661 } else if (columntype == COLUMN_TYPE_FLOAT){ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
662 isFloat = true; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
663 retVector = new Float[casecount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
664 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
665 throw new IOException("Unsupported column type: "+columntype); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
666 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
667 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
668 File rotatedImageFile = getRotatedImage(tabfile, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
669 long[] columnEndOffsets = extractColumnOffsets(rotatedImageFile, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
670 long columnOffset = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
671 long columnLength = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
672 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
673 if (column > 0) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
674 columnOffset = columnEndOffsets[column - 1]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
675 columnLength = columnEndOffsets[column] - columnEndOffsets[column - 1]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
676 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
677 columnOffset = varcount * 8; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
678 columnLength = columnEndOffsets[0] - varcount * 8; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
679 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
680 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
681 FileChannel fc = (FileChannel.open(Paths.get(rotatedImageFile.getAbsolutePath()), StandardOpenOption.READ)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
682 fc.position(columnOffset); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
683 int MAX_COLUMN_BUFFER = 8192; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
684 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
685 ByteBuffer in = ByteBuffer.allocate(MAX_COLUMN_BUFFER); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
686 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
687 if (columnLength < MAX_COLUMN_BUFFER) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
688 in.limit((int)(columnLength)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
689 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
690 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
691 long bytesRead = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
692 long bytesReadTotal = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
693 int caseindex = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
694 int byteoffset = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
695 byte[] leftover = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
696 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
697 while (bytesReadTotal < columnLength) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
698 bytesRead = fc.read(in); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
699 byte[] columnBytes = in.array(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
700 int bytecount = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
701 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
702 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
703 while (bytecount < bytesRead) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
704 if (columnBytes[bytecount] == '\n') { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
705 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
706 String token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
707 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
708 if (leftover != null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
709 String leftoverString = new String (leftover, "UTF8"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
710 token = leftoverString + token; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
711 leftover = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
712 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
713 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
714 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
715 * Note that the way I was doing it at first - above - |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
716 * was not quite the correct way - because I was creating UTF8 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
717 * strings from the leftover bytes, and the bytes in the |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
718 * current buffer *separately*; which means, if a multi-byte |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
719 * UTF8 character got split in the middle between one buffer |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
720 * and the next, both chunks of it would become junk |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
721 * characters, on each side! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
722 * The correct way of doing it, of course, is to create a |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
723 * merged byte buffer, and then turn it into a UTF8 string. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
724 * -- L.A. 4.0 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
725 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
726 String token = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
727 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
728 if (leftover == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
729 token = new String(columnBytes, byteoffset, bytecount-byteoffset, "UTF8"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
730 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
731 byte[] merged = new byte[leftover.length + bytecount-byteoffset]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
732 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
733 System.arraycopy(leftover, 0, merged, 0, leftover.length); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
734 System.arraycopy(columnBytes, byteoffset, merged, leftover.length, bytecount-byteoffset); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
735 token = new String (merged, "UTF8"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
736 leftover = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
737 merged = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
738 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
739 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
740 if (isString) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
741 if ("".equals(token)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
742 // An empty string is a string missing value! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
743 // An empty string in quotes is an empty string! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
744 retVector[caseindex] = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
745 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
746 // Strip the outer quotes: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
747 token = token.replaceFirst("^\\\"", ""); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
748 token = token.replaceFirst("\\\"$", ""); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
749 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
750 // We need to restore the special characters that |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
751 // are stored in tab files escaped - quotes, new lines |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
752 // and tabs. Before we do that however, we need to |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
753 // take care of any escaped backslashes stored in |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
754 // the tab file. I.e., "foo\t" should be transformed |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
755 // to "foo<TAB>"; but "foo\\t" should be transformed |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
756 // to "foo\t". This way new lines and tabs that were |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
757 // already escaped in the original data are not |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
758 // going to be transformed to unescaped tab and |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
759 // new line characters! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
760 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
761 String[] splitTokens = token.split(Matcher.quoteReplacement("\\\\"), -2); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
762 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
763 // (note that it's important to use the 2-argument version |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
764 // of String.split(), and set the limit argument to a |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
765 // negative value; otherwise any trailing backslashes |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
766 // are lost.) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
767 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
768 for (int i = 0; i < splitTokens.length; i++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
769 splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\\""), "\""); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
770 splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\t"), "\t"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
771 splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\n"), "\n"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
772 splitTokens[i] = splitTokens[i].replaceAll(Matcher.quoteReplacement("\\r"), "\r"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
773 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
774 // TODO: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
775 // Make (some of?) the above optional; for ex., we |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
776 // do need to restore the newlines when calculating UNFs; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
777 // But if we are subsetting these vectors in order to |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
778 // create a new tab-delimited file, they will |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
779 // actually break things! -- L.A. Jul. 28 2014 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
780 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
781 token = StringUtils.join(splitTokens, '\\'); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
782 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
783 // "compatibility mode" - a hack, to be able to produce |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
784 // unfs identical to those produced by the "early" |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
785 // unf5 jar; will be removed in production 4.0. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
786 // -- L.A. (TODO: ...) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
787 if (compatmode && !"".equals(token)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
788 if (token.length() > 128) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
789 if ("".equals(token.trim())) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
790 // don't ask... |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
791 token = token.substring(0, 129); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
792 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
793 token = token.substring(0, 128); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
794 //token = String.format(loc, "%.128s", token); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
795 token = token.trim(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
796 //dbgLog.info("formatted and trimmed: "+token); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
797 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
798 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
799 if ("".equals(token.trim())) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
800 // again, don't ask; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
801 // - this replicates some bugginess |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
802 // that happens inside unf5; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
803 token = "null"; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
804 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
805 token = token.trim(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
806 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
807 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
808 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
809 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
810 retVector[caseindex] = token; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
811 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
812 } else if (isDouble) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
813 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
814 // TODO: verify that NaN and +-Inf are |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
815 // handled correctly here! -- L.A. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
816 // Verified: new Double("nan") works correctly, |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
817 // resulting in Double.NaN; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
818 // Double("[+-]Inf") doesn't work however; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
819 // (the constructor appears to be expecting it |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
820 // to be spelled as "Infinity", "-Infinity", etc.) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
821 if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
822 retVector[caseindex] = java.lang.Double.POSITIVE_INFINITY; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
823 } else if ("-inf".equalsIgnoreCase(token)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
824 retVector[caseindex] = java.lang.Double.NEGATIVE_INFINITY; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
825 } else if (token == null || token.equals("")) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
826 // missing value: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
827 retVector[caseindex] = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
828 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
829 retVector[caseindex] = new Double(token); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
830 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
831 } catch (NumberFormatException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
832 dbgLog.warning("NumberFormatException thrown for "+token+" as Double"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
833 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
834 retVector[caseindex] = null; // missing value |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
835 // TODO: ? |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
836 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
837 } else if (isLong) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
838 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
839 retVector[caseindex] = new Long(token); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
840 } catch (NumberFormatException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
841 retVector[caseindex] = null; // assume missing value |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
842 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
843 } else if (isFloat) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
844 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
845 if ("inf".equalsIgnoreCase(token) || "+inf".equalsIgnoreCase(token)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
846 retVector[caseindex] = java.lang.Float.POSITIVE_INFINITY; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
847 } else if ("-inf".equalsIgnoreCase(token)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
848 retVector[caseindex] = java.lang.Float.NEGATIVE_INFINITY; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
849 } else if (token == null || token.equals("")) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
850 // missing value: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
851 retVector[caseindex] = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
852 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
853 retVector[caseindex] = new Float(token); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
854 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
855 } catch (NumberFormatException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
856 dbgLog.warning("NumberFormatException thrown for "+token+" as Float"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
857 retVector[caseindex] = null; // assume missing value (TODO: ?) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
858 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
859 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
860 caseindex++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
861 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
862 if (bytecount == bytesRead - 1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
863 byteoffset = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
864 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
865 byteoffset = bytecount + 1; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
866 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
867 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
868 if (bytecount == bytesRead - 1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
869 // We've reached the end of the buffer; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
870 // This means we'll save whatever unused bytes are left in |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
871 // it - i.e., the bytes between the last new line |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
872 // encountered and the end - in the leftover buffer. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
873 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
874 // *EXCEPT*, there may be a case of a very long String |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
875 // that is actually longer than MAX_COLUMN_BUFFER, in |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
876 // which case it is possible that we've read through |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
877 // an entire buffer of bytes without finding any |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
878 // new lines... in this case we may need to add this |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
879 // entire byte buffer to an already existing leftover |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
880 // buffer! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
881 if (leftover == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
882 leftover = new byte[(int)bytesRead - byteoffset]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
883 System.arraycopy(columnBytes, byteoffset, leftover, 0, (int)bytesRead - byteoffset); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
884 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
885 if (byteoffset != 0) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
886 throw new IOException("Reached the end of the byte buffer, with some leftover left from the last read; yet the offset is not zero!"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
887 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
888 byte[] merged = new byte[leftover.length + (int)bytesRead]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
889 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
890 System.arraycopy(leftover, 0, merged, 0, leftover.length); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
891 System.arraycopy(columnBytes, byteoffset, merged, leftover.length, (int)bytesRead); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
892 //leftover = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
893 leftover = merged; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
894 merged = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
895 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
896 byteoffset = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
897 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
898 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
899 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
900 bytecount++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
901 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
902 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
903 bytesReadTotal += bytesRead; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
904 in.clear(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
905 if (columnLength - bytesReadTotal < MAX_COLUMN_BUFFER) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
906 in.limit((int)(columnLength - bytesReadTotal)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
907 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
908 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
909 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
910 fc.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
911 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
912 if (caseindex != casecount) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
913 throw new IOException("Faile to read "+casecount+" tokens for column "+column); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
914 //System.out.println("read "+caseindex+" tokens instead of expected "+casecount+"."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
915 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
916 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
917 return retVector; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
918 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
919 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
920 private long[] extractColumnOffsets (File rotatedImageFile, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
921 BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotatedImageFile)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
922 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
923 byte[] offsetHeader = new byte[varcount * 8]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
924 long[] byteOffsets = new long[varcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
925 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
926 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
927 int readlen = rotfileStream.read(offsetHeader); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
928 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
929 if (readlen != varcount * 8) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
930 throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
931 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
932 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
933 for (int varindex = 0; varindex < varcount; varindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
934 byte[] offsetBytes = new byte[8]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
935 System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
936 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
937 ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
938 byteOffsets[varindex] = offsetByteBuffer.getLong(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
939 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
940 //System.out.println(byteOffsets[varindex]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
941 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
942 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
943 rotfileStream.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
944 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
945 return byteOffsets; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
946 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
947 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
948 private File getRotatedImage(File tabfile, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
949 String fileName = tabfile.getAbsolutePath(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
950 String rotatedImageFileName = fileName + ".90d"; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
951 File rotatedImageFile = new File(rotatedImageFileName); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
952 if (rotatedImageFile.exists()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
953 //System.out.println("Image already exists!"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
954 return rotatedImageFile; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
955 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
956 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
957 return generateRotatedImage(tabfile, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
958 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
959 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
960 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
961 private File generateRotatedImage (File tabfile, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
962 // TODO: throw exceptions if bad file, zero varcount, etc. ... |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
963 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
964 String fileName = tabfile.getAbsolutePath(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
965 String rotatedImageFileName = fileName + ".90d"; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
966 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
967 int MAX_OUTPUT_STREAMS = 32; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
968 int MAX_BUFFERED_BYTES = 10 * 1024 * 1024; // 10 MB - for now? |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
969 int MAX_COLUMN_BUFFER = 8 * 1024; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
970 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
971 // offsetHeader will contain the byte offsets of the individual column |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
972 // vectors in the final rotated image file |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
973 byte[] offsetHeader = new byte[varcount * 8]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
974 int[] bufferedSizes = new int[varcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
975 long[] cachedfileSizes = new long[varcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
976 File[] columnTempFiles = new File[varcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
977 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
978 for (int i = 0; i < varcount; i++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
979 bufferedSizes[i] = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
980 cachedfileSizes[i] = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
981 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
982 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
983 // TODO: adjust MAX_COLUMN_BUFFER here, so that the total size is |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
984 // no more than MAX_BUFFERED_BYTES (but no less than 1024 maybe?) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
985 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
986 byte[][] bufferedColumns = new byte [varcount][MAX_COLUMN_BUFFER]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
987 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
988 // read the tab-delimited file: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
989 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
990 FileInputStream tabfileStream = new FileInputStream(tabfile); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
991 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
992 Scanner scanner = new Scanner(tabfileStream); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
993 scanner.useDelimiter("\\n"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
994 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
995 for (int caseindex = 0; caseindex < casecount; caseindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
996 if (scanner.hasNext()) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
997 String[] line = (scanner.next()).split("\t", -1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
998 // TODO: throw an exception if there are fewer tab-delimited |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
999 // tokens than the number of variables specified. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1000 String token = ""; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1001 int tokensize = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1002 for (int varindex = 0; varindex < varcount; varindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1003 // TODO: figure out the safest way to convert strings to |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1004 // bytes here. Is it going to be safer to use getBytes("UTF8")? |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1005 // we are already making the assumption that the values |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1006 // in the tab file are in UTF8. -- L.A. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1007 token = line[varindex] + "\n"; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1008 tokensize = token.getBytes().length; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1009 if (bufferedSizes[varindex]+tokensize > MAX_COLUMN_BUFFER) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1010 // fill the buffer and dump its contents into the temp file: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1011 // (do note that there may be *several* MAX_COLUMN_BUFFERs |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1012 // worth of bytes in the token!) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1013 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1014 int tokenoffset = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1015 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1016 if (bufferedSizes[varindex] != MAX_COLUMN_BUFFER) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1017 tokenoffset = MAX_COLUMN_BUFFER-bufferedSizes[varindex]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1018 System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokenoffset); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1019 } // (otherwise the buffer is already full, and we should |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1020 // simply dump it into the temp file, without adding any |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1021 // extra bytes to it) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1022 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1023 File bufferTempFile = columnTempFiles[varindex]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1024 if (bufferTempFile == null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1025 bufferTempFile = File.createTempFile("columnBufferFile", "bytes"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1026 columnTempFiles[varindex] = bufferTempFile; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1027 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1028 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1029 // *append* the contents of the buffer to the end of the |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1030 // temp file, if already exists: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1031 BufferedOutputStream outputStream = new BufferedOutputStream(new FileOutputStream (bufferTempFile, true)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1032 outputStream.write(bufferedColumns[varindex], 0, MAX_COLUMN_BUFFER); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1033 cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1034 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1035 // keep writing MAX_COLUMN_BUFFER-size chunks of bytes into |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1036 // the temp file, for as long as there's more than MAX_COLUMN_BUFFER |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1037 // bytes left in the token: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1038 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1039 while (tokensize - tokenoffset > MAX_COLUMN_BUFFER) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1040 outputStream.write(token.getBytes(), tokenoffset, MAX_COLUMN_BUFFER); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1041 cachedfileSizes[varindex] += MAX_COLUMN_BUFFER; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1042 tokenoffset += MAX_COLUMN_BUFFER; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1043 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1044 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1045 outputStream.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1046 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1047 // buffer the remaining bytes and reset the buffered |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1048 // byte counter: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1049 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1050 System.arraycopy(token.getBytes(), |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1051 tokenoffset, |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1052 bufferedColumns[varindex], |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1053 0, |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1054 tokensize - tokenoffset); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1055 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1056 bufferedSizes[varindex] = tokensize - tokenoffset; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1057 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1058 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1059 // continue buffering |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1060 System.arraycopy(token.getBytes(), 0, bufferedColumns[varindex], bufferedSizes[varindex], tokensize); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1061 bufferedSizes[varindex] += tokensize; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1062 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1063 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1064 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1065 scanner.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1066 throw new IOException("Tab file has fewer rows than the stored number of cases!"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1067 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1068 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1069 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1070 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1071 // OK, we've created the individual byte vectors of the tab file columns; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1072 // they may be partially saved in temp files and/or in memory. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1073 // We now need to go through all these buffers and create the final |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1074 // rotated image file. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1075 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1076 BufferedOutputStream finalOut = new BufferedOutputStream(new FileOutputStream (new File(rotatedImageFileName))); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1077 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1078 // but first we should create the offset header and write it out into |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1079 // the final file; because it should be at the head, doh! |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1080 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1081 long columnOffset = varcount * 8; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1082 // (this is the offset of the first column vector; it is equal to the |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1083 // size of the offset header, i.e. varcount * 8 bytes) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1084 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1085 for (int varindex = 0; varindex < varcount; varindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1086 long totalColumnBytes = cachedfileSizes[varindex] + bufferedSizes[varindex]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1087 columnOffset+=totalColumnBytes; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1088 //totalColumnBytes; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1089 byte[] columnOffsetByteArray = ByteBuffer.allocate(8).putLong(columnOffset).array(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1090 System.arraycopy(columnOffsetByteArray, 0, offsetHeader, varindex * 8, 8); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1091 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1092 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1093 finalOut.write(offsetHeader, 0, varcount * 8); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1094 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1095 for (int varindex = 0; varindex < varcount; varindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1096 long cachedBytesRead = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1097 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1098 // check if there is a cached temp file: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1099 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1100 File cachedTempFile = columnTempFiles[varindex]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1101 if (cachedTempFile != null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1102 byte[] cachedBytes = new byte[MAX_COLUMN_BUFFER]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1103 BufferedInputStream cachedIn = new BufferedInputStream(new FileInputStream(cachedTempFile)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1104 int readlen = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1105 while ((readlen = cachedIn.read(cachedBytes)) > -1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1106 finalOut.write(cachedBytes, 0, readlen); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1107 cachedBytesRead += readlen; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1108 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1109 cachedIn.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1110 // delete the temp file: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1111 cachedTempFile.delete(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1112 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1113 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1114 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1115 if (cachedBytesRead != cachedfileSizes[varindex]) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1116 finalOut.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1117 throw new IOException("Could not read the correct number of bytes cached for column "+varindex+"; "+ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1118 cachedfileSizes[varindex] + " bytes expected, "+cachedBytesRead+" read."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1119 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1120 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1121 // then check if there are any bytes buffered for this column: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1122 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1123 if (bufferedSizes[varindex] > 0) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1124 finalOut.write(bufferedColumns[varindex], 0, bufferedSizes[varindex]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1125 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1126 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1127 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1128 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1129 finalOut.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1130 return new File(rotatedImageFileName); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1131 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1132 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1133 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1134 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1135 * Test method for taking a "rotated" image, and reversing it, reassembling |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1136 * all the columns in the original order. Which should result in a file |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1137 * byte-for-byte identical to the original tab-delimited version. |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1138 * |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1139 * (do note that this method is not efficiently implemented; it's only |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1140 * being used for experiments so far, to confirm the accuracy of the |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1141 * generateRotatedImage() method. It should not be used for any |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1142 * practical means in the application!) |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1143 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1144 private void reverseRotatedImage (File rotfile, int varcount, int casecount) throws IOException { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1145 // open the file, read in the offset header: |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1146 BufferedInputStream rotfileStream = new BufferedInputStream(new FileInputStream(rotfile)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1147 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1148 byte[] offsetHeader = new byte[varcount * 8]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1149 long[] byteOffsets = new long[varcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1150 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1151 int readlen = rotfileStream.read(offsetHeader); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1152 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1153 if (readlen != varcount * 8) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1154 throw new IOException ("Could not read "+varcount*8+" header bytes from the rotated file."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1155 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1156 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1157 for (int varindex = 0; varindex < varcount; varindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1158 byte[] offsetBytes = new byte[8]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1159 System.arraycopy(offsetHeader, varindex*8, offsetBytes, 0, 8); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1160 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1161 ByteBuffer offsetByteBuffer = ByteBuffer.wrap(offsetBytes); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1162 byteOffsets[varindex] = offsetByteBuffer.getLong(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1163 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1164 //System.out.println(byteOffsets[varindex]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1165 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1166 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1167 String [][] reversedMatrix = new String[casecount][varcount]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1168 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1169 long offset = varcount * 8; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1170 byte[] columnBytes; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1171 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1172 for (int varindex = 0; varindex < varcount; varindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1173 long columnLength = byteOffsets[varindex] - offset; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1174 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1175 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1176 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1177 columnBytes = new byte[(int)columnLength]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1178 readlen = rotfileStream.read(columnBytes); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1179 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1180 if (readlen != columnLength) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1181 throw new IOException ("Could not read "+columnBytes+" bytes for column "+varindex); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1182 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1183 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1184 String columnString = new String(columnBytes); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1185 //System.out.print(columnString); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1186 String[] values = columnString.split("\n", -1); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1187 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1188 if (values.length < casecount) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1189 throw new IOException("count mismatch: "+values.length+" tokens found for column "+varindex); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1190 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1191 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1192 for (int caseindex = 0; caseindex < casecount; caseindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1193 reversedMatrix[caseindex][varindex] = values[caseindex]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1194 }*/ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1195 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1196 int bytecount = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1197 int byteoffset = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1198 int caseindex = 0; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1199 //System.out.println("generating value vector for column "+varindex); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1200 while (bytecount < columnLength) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1201 if (columnBytes[bytecount] == '\n') { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1202 String token = new String(columnBytes, byteoffset, bytecount-byteoffset); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1203 reversedMatrix[caseindex++][varindex] = token; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1204 byteoffset = bytecount + 1; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1205 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1206 bytecount++; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1207 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1208 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1209 if (caseindex != casecount) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1210 throw new IOException("count mismatch: "+caseindex+" tokens found for column "+varindex); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1211 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1212 offset = byteOffsets[varindex]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1213 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1214 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1215 for (int caseindex = 0; caseindex < casecount; caseindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1216 for (int varindex = 0; varindex < varcount; varindex++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1217 System.out.print(reversedMatrix[caseindex][varindex]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1218 if (varindex < varcount-1) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1219 System.out.print("\t"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1220 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1221 System.out.print("\n"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1222 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1223 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1224 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1225 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1226 rotfileStream.close(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1227 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1228 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1229 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1230 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1231 /** |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1232 * main() method, for testing |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1233 * usage: java edu.harvard.iq.dataverse.dataaccess.TabularSubsetGenerator testfile.tab varcount casecount column type |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1234 * make sure the CLASSPATH contains ... |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1235 * |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1236 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1237 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1238 public static void main(String[] args) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1239 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1240 String tabFileName = args[0]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1241 int varcount = new Integer(args[1]).intValue(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1242 int casecount = new Integer(args[2]).intValue(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1243 int column = new Integer(args[3]).intValue(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1244 String type = args[4]; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1245 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1246 File tabFile = new File(tabFileName); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1247 File rotatedImageFile = null; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1248 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1249 TabularSubsetGenerator subsetGenerator = new TabularSubsetGenerator(); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1250 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1251 /* |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1252 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1253 rotatedImageFile = subsetGenerator.getRotatedImage(tabFile, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1254 } catch (IOException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1255 System.out.println(ex.getMessage()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1256 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1257 */ |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1258 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1259 //System.out.println("\nFinished generating \"rotated\" column image file."); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1260 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1261 //System.out.println("\nOffsets:"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1262 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1263 MathContext doubleMathContext = new MathContext(15, RoundingMode.HALF_EVEN); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1264 String FORMAT_IEEE754 = "%+#.15e"; |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1265 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1266 try { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1267 //subsetGenerator.reverseRotatedImage(rotatedImageFile, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1268 //String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1269 if ("string".equals(type)) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1270 String[] columns = subsetGenerator.subsetStringVector(tabFile, column, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1271 for (int i = 0; i < casecount; i++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1272 System.out.println(columns[i]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1273 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1274 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1275 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1276 Double[] columns = subsetGenerator.subsetDoubleVector(tabFile, column, varcount, casecount); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1277 for (int i = 0; i < casecount; i++) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1278 if (columns[i] != null) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1279 BigDecimal outBigDecimal = new BigDecimal(columns[i], doubleMathContext); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1280 System.out.println(String.format(FORMAT_IEEE754, outBigDecimal)); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1281 } else { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1282 System.out.println("NA"); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1283 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1284 //System.out.println(columns[i]); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1285 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1286 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1287 } catch (IOException ex) { |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1288 System.out.println(ex.getMessage()); |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1289 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1290 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1291 } |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1292 |
|
a50cf11e5178
Rewrite LGDataverse completely upgrading to dataverse4.0
Zoe Hong <zhong@mpiwg-berlin.mpg.de>
parents:
diff
changeset
|
1293 |
