annotate src/main/java/de/mpiwg/itgroup/ismi/utils/NaturalOrderComparator.java @ 188:34ac2e1b323a

new Clean UI to find non-public digitalizations of public codices and make them public.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Tue, 06 Nov 2018 20:05:49 +0100
parents 2e911857a759
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
jurzua
parents:
diff changeset
1 package de.mpiwg.itgroup.ismi.utils;
jurzua
parents:
diff changeset
2
jurzua
parents:
diff changeset
3 /*
jurzua
parents:
diff changeset
4 NaturalOrderComparator.java -- Perform 'natural order' comparisons of strings in Java.
jurzua
parents:
diff changeset
5 Copyright (C) 2003 by Pierre-Luc Paour <natorder@paour.com>
jurzua
parents:
diff changeset
6
jurzua
parents:
diff changeset
7 Based on the C version by Martin Pool, of which this is more or less a straight conversion.
jurzua
parents:
diff changeset
8 Copyright (C) 2000 by Martin Pool <mbp@humbug.org.au>
jurzua
parents:
diff changeset
9
jurzua
parents:
diff changeset
10 This software is provided 'as-is', without any express or implied
jurzua
parents:
diff changeset
11 warranty. In no event will the authors be held liable for any damages
jurzua
parents:
diff changeset
12 arising from the use of this software.
jurzua
parents:
diff changeset
13
jurzua
parents:
diff changeset
14 Permission is granted to anyone to use this software for any purpose,
jurzua
parents:
diff changeset
15 including commercial applications, and to alter it and redistribute it
jurzua
parents:
diff changeset
16 freely, subject to the following restrictions:
jurzua
parents:
diff changeset
17
jurzua
parents:
diff changeset
18 1. The origin of this software must not be misrepresented; you must not
jurzua
parents:
diff changeset
19 claim that you wrote the original software. If you use this software
jurzua
parents:
diff changeset
20 in a product, an acknowledgment in the product documentation would be
jurzua
parents:
diff changeset
21 appreciated but is not required.
jurzua
parents:
diff changeset
22 2. Altered source versions must be plainly marked as such, and must not be
jurzua
parents:
diff changeset
23 misrepresented as being the original software.
jurzua
parents:
diff changeset
24 3. This notice may not be removed or altered from any source distribution.
jurzua
parents:
diff changeset
25 */
jurzua
parents:
diff changeset
26
jurzua
parents:
diff changeset
27 import java.util.*;
jurzua
parents:
diff changeset
28
jurzua
parents:
diff changeset
29 public class NaturalOrderComparator
jurzua
parents:
diff changeset
30 {
jurzua
parents:
diff changeset
31 private static int compareRight(String a, String b)
jurzua
parents:
diff changeset
32 {
jurzua
parents:
diff changeset
33 int bias = 0;
jurzua
parents:
diff changeset
34 int ia = 0;
jurzua
parents:
diff changeset
35 int ib = 0;
jurzua
parents:
diff changeset
36
jurzua
parents:
diff changeset
37 // The longest run of digits wins. That aside, the greatest
jurzua
parents:
diff changeset
38 // value wins, but we can't know that it will until we've scanned
jurzua
parents:
diff changeset
39 // both numbers to know that they have the same magnitude, so we
jurzua
parents:
diff changeset
40 // remember it in BIAS.
jurzua
parents:
diff changeset
41 for (;; ia++, ib++)
jurzua
parents:
diff changeset
42 {
jurzua
parents:
diff changeset
43 char ca = charAt(a, ia);
jurzua
parents:
diff changeset
44 char cb = charAt(b, ib);
jurzua
parents:
diff changeset
45
jurzua
parents:
diff changeset
46 if (!Character.isDigit(ca) && !Character.isDigit(cb))
jurzua
parents:
diff changeset
47 {
jurzua
parents:
diff changeset
48 return bias;
jurzua
parents:
diff changeset
49 }
jurzua
parents:
diff changeset
50 else if (!Character.isDigit(ca))
jurzua
parents:
diff changeset
51 {
jurzua
parents:
diff changeset
52 return -1;
jurzua
parents:
diff changeset
53 }
jurzua
parents:
diff changeset
54 else if (!Character.isDigit(cb))
jurzua
parents:
diff changeset
55 {
jurzua
parents:
diff changeset
56 return +1;
jurzua
parents:
diff changeset
57 }
jurzua
parents:
diff changeset
58 else if (ca < cb)
jurzua
parents:
diff changeset
59 {
jurzua
parents:
diff changeset
60 if (bias == 0)
jurzua
parents:
diff changeset
61 {
jurzua
parents:
diff changeset
62 bias = -1;
jurzua
parents:
diff changeset
63 }
jurzua
parents:
diff changeset
64 }
jurzua
parents:
diff changeset
65 else if (ca > cb)
jurzua
parents:
diff changeset
66 {
jurzua
parents:
diff changeset
67 if (bias == 0)
jurzua
parents:
diff changeset
68 bias = +1;
jurzua
parents:
diff changeset
69 }
jurzua
parents:
diff changeset
70 else if (ca == 0 && cb == 0)
jurzua
parents:
diff changeset
71 {
jurzua
parents:
diff changeset
72 return bias;
jurzua
parents:
diff changeset
73 }
jurzua
parents:
diff changeset
74 }
jurzua
parents:
diff changeset
75 }
jurzua
parents:
diff changeset
76
jurzua
parents:
diff changeset
77
jurzua
parents:
diff changeset
78
jurzua
parents:
diff changeset
79 public static int compare(String a, String b)
jurzua
parents:
diff changeset
80 {
jurzua
parents:
diff changeset
81 //String a = o1.toString();
jurzua
parents:
diff changeset
82 //String b = o2.toString();
jurzua
parents:
diff changeset
83
jurzua
parents:
diff changeset
84 int ia = 0, ib = 0;
jurzua
parents:
diff changeset
85 int nza = 0, nzb = 0;
jurzua
parents:
diff changeset
86 char ca, cb;
jurzua
parents:
diff changeset
87 int result;
jurzua
parents:
diff changeset
88
jurzua
parents:
diff changeset
89 while (true)
jurzua
parents:
diff changeset
90 {
jurzua
parents:
diff changeset
91 // only count the number of zeroes leading the last number compared
jurzua
parents:
diff changeset
92 nza = nzb = 0;
jurzua
parents:
diff changeset
93
jurzua
parents:
diff changeset
94 ca = charAt(a, ia);
jurzua
parents:
diff changeset
95 cb = charAt(b, ib);
jurzua
parents:
diff changeset
96
jurzua
parents:
diff changeset
97 // skip over leading spaces or zeros
jurzua
parents:
diff changeset
98 while (Character.isSpaceChar(ca) || ca == '0')
jurzua
parents:
diff changeset
99 {
jurzua
parents:
diff changeset
100 if (ca == '0')
jurzua
parents:
diff changeset
101 {
jurzua
parents:
diff changeset
102 nza++;
jurzua
parents:
diff changeset
103 }
jurzua
parents:
diff changeset
104 else
jurzua
parents:
diff changeset
105 {
jurzua
parents:
diff changeset
106 // only count consecutive zeroes
jurzua
parents:
diff changeset
107 nza = 0;
jurzua
parents:
diff changeset
108 }
jurzua
parents:
diff changeset
109
jurzua
parents:
diff changeset
110 ca = charAt(a, ++ia);
jurzua
parents:
diff changeset
111 }
jurzua
parents:
diff changeset
112
jurzua
parents:
diff changeset
113 while (Character.isSpaceChar(cb) || cb == '0')
jurzua
parents:
diff changeset
114 {
jurzua
parents:
diff changeset
115 if (cb == '0')
jurzua
parents:
diff changeset
116 {
jurzua
parents:
diff changeset
117 nzb++;
jurzua
parents:
diff changeset
118 }
jurzua
parents:
diff changeset
119 else
jurzua
parents:
diff changeset
120 {
jurzua
parents:
diff changeset
121 // only count consecutive zeroes
jurzua
parents:
diff changeset
122 nzb = 0;
jurzua
parents:
diff changeset
123 }
jurzua
parents:
diff changeset
124
jurzua
parents:
diff changeset
125 cb = charAt(b, ++ib);
jurzua
parents:
diff changeset
126 }
jurzua
parents:
diff changeset
127
jurzua
parents:
diff changeset
128 // process run of digits
jurzua
parents:
diff changeset
129 if (Character.isDigit(ca) && Character.isDigit(cb))
jurzua
parents:
diff changeset
130 {
jurzua
parents:
diff changeset
131 if ((result = compareRight(a.substring(ia), b.substring(ib))) != 0)
jurzua
parents:
diff changeset
132 {
jurzua
parents:
diff changeset
133 return result;
jurzua
parents:
diff changeset
134 }
jurzua
parents:
diff changeset
135 }
jurzua
parents:
diff changeset
136
jurzua
parents:
diff changeset
137 if (ca == 0 && cb == 0)
jurzua
parents:
diff changeset
138 {
jurzua
parents:
diff changeset
139 // The strings compare the same. Perhaps the caller
jurzua
parents:
diff changeset
140 // will want to call strcmp to break the tie.
jurzua
parents:
diff changeset
141 return nza - nzb;
jurzua
parents:
diff changeset
142 }
jurzua
parents:
diff changeset
143
jurzua
parents:
diff changeset
144 if (ca < cb)
jurzua
parents:
diff changeset
145 {
jurzua
parents:
diff changeset
146 return -1;
jurzua
parents:
diff changeset
147 }
jurzua
parents:
diff changeset
148 else if (ca > cb)
jurzua
parents:
diff changeset
149 {
jurzua
parents:
diff changeset
150 return +1;
jurzua
parents:
diff changeset
151 }
jurzua
parents:
diff changeset
152
jurzua
parents:
diff changeset
153 ++ia;
jurzua
parents:
diff changeset
154 ++ib;
jurzua
parents:
diff changeset
155 }
jurzua
parents:
diff changeset
156 }
jurzua
parents:
diff changeset
157
jurzua
parents:
diff changeset
158 static char charAt(String s, int i)
jurzua
parents:
diff changeset
159 {
jurzua
parents:
diff changeset
160 if (i >= s.length())
jurzua
parents:
diff changeset
161 {
jurzua
parents:
diff changeset
162 return 0;
jurzua
parents:
diff changeset
163 }
jurzua
parents:
diff changeset
164 else
jurzua
parents:
diff changeset
165 {
jurzua
parents:
diff changeset
166 return s.charAt(i);
jurzua
parents:
diff changeset
167 }
jurzua
parents:
diff changeset
168 }
jurzua
parents:
diff changeset
169
jurzua
parents:
diff changeset
170 /*
jurzua
parents:
diff changeset
171 public static void main(String[] args)
jurzua
parents:
diff changeset
172 {
jurzua
parents:
diff changeset
173 String[] strings = new String[] { "1-2", "1-02", "1-20", "10-20", "fred", "jane", "pic01",
jurzua
parents:
diff changeset
174 "pic2", "pic02", "pic02a", "pic3", "pic4", "pic 4 else", "pic 5", "pic05", "pic 5",
jurzua
parents:
diff changeset
175 "pic 5 something", "pic 6", "pic 7", "pic100", "pic100a", "pic120", "pic121",
jurzua
parents:
diff changeset
176 "pic02000", "tom", "x2-g8", "x2-y7", "x2-y08", "x8-y8" };
jurzua
parents:
diff changeset
177
jurzua
parents:
diff changeset
178 List orig = Arrays.asList(strings);
jurzua
parents:
diff changeset
179
jurzua
parents:
diff changeset
180 System.out.println("Original: " + orig);
jurzua
parents:
diff changeset
181
jurzua
parents:
diff changeset
182 List scrambled = Arrays.asList(strings);
jurzua
parents:
diff changeset
183 Collections.shuffle(scrambled);
jurzua
parents:
diff changeset
184
jurzua
parents:
diff changeset
185 System.out.println("Scrambled: " + scrambled);
jurzua
parents:
diff changeset
186
jurzua
parents:
diff changeset
187 Collections.sort(scrambled, new NaturalOrderComparator());
jurzua
parents:
diff changeset
188
jurzua
parents:
diff changeset
189 System.out.println("Sorted: " + scrambled);
jurzua
parents:
diff changeset
190 }
jurzua
parents:
diff changeset
191 */
jurzua
parents:
diff changeset
192 }