annotate src/main/java/de/mpiwg/itgroup/ismi/search/beans/SearchResultBean.java @ 153:3c83f42a8a39 public_by_author

first stab at new public-by-author interface.
author Robert Casties <casties@mpiwg-berlin.mpg.de>
date Fri, 18 Aug 2017 14:58:11 -0400
parents 2e911857a759
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
jurzua
parents:
diff changeset
1 package de.mpiwg.itgroup.ismi.search.beans;
jurzua
parents:
diff changeset
2
jurzua
parents:
diff changeset
3 import java.io.Serializable;
jurzua
parents:
diff changeset
4 import java.util.ArrayList;
jurzua
parents:
diff changeset
5 import java.util.Collections;
jurzua
parents:
diff changeset
6 import java.util.List;
jurzua
parents:
diff changeset
7 import java.util.Map;
jurzua
parents:
diff changeset
8
jurzua
parents:
diff changeset
9 import org.apache.commons.lang.StringUtils;
jurzua
parents:
diff changeset
10 import org.mpi.openmind.repository.bo.Attribute;
jurzua
parents:
diff changeset
11 import org.mpi.openmind.repository.bo.Entity;
jurzua
parents:
diff changeset
12 import org.mpi.openmind.repository.bo.Relation;
jurzua
parents:
diff changeset
13 import org.mpi.openmind.repository.bo.utils.EntitySortByNormalizedOwnValue;
jurzua
parents:
diff changeset
14 import org.mpi.openmind.repository.services.utils.AttributeFilter;
jurzua
parents:
diff changeset
15 import org.mpi.openmind.repository.utils.NormalizerUtils;
jurzua
parents:
diff changeset
16
jurzua
parents:
diff changeset
17 import de.mpiwg.itgroup.ismi.auxObjects.ResultSet;
jurzua
parents:
diff changeset
18 import de.mpiwg.itgroup.ismi.entry.beans.AbstractISMIBean;
jurzua
parents:
diff changeset
19
jurzua
parents:
diff changeset
20
jurzua
parents:
diff changeset
21 public class SearchResultBean extends AbstractISMIBean {
jurzua
parents:
diff changeset
22
jurzua
parents:
diff changeset
23 /**
jurzua
parents:
diff changeset
24 *
jurzua
parents:
diff changeset
25 */
jurzua
parents:
diff changeset
26 private static final long serialVersionUID = 7072264955252613769L;
jurzua
parents:
diff changeset
27 public static Long SEC_05 = new Long(5000);
jurzua
parents:
diff changeset
28 public static Long SEC_10 = new Long(10000);
jurzua
parents:
diff changeset
29 public static Long SEC_20 = new Long(20000);
jurzua
parents:
diff changeset
30 public static Long SEC_30 = new Long(30000);
jurzua
parents:
diff changeset
31
jurzua
parents:
diff changeset
32 private List<ResultSet> resultSetList = new ArrayList<ResultSet>();
jurzua
parents:
diff changeset
33
jurzua
parents:
diff changeset
34 private int maxResult = -1 ;
jurzua
parents:
diff changeset
35 private int counter = 0;
jurzua
parents:
diff changeset
36
jurzua
parents:
diff changeset
37 public void searchAttributes(String term, String mode){
jurzua
parents:
diff changeset
38 long start = System.currentTimeMillis();
jurzua
parents:
diff changeset
39 this.resultSetList = new ArrayList<ResultSet>();
jurzua
parents:
diff changeset
40 this.counter = 0;
jurzua
parents:
diff changeset
41 try{
jurzua
parents:
diff changeset
42 if (StringUtils.isNotEmpty(term)) {
jurzua
parents:
diff changeset
43
jurzua
parents:
diff changeset
44 boolean includeTitles = (SimpleSearchBean.TITLES.equals(mode) || SimpleSearchBean.AUTHORS_TITLES.equals(mode)) ? true : false;
jurzua
parents:
diff changeset
45 boolean includeAuthors = (SimpleSearchBean.AUTHORS.equals(mode) || SimpleSearchBean.AUTHORS_TITLES.equals(mode) ? true : false);
jurzua
parents:
diff changeset
46
jurzua
parents:
diff changeset
47 List<AttributeFilter> filters = new ArrayList<AttributeFilter>();
jurzua
parents:
diff changeset
48
jurzua
parents:
diff changeset
49 AttributeFilter filter1 = new AttributeFilter();
jurzua
parents:
diff changeset
50 filter1.setEntObjectClass("PERSON");
jurzua
parents:
diff changeset
51 filter1.setName("name");
jurzua
parents:
diff changeset
52 //filter1.setNormalize(true);
jurzua
parents:
diff changeset
53
jurzua
parents:
diff changeset
54 AttributeFilter filter2 = new AttributeFilter();
jurzua
parents:
diff changeset
55 filter2.setEntObjectClass("TEXT");
jurzua
parents:
diff changeset
56 filter2.setName("title");
jurzua
parents:
diff changeset
57 //filter2.setNormalize(true);
jurzua
parents:
diff changeset
58
jurzua
parents:
diff changeset
59 AttributeFilter filter3 = new AttributeFilter();
jurzua
parents:
diff changeset
60 filter3.setEntObjectClass("PERSON");
jurzua
parents:
diff changeset
61 filter3.setName("name_translit");
jurzua
parents:
diff changeset
62
jurzua
parents:
diff changeset
63 //filter3.setNormalize(true);
jurzua
parents:
diff changeset
64
jurzua
parents:
diff changeset
65 AttributeFilter filter4 = new AttributeFilter();
jurzua
parents:
diff changeset
66 filter4.setEntObjectClass("TEXT");
jurzua
parents:
diff changeset
67 filter4.setName("title_translit");
jurzua
parents:
diff changeset
68
jurzua
parents:
diff changeset
69 //filter4.setNormalize(true);
jurzua
parents:
diff changeset
70
jurzua
parents:
diff changeset
71 AttributeFilter filter5 = new AttributeFilter();
jurzua
parents:
diff changeset
72 filter5.setEntObjectClass("TEXT");
jurzua
parents:
diff changeset
73 filter5.setName("full_title");
jurzua
parents:
diff changeset
74
jurzua
parents:
diff changeset
75 //filter4.setNormalize(true);
jurzua
parents:
diff changeset
76
jurzua
parents:
diff changeset
77
jurzua
parents:
diff changeset
78 AttributeFilter filter6 = new AttributeFilter();
jurzua
parents:
diff changeset
79 filter6.setEntObjectClass("TEXT");
jurzua
parents:
diff changeset
80 filter6.setName("full_title_translit");
jurzua
parents:
diff changeset
81
jurzua
parents:
diff changeset
82
jurzua
parents:
diff changeset
83 //filers for alias
jurzua
parents:
diff changeset
84 AttributeFilter filter7 = new AttributeFilter();
jurzua
parents:
diff changeset
85 filter7.setEntObjectClass("ALIAS");
jurzua
parents:
diff changeset
86 filter7.setName("alias");
jurzua
parents:
diff changeset
87
jurzua
parents:
diff changeset
88 /*
jurzua
parents:
diff changeset
89 filter1.setOwnValue(search);
jurzua
parents:
diff changeset
90 filter2.setOwnValue(search);
jurzua
parents:
diff changeset
91 filter3.setOwnValue(search);
jurzua
parents:
diff changeset
92 filter4.setOwnValue(search);
jurzua
parents:
diff changeset
93 filter5.setOwnValue(search);
jurzua
parents:
diff changeset
94 filter6.setOwnValue(search);
jurzua
parents:
diff changeset
95 filter7.setOwnValue(search);
jurzua
parents:
diff changeset
96 */
jurzua
parents:
diff changeset
97
jurzua
parents:
diff changeset
98 filters.add(filter1);
jurzua
parents:
diff changeset
99 filters.add(filter2);
jurzua
parents:
diff changeset
100 filters.add(filter3);
jurzua
parents:
diff changeset
101 filters.add(filter4);
jurzua
parents:
diff changeset
102 filters.add(filter5);
jurzua
parents:
diff changeset
103 filters.add(filter6);
jurzua
parents:
diff changeset
104 filters.add(filter7);
jurzua
parents:
diff changeset
105
jurzua
parents:
diff changeset
106 long startQuery = System.currentTimeMillis();
jurzua
parents:
diff changeset
107 Map<Attribute, Entity> map;
jurzua
parents:
diff changeset
108
jurzua
parents:
diff changeset
109 if(getCache().isMapDirty()){
jurzua
parents:
diff changeset
110 map = getWrapper().searchAttEntityByAttributeFilter(filters, getMaxResult());
jurzua
parents:
diff changeset
111 System.out.println();
jurzua
parents:
diff changeset
112 System.out.println("###########################################################");
jurzua
parents:
diff changeset
113 System.out.println();
jurzua
parents:
diff changeset
114 System.out.println("MAP SEARCH SIZEx= " + map.size() + " max result " + getMaxResult());
jurzua
parents:
diff changeset
115 /*
jurzua
parents:
diff changeset
116 for(Attribute a : map.keySet()){
jurzua
parents:
diff changeset
117 System.out.println(a);
jurzua
parents:
diff changeset
118 }*/
jurzua
parents:
diff changeset
119
jurzua
parents:
diff changeset
120 getCache().setAttResultMap(map);
jurzua
parents:
diff changeset
121 }else{
jurzua
parents:
diff changeset
122 map = getCache().getAttResultMap();
jurzua
parents:
diff changeset
123 }
jurzua
parents:
diff changeset
124
jurzua
parents:
diff changeset
125 long endQuery = System.currentTimeMillis();
jurzua
parents:
diff changeset
126 String normalizedString = NormalizerUtils.normalize(term);
jurzua
parents:
diff changeset
127
jurzua
parents:
diff changeset
128 System.out.println("");
jurzua
parents:
diff changeset
129 System.out.println("***********************");
jurzua
parents:
diff changeset
130 System.out.println("Word: " + term);
jurzua
parents:
diff changeset
131 System.out.println("Normalized: " + normalizedString);
jurzua
parents:
diff changeset
132 System.out.println("Query Execution= " + (endQuery - startQuery));
jurzua
parents:
diff changeset
133
jurzua
parents:
diff changeset
134 ResultSet nameResult = new ResultSet("Name");
jurzua
parents:
diff changeset
135 ResultSet titleResult = new ResultSet("Title");
jurzua
parents:
diff changeset
136
jurzua
parents:
diff changeset
137 List<Long> idList = new ArrayList<Long>();
jurzua
parents:
diff changeset
138
jurzua
parents:
diff changeset
139 int countCached = 0;
jurzua
parents:
diff changeset
140 int countNoCached = 0;
jurzua
parents:
diff changeset
141 int noCachedAlias = 0;
jurzua
parents:
diff changeset
142 int noCachedTextPerson = 0;
jurzua
parents:
diff changeset
143 int putInCache = 0;
jurzua
parents:
diff changeset
144 int putInCacheError = 0;
jurzua
parents:
diff changeset
145 for(Attribute att : map.keySet()){
jurzua
parents:
diff changeset
146
jurzua
parents:
diff changeset
147 //limiting the execution time.
jurzua
parents:
diff changeset
148 if((System.currentTimeMillis() - start) > SEC_20 || counter >= 1000)
jurzua
parents:
diff changeset
149 break;
jurzua
parents:
diff changeset
150
jurzua
parents:
diff changeset
151 //Attribute att = map.get(ent);
jurzua
parents:
diff changeset
152 Entity ent = map.get(att);
jurzua
parents:
diff changeset
153
jurzua
parents:
diff changeset
154 if(StringUtils.isNotEmpty(att.getNormalizedOwnValue()) && !getCache().ignoreAttribute(att) && att.getNormalizedOwnValue().contains(normalizedString)){
jurzua
parents:
diff changeset
155 if(getCache().containsAttribute(att)){
jurzua
parents:
diff changeset
156 countCached++;
jurzua
parents:
diff changeset
157 //if the attributed was saved in the cache
jurzua
parents:
diff changeset
158 Entity ee = getCache().getEntMap().get(att.getId());
jurzua
parents:
diff changeset
159 String description = getCache().getDescriptionMap().get(att.getId() + "-" + ee.getId());
jurzua
parents:
diff changeset
160 if(includeAuthors && ee.getObjectClass().equals("PERSON") && !idList.contains(ee.getId())){
jurzua
parents:
diff changeset
161 nameResult.setTuple(ee, att);
jurzua
parents:
diff changeset
162 nameResult.setDescription(ee, description);
jurzua
parents:
diff changeset
163 //nameResult.setDescription(ee, att.getObjectClass() + "=" + att.getOwnValue());
jurzua
parents:
diff changeset
164 counter++;
jurzua
parents:
diff changeset
165 idList.add(ee.getId());
jurzua
parents:
diff changeset
166 }else if(includeTitles && ee.getObjectClass().equals("TEXT") && !idList.contains(ee.getId())){
jurzua
parents:
diff changeset
167 titleResult.setTuple(ee, att);
jurzua
parents:
diff changeset
168 titleResult.setDescription(ee, description);
jurzua
parents:
diff changeset
169 //titleResult.setDescription(ee, att.getObjectClass() + "=" + att.getOwnValue());
jurzua
parents:
diff changeset
170 counter++;
jurzua
parents:
diff changeset
171 idList.add(ee.getId());
jurzua
parents:
diff changeset
172 }
jurzua
parents:
diff changeset
173 }else{
jurzua
parents:
diff changeset
174
jurzua
parents:
diff changeset
175 countNoCached++;
jurzua
parents:
diff changeset
176 if(includeAuthors && !idList.contains(ent.getId()) && att.getName().contains("name") && att.getSourceObjectClass().equals("PERSON")){
jurzua
parents:
diff changeset
177 //OC: PERSON
jurzua
parents:
diff changeset
178 String d = att.getObjectClass() + "=" + att.getOwnValue();
jurzua
parents:
diff changeset
179 nameResult.setDescription(ent, d);
jurzua
parents:
diff changeset
180 nameResult.setTuple(ent, att);
jurzua
parents:
diff changeset
181 getCache().setTuple(ent, att, d);
jurzua
parents:
diff changeset
182 counter++;
jurzua
parents:
diff changeset
183 noCachedTextPerson++;
jurzua
parents:
diff changeset
184 idList.add(ent.getId());
jurzua
parents:
diff changeset
185 }else if(includeTitles && !idList.contains(ent.getId()) && (att.getName().contains("title") || att.getName().contains("full_title"))){
jurzua
parents:
diff changeset
186 //OC: TEXT
jurzua
parents:
diff changeset
187 String d = att.getObjectClass() + "=" + att.getOwnValue();
jurzua
parents:
diff changeset
188 titleResult.setDescription(ent, d);
jurzua
parents:
diff changeset
189 titleResult.setTuple(ent, att);
jurzua
parents:
diff changeset
190 getCache().setTuple(ent, att, d);
jurzua
parents:
diff changeset
191 counter++;
jurzua
parents:
diff changeset
192 noCachedTextPerson++;
jurzua
parents:
diff changeset
193 idList.add(ent.getId());
jurzua
parents:
diff changeset
194 }else if(att.getSourceObjectClass().equals("ALIAS")){
jurzua
parents:
diff changeset
195 noCachedAlias++;
jurzua
parents:
diff changeset
196 //OC: ALIAS
jurzua
parents:
diff changeset
197 AliasStructure aliasStructure = getAliasStructure(ent, att);
jurzua
parents:
diff changeset
198 if(aliasStructure.target != null){
jurzua
parents:
diff changeset
199 getCache().setTuple(aliasStructure.target, att, aliasStructure.description);
jurzua
parents:
diff changeset
200 putInCache++;
jurzua
parents:
diff changeset
201 if(!idList.contains(aliasStructure.target.getId())){
jurzua
parents:
diff changeset
202 if(includeAuthors && aliasStructure.target.getObjectClass().equals("PERSON")){
jurzua
parents:
diff changeset
203 nameResult.setTuple(aliasStructure.target, att);
jurzua
parents:
diff changeset
204 nameResult.setDescription(aliasStructure.target, aliasStructure.description);
jurzua
parents:
diff changeset
205 counter++;
jurzua
parents:
diff changeset
206 }else if(includeTitles && aliasStructure.target.getObjectClass().equals("TEXT")){
jurzua
parents:
diff changeset
207 titleResult.setTuple(aliasStructure.target, att);
jurzua
parents:
diff changeset
208 titleResult.setDescription(aliasStructure.target, aliasStructure.description);
jurzua
parents:
diff changeset
209 counter++;
jurzua
parents:
diff changeset
210 }
jurzua
parents:
diff changeset
211 idList.add(aliasStructure.target.getId());
jurzua
parents:
diff changeset
212 }
jurzua
parents:
diff changeset
213 }else{
jurzua
parents:
diff changeset
214 putInCacheError++;
jurzua
parents:
diff changeset
215 this.getCache().getIgnoredAttIdList().add(att.getId());
jurzua
parents:
diff changeset
216 System.out.println("cacheError++ : " + att);
jurzua
parents:
diff changeset
217 }
jurzua
parents:
diff changeset
218 }
jurzua
parents:
diff changeset
219
jurzua
parents:
diff changeset
220 }
jurzua
parents:
diff changeset
221 }
jurzua
parents:
diff changeset
222 }
jurzua
parents:
diff changeset
223
jurzua
parents:
diff changeset
224 System.out.println();
jurzua
parents:
diff changeset
225 System.out.println("countCached: " + countCached + " - countNoCached: " + countNoCached + " - total: " + (countCached + countNoCached));
jurzua
parents:
diff changeset
226 System.out.println("noCachedTextPerson: " + noCachedTextPerson++ + " - noCachedAlias: " + noCachedAlias);
jurzua
parents:
diff changeset
227 System.out.println("putInCacheError: " + putInCacheError + " - putInCache: " + putInCache);
jurzua
parents:
diff changeset
228 System.out.println("nameResult: " + nameResult.getResults().size());
jurzua
parents:
diff changeset
229 System.out.println("titleResult: " + titleResult.getResults().size());
jurzua
parents:
diff changeset
230
jurzua
parents:
diff changeset
231 if(nameResult.getResults().size() > 0){
jurzua
parents:
diff changeset
232 Collections.sort(nameResult.getResults(), new EntitySortByNormalizedOwnValue());
jurzua
parents:
diff changeset
233 this.resultSetList.add(nameResult);
jurzua
parents:
diff changeset
234 }
jurzua
parents:
diff changeset
235 if(titleResult.getResults().size() > 0){
jurzua
parents:
diff changeset
236 Collections.sort(titleResult.getResults(), new EntitySortByNormalizedOwnValue());
jurzua
parents:
diff changeset
237 this.resultSetList.add(titleResult);
jurzua
parents:
diff changeset
238 }
jurzua
parents:
diff changeset
239 }
jurzua
parents:
diff changeset
240
jurzua
parents:
diff changeset
241 }catch(Exception e){
jurzua
parents:
diff changeset
242 e.printStackTrace();
jurzua
parents:
diff changeset
243 }
jurzua
parents:
diff changeset
244
jurzua
parents:
diff changeset
245 long end = System.currentTimeMillis();
jurzua
parents:
diff changeset
246 System.out.println("Simple search time execution= " + (end - start));
jurzua
parents:
diff changeset
247 }
jurzua
parents:
diff changeset
248
jurzua
parents:
diff changeset
249 /**
jurzua
parents:
diff changeset
250 * TODO do it more clever!
jurzua
parents:
diff changeset
251 * rules:
jurzua
parents:
diff changeset
252 * ----Text
jurzua
parents:
diff changeset
253 * is_prime_alias_title_of
jurzua
parents:
diff changeset
254 * is_alias_title_of
jurzua
parents:
diff changeset
255 * is_alias_incipit_of
jurzua
parents:
diff changeset
256 * is_alias_explicit_of
jurzua
parents:
diff changeset
257 * ----Person
jurzua
parents:
diff changeset
258 * is_prime_alias_name_of
jurzua
parents:
diff changeset
259 * is_alias_name_of
jurzua
parents:
diff changeset
260 * @param alias
jurzua
parents:
diff changeset
261 * @return
jurzua
parents:
diff changeset
262 */
jurzua
parents:
diff changeset
263 private AliasStructure getAliasStructure(Entity alias, Attribute att) throws Exception{
jurzua
parents:
diff changeset
264 AliasStructure structure = new AliasStructure();
jurzua
parents:
diff changeset
265 structure.aliasAtt = att;
jurzua
parents:
diff changeset
266 List<Relation> list = null;
jurzua
parents:
diff changeset
267 list = getWrapper().getSourceRelations(alias, "is_prime_alias_title_of", "TEXT", 1);
jurzua
parents:
diff changeset
268 if(list.size() > 0)
jurzua
parents:
diff changeset
269 structure.setRelation(list.get(0));
jurzua
parents:
diff changeset
270 list = getWrapper().getSourceRelations(alias, "is_alias_title_of", "TEXT", 1);
jurzua
parents:
diff changeset
271 if(list.size() > 0)
jurzua
parents:
diff changeset
272 structure.setRelation(list.get(0));
jurzua
parents:
diff changeset
273 list = getWrapper().getSourceRelations(alias, "is_alias_incipit_of", "TEXT", 1);
jurzua
parents:
diff changeset
274 if(list.size() > 0)
jurzua
parents:
diff changeset
275 structure.setRelation(list.get(0));
jurzua
parents:
diff changeset
276 list = getWrapper().getSourceRelations(alias, "is_alias_explicit_of", "TEXT", 1);
jurzua
parents:
diff changeset
277 if(list.size() > 0)
jurzua
parents:
diff changeset
278 structure.setRelation(list.get(0));
jurzua
parents:
diff changeset
279 list = getWrapper().getSourceRelations(alias, "is_prime_alias_name_of", "PERSON", 1);
jurzua
parents:
diff changeset
280 if(list.size() > 0)
jurzua
parents:
diff changeset
281 structure.setRelation(list.get(0));
jurzua
parents:
diff changeset
282 list = getWrapper().getSourceRelations(alias, "is_alias_name_of", "PERSON", 1);
jurzua
parents:
diff changeset
283 if(list.size() > 0)
jurzua
parents:
diff changeset
284 structure.setRelation(list.get(0));
jurzua
parents:
diff changeset
285 return structure;
jurzua
parents:
diff changeset
286 }
jurzua
parents:
diff changeset
287
jurzua
parents:
diff changeset
288 public List<ResultSet> getResultSetList() {
jurzua
parents:
diff changeset
289 return resultSetList;
jurzua
parents:
diff changeset
290 }
jurzua
parents:
diff changeset
291
jurzua
parents:
diff changeset
292 public void setResultSetList(List<ResultSet> resultSetList) {
jurzua
parents:
diff changeset
293 this.resultSetList = resultSetList;
jurzua
parents:
diff changeset
294 }
jurzua
parents:
diff changeset
295 public int getMaxResult() {
jurzua
parents:
diff changeset
296 return maxResult;
jurzua
parents:
diff changeset
297 }
jurzua
parents:
diff changeset
298
jurzua
parents:
diff changeset
299 public void setMaxResult(int maxResult) {
jurzua
parents:
diff changeset
300 this.maxResult = maxResult;
jurzua
parents:
diff changeset
301 }
jurzua
parents:
diff changeset
302 public int getCounter() {
jurzua
parents:
diff changeset
303 return counter;
jurzua
parents:
diff changeset
304 }
jurzua
parents:
diff changeset
305
jurzua
parents:
diff changeset
306 public void setCounter(int counter) {
jurzua
parents:
diff changeset
307 this.counter = counter;
jurzua
parents:
diff changeset
308 }
jurzua
parents:
diff changeset
309 private class AliasStructure implements Serializable{
jurzua
parents:
diff changeset
310 private static final long serialVersionUID = -833933447985472058L;
jurzua
parents:
diff changeset
311
jurzua
parents:
diff changeset
312 public Entity target = null;
jurzua
parents:
diff changeset
313 public Entity alias = null;
jurzua
parents:
diff changeset
314 public Relation rel = null;
jurzua
parents:
diff changeset
315 public String description = "";
jurzua
parents:
diff changeset
316 public Attribute aliasAtt = null;
jurzua
parents:
diff changeset
317
jurzua
parents:
diff changeset
318 public void setRelation(Relation rel){
jurzua
parents:
diff changeset
319 this.rel = rel;
jurzua
parents:
diff changeset
320 this.target = rel.getTarget();
jurzua
parents:
diff changeset
321 this.alias = rel.getSource();
jurzua
parents:
diff changeset
322 this.description = rel.getOwnValue() + " <- ALIAS [alias=" + aliasAtt.getOwnValue() + "]";
jurzua
parents:
diff changeset
323 }
jurzua
parents:
diff changeset
324 }
jurzua
parents:
diff changeset
325 }