comparison src/classifiers/english.all.3class.distsim.prop @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:036535fcd179
1 trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
2 testFile = /u/nlp/data/ner/column_data/conll.testa
3 serializeTo = english.all.3class.distsim.crf.ser.gz
4
5 type = crf
6
7 #distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
8 #distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
9 distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
10 useDistSim = true
11
12 map = word=0,answer=1
13
14 saveFeatureIndexToDisk = true
15
16 useClassFeature=true
17 useWord=true
18 #useWordPairs=true
19 useNGrams=true
20 noMidNGrams=true
21 maxNGramLeng=6
22 usePrev=true
23 useNext=true
24 #useTags=true
25 #useWordTag=true
26 useLongSequences=true
27 useSequences=true
28 usePrevSequences=true
29 maxLeft=1
30 useTypeSeqs=true
31 useTypeSeqs2=true
32 useTypeySequences=true
33 useOccurrencePatterns=true
34 useLastRealWord=true
35 useNextRealWord=true
36 #useReverse=false
37 normalize=true
38 # normalizeTimex=true
39 wordShape=chris2useLC
40 useDisjunctive=true
41 disjunctionWidth=5
42 #useDisjunctiveShapeInteraction=true
43
44 readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
45
46 useObservedSequencesOnly=true
47
48 useQN = true
49 QNsize = 25
50
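51 # featureDiffThresh speeds up training (presumably by pruning low-impact features below the threshold; confirm against SeqClassifierFlags docs)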
52 featureDiffThresh=0.05