# Properties for training a CRF sequence classifier (type = crf) read through
# edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter.
# Format: Java .properties -- one "key = value" per line, "#" starts a
# full-line comment. Do not add trailing comments after a value.
#
# This is better than Jenny's either with or without distsim turned on
# And using iob2 is better for optimal CoNLL performance.
# Features titled "chris2009"

# Training data, evaluation data, and the file the trained model is written to.
trainFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.train
testFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.testa
serializeTo = english.conll.4class.distsim.crf.ser.gz

# Distributional-similarity features and the cluster lexicon they are read from.
useDistSim = true
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters

# Column layout of the input files: column 0 is the word, column 1 the answer.
map = word=0,answer=1

saveFeatureIndexToDisk = true

# Feature flags. Commented-out entries are disabled alternatives kept for
# reference.
useTitle = true
useClassFeature = true
useWord = true
# useWordPairs = true
useNGrams = true
noMidNGrams = true
# Having all n-gram lengths helps, which is the default:
# maxNGramLeng = 6
usePrev = true
useNext = true
# useTags = true
# useWordTag = true
useLongSequences = true
useSequences = true
usePrevSequences = true
maxLeft = 1
useTypeSeqs = true
useTypeSeqs2 = true
useTypeySequences = true
useOccurrencePatterns = true
useLastRealWord = true
useNextRealWord = true
# useReverse = false
normalize = true
# normalizeTimex = true
# dan2 better than chris2 on CoNLL data...
wordShape = dan2useLC
useDisjunctive = true
# disjunctionWidth 4 is better than 5 on CoNLL data
disjunctionWidth = 4
# useDisjunctiveShapeInteraction = true

type = crf

readerAndWriter = edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter

useObservedSequencesOnly = true

# Training/optimizer settings (NOTE(review): presumably sigma is the prior
# width and QN the quasi-Newton optimizer -- confirm against SeqClassifierFlags).
sigma = 20
useQN = true
QNsize = 25

# makes it go faster
featureDiffThresh = 0.05