comparison src/classifiers/english.conll.4class.distsim.prop @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:036535fcd179
1 # This is better than Jenny's, whether or not distsim is turned on.
2 # Using iob2 also gives better CoNLL performance.
3 # Features titled "chris2009"
4
5 trainFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.train
6 testFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.testa
7 serializeTo = english.conll.4class.distsim.crf.ser.gz
8
9 useDistSim = true
10 distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
11
12 map = word=0,answer=1
13
14 saveFeatureIndexToDisk = true
15
16 useTitle = true
17 useClassFeature=true
18 useWord=true
19 # useWordPairs=true
20 useNGrams=true
21 noMidNGrams=true
22 # maxNGramLeng=6 # Keeping n-grams of all lengths (the default) helps
23 usePrev=true
24 useNext=true
25 # useTags=true
26 # useWordTag=true
27 useLongSequences=true
28 useSequences=true
29 usePrevSequences=true
30 maxLeft=1
31 useTypeSeqs=true
32 useTypeSeqs2=true
33 useTypeySequences=true
34 useOccurrencePatterns=true
35 useLastRealWord=true
36 useNextRealWord=true
37 #useReverse=false
38 normalize=true
39 # normalizeTimex=true
40 # dan2 is better than chris2 on CoNLL data...
41 wordShape=dan2useLC
42 useDisjunctive=true
43 # disjunctionWidth 4 is better than 5 on CoNLL data
44 disjunctionWidth=4
45 #useDisjunctiveShapeInteraction=true
46
47 type=crf
48
49 readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
50
51 useObservedSequencesOnly=true
52
53 sigma = 20
54 useQN = true
55 QNsize = 25
56
57 # Setting featureDiffThresh makes it go faster
58 featureDiffThresh=0.05