annotate src/classifiers/english.conll.4class.distsim.prop @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
036535fcd179 anteater
jdamerow
parents:
diff changeset
1 # This is better than Jenny's, either with or without distsim turned on.
036535fcd179 anteater
jdamerow
parents:
diff changeset
2 # And using iob2 is better for optimal CoNLL performance.
036535fcd179 anteater
jdamerow
parents:
diff changeset
3 # Features titled "chris2009"
036535fcd179 anteater
jdamerow
parents:
diff changeset
4
036535fcd179 anteater
jdamerow
parents:
diff changeset
5 trainFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.train
036535fcd179 anteater
jdamerow
parents:
diff changeset
6 testFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.testa
036535fcd179 anteater
jdamerow
parents:
diff changeset
7 serializeTo = english.conll.4class.distsim.crf.ser.gz
036535fcd179 anteater
jdamerow
parents:
diff changeset
8
036535fcd179 anteater
jdamerow
parents:
diff changeset
9 useDistSim = true
036535fcd179 anteater
jdamerow
parents:
diff changeset
10 distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
036535fcd179 anteater
jdamerow
parents:
diff changeset
11
036535fcd179 anteater
jdamerow
parents:
diff changeset
12 map = word=0,answer=1
036535fcd179 anteater
jdamerow
parents:
diff changeset
13
036535fcd179 anteater
jdamerow
parents:
diff changeset
14 saveFeatureIndexToDisk = true
036535fcd179 anteater
jdamerow
parents:
diff changeset
15
036535fcd179 anteater
jdamerow
parents:
diff changeset
16 useTitle = true
036535fcd179 anteater
jdamerow
parents:
diff changeset
17 useClassFeature=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
18 useWord=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
19 # useWordPairs=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
20 useNGrams=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
21 noMidNGrams=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
22 # maxNGramLeng=6 # Having all n-gram lengths helps, and that is the default
036535fcd179 anteater
jdamerow
parents:
diff changeset
23 usePrev=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
24 useNext=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
25 # useTags=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
26 # useWordTag=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
27 useLongSequences=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
28 useSequences=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
29 usePrevSequences=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
30 maxLeft=1
036535fcd179 anteater
jdamerow
parents:
diff changeset
31 useTypeSeqs=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
32 useTypeSeqs2=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
33 useTypeySequences=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
34 useOccurrencePatterns=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
35 useLastRealWord=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
36 useNextRealWord=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
37 #useReverse=false
036535fcd179 anteater
jdamerow
parents:
diff changeset
38 normalize=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
39 # normalizeTimex=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
40 # dan2 is better than chris2 on CoNLL data.
036535fcd179 anteater
jdamerow
parents:
diff changeset
41 wordShape=dan2useLC
036535fcd179 anteater
jdamerow
parents:
diff changeset
42 useDisjunctive=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
43 # disjunctionWidth 4 is better than 5 on CoNLL data
036535fcd179 anteater
jdamerow
parents:
diff changeset
44 disjunctionWidth=4
036535fcd179 anteater
jdamerow
parents:
diff changeset
45 #useDisjunctiveShapeInteraction=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
46
036535fcd179 anteater
jdamerow
parents:
diff changeset
47 type=crf
036535fcd179 anteater
jdamerow
parents:
diff changeset
48
036535fcd179 anteater
jdamerow
parents:
diff changeset
49 readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
036535fcd179 anteater
jdamerow
parents:
diff changeset
50
036535fcd179 anteater
jdamerow
parents:
diff changeset
51 useObservedSequencesOnly=true
036535fcd179 anteater
jdamerow
parents:
diff changeset
52
036535fcd179 anteater
jdamerow
parents:
diff changeset
53 sigma = 20
036535fcd179 anteater
jdamerow
parents:
diff changeset
54 useQN = true
036535fcd179 anteater
jdamerow
parents:
diff changeset
55 QNsize = 25
036535fcd179 anteater
jdamerow
parents:
diff changeset
56
036535fcd179 anteater
jdamerow
parents:
diff changeset
57 # featureDiffThresh (set below) makes it go faster
036535fcd179 anteater
jdamerow
parents:
diff changeset
58 featureDiffThresh=0.05