diff src/classifiers/english.all.3class.distsim.prop @ 0:036535fcd179

anteater
author jdamerow
date Fri, 14 Sep 2012 10:30:43 +0200
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/classifiers/english.all.3class.distsim.prop	Fri Sep 14 10:30:43 2012 +0200
@@ -0,0 +1,52 @@
+trainFile = /u/nlp/data/ner/goodClassifiers/data/all.3class.train
+testFile = /u/nlp/data/ner/column_data/conll.testa
+serializeTo = english.all.3class.distsim.crf.ser.gz
+
+type = crf
+
+#distSimLexicon = /u/nlp/data/pos_tags_are_useless/englishGigaword.200.pruned
+#distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw.bnc.200
+distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters
+useDistSim = true
+
+map = word=0,answer=1
+
+saveFeatureIndexToDisk = true
+
+useClassFeature=true
+useWord=true
+#useWordPairs=true
+useNGrams=true
+noMidNGrams=true
+maxNGramLeng=6
+usePrev=true
+useNext=true
+#useTags=true
+#useWordTag=true
+useLongSequences=true
+useSequences=true
+usePrevSequences=true
+maxLeft=1
+useTypeSeqs=true
+useTypeSeqs2=true
+useTypeySequences=true
+useOccurrencePatterns=true
+useLastRealWord=true
+useNextRealWord=true
+#useReverse=false
+normalize=true
+# normalizeTimex=true
+wordShape=chris2useLC
+useDisjunctive=true
+disjunctionWidth=5
+#useDisjunctiveShapeInteraction=true
+
+readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter
+
+useObservedSequencesOnly=true
+
+useQN = true
+QNsize = 25
+
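+# Speed-up: presumably prunes features whose weights differ by less than this threshold (TODO confirm in Stanford NER docs)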
+featureDiffThresh=0.05