Mercurial > hg > anteater
view src/classifiers/english.conll.4class.distsim.prop @ 0:036535fcd179
anteater
author | jdamerow |
---|---|
date | Fri, 14 Sep 2012 10:30:43 +0200 |
parents | |
children |
line wrap: on
line source
# Training configuration for the English CoNLL 4-class CRF model with
# distributional-similarity (distsim) features.
#
# One property per line: in the .properties format a line starting with '#'
# is a comment in its entirety, and there is no trailing-comment syntax, so
# every setting must sit on its own line to be picked up by the loader.

# This is better than Jenny's either with or without distsim turned on
# And using iob2 is better for optimal CoNLL performance.
# Features titled "chris2009"

# --- Data and output ---------------------------------------------------------
# NOTE(review): the paths below are absolute, site-specific locations;
# presumably they must be adjusted before training elsewhere -- confirm.
trainFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.train
testFile = /u/nlp/data/ner/goodClassifiers/data/conll.jenny.testa
serializeTo = english.conll.4class.distsim.crf.ser.gz

# --- Distributional similarity lexicon ---------------------------------------
useDistSim = true
distSimLexicon = /u/nlp/data/pos_tags_are_useless/egw4-reut.512.clusters

# --- Input column layout: column 0 is the word, column 1 is the answer -------
map = word=0,answer=1

saveFeatureIndexToDisk = true

# --- Feature flags -----------------------------------------------------------
useTitle = true
useClassFeature=true
useWord=true
# useWordPairs=true
useNGrams=true
noMidNGrams=true
# maxNGramLeng=6
# Having them all helps, which is the default
usePrev=true
useNext=true
# useTags=true
# useWordTag=true
useLongSequences=true
useSequences=true
usePrevSequences=true
maxLeft=1
useTypeSeqs=true
useTypeSeqs2=true
useTypeySequences=true
useOccurrencePatterns=true
useLastRealWord=true
useNextRealWord=true
#useReverse=false
normalize=true
# normalizeTimex=true
# dan2 better than chris2 on CoNLL data...
wordShape=dan2useLC
useDisjunctive=true
# disjunctionWidth 4 is better than 5 on CoNLL data
disjunctionWidth=4
#useDisjunctiveShapeInteraction=true

# --- Classifier type and document reader -------------------------------------
type=crf

readerAndWriter=edu.stanford.nlp.sequences.ColumnDocumentReaderAndWriter

useObservedSequencesOnly=true

# --- Optimizer settings ------------------------------------------------------
sigma = 20
useQN = true
QNsize = 25

# makes it go faster
featureDiffThresh=0.05