List of usage examples for edu.stanford.nlp.process WordShapeClassifier wordShape
public static String wordShape(String inStr, int wordShaper)
From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java
License:Open Source License
private void initLexicon(SeqClassifierFlags flags) { if (flags.distSimLexicon == null) { return;/*from w w w . j a va 2s.c o m*/ } if (lexicon != null) { return; } Timing.startDoing("Loading distsim lexicon from " + flags.distSimLexicon); lexicon = Generics.newHashMap(); boolean terryKoo = "terryKoo".equals(flags.distSimFileFormat); for (String line : ObjectBank.getLineIterator(flags.distSimLexicon, flags.inputEncoding)) { String word; String wordClass; if (terryKoo) { String[] bits = line.split("\\t"); word = bits[1]; wordClass = bits[0]; if (flags.distSimMaxBits > 0 && wordClass.length() > flags.distSimMaxBits) { wordClass = wordClass.substring(0, flags.distSimMaxBits); } } else { // "alexClark" String[] bits = line.split("\\s+"); word = bits[0]; wordClass = bits[1]; } if (!flags.casedDistSim) { word = word.toLowerCase(); } if (flags.numberEquivalenceDistSim) { word = WordShapeClassifier.wordShape(word, WordShapeClassifier.WORDSHAPEDIGITS); } lexicon.put(word, wordClass); } Timing.endDoing(); }
From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java
License:Open Source License
/**
 * Sets {@code CoreAnnotations.DistSimAnnotation} on the labels in {@code info}
 * by looking each word up in {@code lexicon}.
 *
 * <p>The lookup key is the label's word, lower-cased unless
 * {@code flags.casedDistSim} is set, and replaced by its {@code WORDSHAPEDIGITS}
 * word shape when {@code flags.numberEquivalenceDistSim} is set. Words missing
 * from the lexicon receive {@code flags.unknownWordDistSimClass}.
 *
 * @param info the labels to annotate
 */
private void distSimAnnotate(PaddedList<IN> info) {
    for (CoreLabel token : info) {
        // The first already-annotated label ends the whole pass (return, not
        // continue): labels after it are left untouched.
        if (token.has(CoreAnnotations.DistSimAnnotation.class)) {
            return;
        }

        String key = getWord(token);
        key = flags.casedDistSim ? key : key.toLowerCase();
        key = flags.numberEquivalenceDistSim
                ? WordShapeClassifier.wordShape(key, WordShapeClassifier.WORDSHAPEDIGITS)
                : key;

        final String found = lexicon.get(key);
        final String cluster = (found != null) ? found : flags.unknownWordDistSimClass;
        token.set(CoreAnnotations.DistSimAnnotation.class, cluster);
    }
}
From source file:edu.cmu.geolocator.nlp.ner.FeatureExtractor.ACE_En_FeatureGenerator.java
License:Apache License
private static void genWordShapeFeatures(List<Feature> f, Token[] tokens, int i) throws IOException { int classifierToUse = WORDSHAPECHRIS1; String shapeIns = WordShapeClassifier.wordShape(tokens[i].getToken(), classifierToUse); addFeature(f, tokens[i].getToken() + "_" + shapeIns + "_0_shape"); if (i - 1 > 0) { String previousShape = WordShapeClassifier.wordShape(tokens[i - 1].getToken(), classifierToUse); addFeature(f, tokens[i - 1].getToken() + "_" + previousShape + "_-1_shape"); }//from w ww .ja v a 2 s . c o m if (i - 2 > 0) { String previousShape2 = WordShapeClassifier.wordShape(tokens[i - 2].getToken(), classifierToUse); addFeature(f, tokens[i - 2].getToken() + "_" + previousShape2 + "_-2_shape"); } if (i + 1 < tokens.length) { String nextShape = WordShapeClassifier.wordShape(tokens[i + 1].getToken(), classifierToUse); addFeature(f, tokens[i + 1].getToken() + "_" + nextShape + "_+1_shape"); } if (i + 2 < tokens.length) { String nextShape2 = WordShapeClassifier.wordShape(tokens[i + 2].getToken(), classifierToUse); addFeature(f, tokens[i + 2].getToken() + "_" + nextShape2 + "_+2_shape"); } }
From source file:edu.cmu.geolocator.nlp.ner.FeatureExtractor.SkipNewACE_En_FeatureGenerator.java
License:Apache License
/**
 * Adds word-shape features built from the {@code WORDSHAPECHRIS1} shape of the
 * token at index {@code i}.
 *
 * <p>Up to three features are emitted, in this order:
 * <ol>
 *   <li>{@code <token[i]><shape>}</li>
 *   <li>{@code <token[i-1]><shape>@-1}, when a previous token exists</li>
 *   <li>{@code <token[i+1]><shape>@1}, when a next token exists</li>
 * </ol>
 * In all three, {@code <shape>} is the shape of the <em>current</em> token.
 *
 * <p>Fix: the previous-token guard used to be {@code i - 1 > 0}, which skipped
 * the token at index 0 as a left neighbour even though the right-hand guard
 * reaches the last token. It is now {@code i - 1 >= 0}.
 *
 * @param f      feature list that receives the generated features
 * @param tokens the token sequence
 * @param i      index of the current token; must be a valid index into {@code tokens}
 * @throws IOException declared by the original signature
 */
private static void genWordShapeFeatures(List<Feature> f, Token[] tokens, int i) throws IOException {
    int classifierToUse = WORDSHAPECHRIS1;
    String shapeIns = WordShapeClassifier.wordShape(tokens[i].getToken(), classifierToUse);

    addFeature(f, tokens[i].getToken() + shapeIns);

    // NOTE(review): the neighbour's text is paired with the current token's
    // shape, not the neighbour's own shape. Preserved as-is; confirm it is intended.
    if (i - 1 >= 0) {
        addFeature(f, tokens[i - 1].getToken() + shapeIns + "@-1");
    }
    if (i + 1 < tokens.length) {
        addFeature(f, tokens[i + 1].getToken() + shapeIns + "@1");
    }
}