Example usage for edu.stanford.nlp.process WordShapeClassifier wordShape

List of usage examples for edu.stanford.nlp.process WordShapeClassifier wordShape

Introduction

On this page you can find example usages of edu.stanford.nlp.process WordShapeClassifier wordShape.

Prototype

public static String wordShape(String inStr, int wordShaper) 

Source Link

Document

Takes the String to classify and an int identifying which word shaper to use, and returns the result of applying that word shaper to the String.

Usage

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Populates the {@code lexicon} map (word to distributional-similarity class) from the file
 * named by {@code flags.distSimLexicon}.
 *
 * <p>Does nothing when no lexicon file is configured or when {@code lexicon} has already been
 * built. Two line formats are understood:
 * <ul>
 *   <li>{@code "terryKoo"} (selected via {@code flags.distSimFileFormat}): tab-separated, class
 *       in the first column and word in the second; the class is cut down to
 *       {@code flags.distSimMaxBits} characters when that limit is positive;</li>
 *   <li>anything else ("alexClark"): whitespace-separated, word first and class second.</li>
 * </ul>
 * Words are lower-cased unless {@code flags.casedDistSim} is set, and are passed through
 * {@link WordShapeClassifier#wordShape} with {@code WORDSHAPEDIGITS} when
 * {@code flags.numberEquivalenceDistSim} is set, before being stored as map keys.
 *
 * @param flags configuration providing the lexicon location, encoding and format options
 */
private void initLexicon(SeqClassifierFlags flags) {
    // Nothing to load, or the lexicon was already loaded by an earlier call.
    if (flags.distSimLexicon == null || lexicon != null) {
        return;
    }

    Timing.startDoing("Loading distsim lexicon from " + flags.distSimLexicon);
    lexicon = Generics.newHashMap();
    final boolean terryKooFormat = "terryKoo".equals(flags.distSimFileFormat);

    for (String entry : ObjectBank.getLineIterator(flags.distSimLexicon, flags.inputEncoding)) {
        final String key;
        String cluster;
        if (terryKooFormat) {
            // Columns: <class> TAB <word>
            final String[] columns = entry.split("\\t");
            key = columns[1];
            cluster = columns[0];
            final int maxBits = flags.distSimMaxBits;
            if (maxBits > 0 && cluster.length() > maxBits) {
                cluster = cluster.substring(0, maxBits);
            }
        } else {
            // "alexClark" -- columns: <word> <whitespace> <class>
            final String[] columns = entry.split("\\s+");
            key = columns[0];
            cluster = columns[1];
        }

        // Normalise the key the same way lookups are expected to normalise their words.
        String normalized = flags.casedDistSim ? key : key.toLowerCase();
        if (flags.numberEquivalenceDistSim) {
            normalized = WordShapeClassifier.wordShape(normalized, WordShapeClassifier.WORDSHAPEDIGITS);
        }
        lexicon.put(normalized, cluster);
    }
    Timing.endDoing();
}

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Sets {@code CoreAnnotations.DistSimAnnotation} on the labels of {@code info} using the
 * {@code lexicon} map.
 *
 * <p>Each word is lower-cased unless {@code flags.casedDistSim} is set and, when
 * {@code flags.numberEquivalenceDistSim} is set, passed through
 * {@link WordShapeClassifier#wordShape} with {@code WORDSHAPEDIGITS} before the lookup.
 * Words missing from the lexicon receive {@code flags.unknownWordDistSimClass}.
 *
 * <p>The method returns as soon as it meets a label that already carries the annotation;
 * labels after that one are not visited.
 *
 * @param info the labels to annotate
 */
private void distSimAnnotate(PaddedList<IN> info) {
    for (CoreLabel label : info) {
        if (label.has(CoreAnnotations.DistSimAnnotation.class)) {
            // An existing annotation ends the whole pass, not just this iteration.
            return;
        }

        String key = getWord(label);
        if (!flags.casedDistSim) {
            key = key.toLowerCase();
        }
        if (flags.numberEquivalenceDistSim) {
            key = WordShapeClassifier.wordShape(key, WordShapeClassifier.WORDSHAPEDIGITS);
        }

        final String known = lexicon.get(key);
        final String cluster = (known != null) ? known : flags.unknownWordDistSimClass;
        label.set(CoreAnnotations.DistSimAnnotation.class, cluster);
    }
}

From source file:edu.cmu.geolocator.nlp.ner.FeatureExtractor.ACE_En_FeatureGenerator.java

License:Apache License

/**
 * Adds word-shape features for the token at index {@code i} and for up to two neighbouring
 * tokens on each side.
 *
 * <p>Each feature is the string {@code token + "_" + shape + "_<offset>_shape"}, where
 * {@code shape} is the result of {@link WordShapeClassifier#wordShape} for that token and
 * {@code <offset>} is one of {@code 0}, {@code -1}, {@code -2}, {@code +1}, {@code +2}.
 * Neighbours that fall outside the array are skipped.
 *
 * <p>NOTE(review): the left-context guards used to be {@code i - 1 > 0} and {@code i - 2 > 0},
 * which never let {@code tokens[0]} act as a left neighbour. They are now {@code >= 0}, so
 * additional features are emitted near the start of the array; any model trained on the
 * previous feature set presumably needs retraining -- confirm.
 *
 * @param f      list that receives the generated features (via {@code addFeature})
 * @param tokens the token sequence
 * @param i      index of the current token; must be a valid index into {@code tokens}
 * @throws IOException declared by the original signature; presumably raised by
 *                     {@code addFeature} -- confirm
 */
private static void genWordShapeFeatures(List<Feature> f, Token[] tokens, int i) throws IOException {
    int classifierToUse = WORDSHAPECHRIS1;

    // Current token.
    String shapeIns = WordShapeClassifier.wordShape(tokens[i].getToken(), classifierToUse);
    addFeature(f, tokens[i].getToken() + "_" + shapeIns + "_0_shape");

    // Left context: index 0 is a valid neighbour, hence the inclusive bound.
    if (i - 1 >= 0) {
        String previousShape = WordShapeClassifier.wordShape(tokens[i - 1].getToken(), classifierToUse);
        addFeature(f, tokens[i - 1].getToken() + "_" + previousShape + "_-1_shape");
    }
    if (i - 2 >= 0) {
        String previousShape2 = WordShapeClassifier.wordShape(tokens[i - 2].getToken(), classifierToUse);
        addFeature(f, tokens[i - 2].getToken() + "_" + previousShape2 + "_-2_shape");
    }

    // Right context.
    if (i + 1 < tokens.length) {
        String nextShape = WordShapeClassifier.wordShape(tokens[i + 1].getToken(), classifierToUse);
        addFeature(f, tokens[i + 1].getToken() + "_" + nextShape + "_+1_shape");
    }
    if (i + 2 < tokens.length) {
        String nextShape2 = WordShapeClassifier.wordShape(tokens[i + 2].getToken(), classifierToUse);
        addFeature(f, tokens[i + 2].getToken() + "_" + nextShape2 + "_+2_shape");
    }
}

From source file:edu.cmu.geolocator.nlp.ner.FeatureExtractor.SkipNewACE_En_FeatureGenerator.java

License:Apache License

/**
 * Adds features that combine the word shape of the token at index {@code i} with the text of
 * that token and of its immediate neighbours.
 *
 * <p>Three features may be added: {@code token[i] + shape}, {@code token[i-1] + shape + "@-1"}
 * and {@code token[i+1] + shape + "@1"}, where {@code shape} is always the shape of the
 * current token. Neighbours that fall outside the array are skipped.
 *
 * <p>NOTE(review): the left-neighbour guard used to be {@code i - 1 > 0}, which never let
 * {@code tokens[0]} act as the left neighbour. It is now {@code >= 0}; any model trained on
 * the previous feature set presumably needs retraining -- confirm.
 *
 * @param f      list that receives the generated features (via {@code addFeature})
 * @param tokens the token sequence
 * @param i      index of the current token; must be a valid index into {@code tokens}
 * @throws IOException declared by the original signature; presumably raised by
 *                     {@code addFeature} -- confirm
 */
private static void genWordShapeFeatures(List<Feature> f, Token[] tokens, int i) throws IOException {
    int classifierToUse = WORDSHAPECHRIS1;
    String shapeIns = WordShapeClassifier.wordShape(tokens[i].getToken(), classifierToUse);
    addFeature(f, tokens[i].getToken() + shapeIns);

    // NOTE(review): neighbour features reuse the CURRENT token's shape rather than the
    // neighbour's own shape; kept as in the original -- confirm this is intended.
    if (i - 1 >= 0) {
        // Inclusive bound so that index 0 can be the left neighbour.
        addFeature(f, tokens[i - 1].getToken() + shapeIns + "@-1");
    }
    if (i + 1 < tokens.length) {
        addFeature(f, tokens[i + 1].getToken() + shapeIns + "@1");
    }
}