Example usage for edu.stanford.nlp.sequences CoNLLDocumentReaderAndWriter BOUNDARY

List of usage examples for edu.stanford.nlp.sequences CoNLLDocumentReaderAndWriter BOUNDARY

Introduction

On this page you can find example usages of edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter.BOUNDARY.

Prototype

String BOUNDARY

To view the source code for edu.stanford.nlp.sequences.CoNLLDocumentReaderAndWriter.BOUNDARY, click the Source Link.

Usage

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Collects the feature strings built from the token at {@code loc} together with
 * the tokens at {@code loc - 1} and {@code loc - 2}.
 *
 * <p>What is emitted depends on {@code flags}:
 * <ul>
 *   <li>{@code useInternal} and {@code useExternal}: abbreviation, chunk,
 *       {@code "PPSEQ"}, boundary, POS-tag/DistSim and shape-sequence features;</li>
 *   <li>{@code useInternal} only: {@code "PPSEQ"};</li>
 *   <li>{@code useExternal} only: {@code "PPSEQ"} and the shape-sequence feature;</li>
 *   <li>neither: no features.</li>
 * </ul>
 * Every individual feature is additionally gated by its own flag.
 *
 * @param cInfo token list the three tokens are read from
 * @param loc   index of the current token in {@code cInfo}
 * @return a newly created collection holding the features, possibly empty
 */
protected Collection<String> featuresCpCp2C(PaddedList<IN> cInfo, int loc) {
    CoreLabel c = cInfo.get(loc);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);

    Collection<String> features = new ArrayList<String>();

    boolean internal = flags.useInternal;
    boolean external = flags.useExternal;
    if (!internal && !external) {
        return features;
    }
    boolean both = internal && external;

    // NOTE: a "-TTPS" shape feature was once emitted here as well; it duplicated
    // "-TYPETYPES" below and was therefore disabled.

    if (both) {
        if (flags.useAbbr) {
            features.add(p2.get(CoreAnnotations.AbbrAnnotation.class) + '-'
                    + p.get(CoreAnnotations.AbbrAnnotation.class) + '-'
                    + c.get(CoreAnnotations.AbbrAnnotation.class) + "-2PABBRANS");
        }
        if (flags.useChunks) {
            features.add(p2.get(CoreAnnotations.ChunkAnnotation.class) + '-'
                    + p.get(CoreAnnotations.ChunkAnnotation.class) + '-'
                    + c.get(CoreAnnotations.ChunkAnnotation.class) + "-2PCHUNKS");
        }
    }

    // Emitted whenever at least one of useInternal / useExternal is set.
    if (flags.useLongSequences) {
        features.add("PPSEQ");
    }

    if (both) {
        // Constant-first equals: a null word simply does not match instead of throwing.
        // A broader check that also treated "and", "or" and "," like a boundary
        // was tried earlier and did not help.
        if (flags.useBoundarySequences
                && CoNLLDocumentReaderAndWriter.BOUNDARY.equals(getWord(p))) {
            features.add("BNDRY-SPAN-PPSEQ");
        }
        addTaggySequenceFeaturesCpCp2C(features, c, p, p2);
    }

    // The shape sequence is emitted both in the combined and in the external-only case.
    if (external) {
        addShapeSequenceFeatureCpCp2C(features, c, p, p2);
    }

    return features;
}

/**
 * Adds the three-token POS-tag ("-TTS") and DistSim ("-DISTSIM_TTS1") features, plus
 * their "-CS" variants that append the current token's shape, as enabled by {@code flags}.
 */
private void addTaggySequenceFeaturesCpCp2C(Collection<String> features, CoreLabel c, CoreLabel p,
        CoreLabel p2) {
    if (!flags.useTaggySequences) {
        return;
    }
    if (flags.useTags) {
        String tagSeq = p2.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-'
                + p.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-'
                + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
        features.add(tagSeq + "-TTS");
        if (flags.useTaggySequencesShapeInteraction) {
            features.add(tagSeq + '-' + c.get(CoreAnnotations.ShapeAnnotation.class) + "-TTS-CS");
        }
    }
    if (flags.useDistSim) {
        String distSimSeq = p2.get(CoreAnnotations.DistSimAnnotation.class) + '-'
                + p.get(CoreAnnotations.DistSimAnnotation.class) + '-'
                + c.get(CoreAnnotations.DistSimAnnotation.class);
        features.add(distSimSeq + "-DISTSIM_TTS1");
        if (flags.useTaggySequencesShapeInteraction) {
            features.add(distSimSeq + '-' + c.get(CoreAnnotations.ShapeAnnotation.class)
                    + "-DISTSIM_TTS1-CS");
        }
    }
}

/**
 * Adds the three-token shape feature ("-TYPETYPES") when word shapes are in use,
 * both type-sequence flags are set and {@code flags.maxLeft} is at least 2.
 */
private void addShapeSequenceFeatureCpCp2C(Collection<String> features, CoreLabel c, CoreLabel p,
        CoreLabel p2) {
    boolean shapesInUse = flags.wordShape > WordShapeClassifier.NOWORDSHAPE || flags.useShapeStrings;
    if (shapesInUse && flags.useTypeSeqs && flags.useTypeSeqs2 && flags.maxLeft >= 2) {
        features.add(p2.get(CoreAnnotations.ShapeAnnotation.class) + '-'
                + p.get(CoreAnnotations.ShapeAnnotation.class) + '-'
                + c.get(CoreAnnotations.ShapeAnnotation.class) + "-TYPETYPES");
    }
}

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Collects the feature strings built from the token at {@code loc} together with
 * the tokens at {@code loc - 1}, {@code loc - 2} and {@code loc - 3}.
 *
 * <p>The four-token POS-tag ("-TTTS") and DistSim ("-DISTSIM_TTTS1") features are only
 * emitted when {@code flags.useTaggySequences} is set, {@code flags.maxLeft} is at
 * least 3 and {@code flags.dontExtendTaggy} is not set. The {@code "PPPSEQ"} and
 * boundary features only require {@code flags.maxLeft >= 3} plus their own flag.
 *
 * @param cInfo token list the four tokens are read from
 * @param loc   index of the current token in {@code cInfo}
 * @return a newly created collection holding the features, possibly empty
 */
protected Collection<String> featuresCpCp2Cp3C(PaddedList<IN> cInfo, int loc) {
    CoreLabel c = cInfo.get(loc);
    CoreLabel p = cInfo.get(loc - 1);
    CoreLabel p2 = cInfo.get(loc - 2);
    CoreLabel p3 = cInfo.get(loc - 3);

    Collection<String> features = new ArrayList<String>();

    boolean wideEnough = flags.maxLeft >= 3;
    // Shared gate for both the POS-tag and the DistSim four-token features.
    boolean extendTaggy = flags.useTaggySequences && wideEnough && !flags.dontExtendTaggy;

    if (extendTaggy && flags.useTags) {
        String tagSeq = p3.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-'
                + p2.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-'
                + p.getString(CoreAnnotations.PartOfSpeechAnnotation.class) + '-'
                + c.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
        features.add(tagSeq + "-TTTS");
        if (flags.useTaggySequencesShapeInteraction) {
            features.add(tagSeq + '-' + c.get(CoreAnnotations.ShapeAnnotation.class) + "-TTTS-CS");
        }
    }

    if (extendTaggy && flags.useDistSim) {
        String distSimSeq = p3.get(CoreAnnotations.DistSimAnnotation.class) + '-'
                + p2.get(CoreAnnotations.DistSimAnnotation.class) + '-'
                + p.get(CoreAnnotations.DistSimAnnotation.class) + '-'
                + c.get(CoreAnnotations.DistSimAnnotation.class);
        features.add(distSimSeq + "-DISTSIM_TTTS1");
        if (flags.useTaggySequencesShapeInteraction) {
            features.add(distSimSeq + '-' + c.get(CoreAnnotations.ShapeAnnotation.class)
                    + "-DISTSIM_TTTS1-CS");
        }
    }

    if (wideEnough) {
        if (flags.useLongSequences) {
            features.add("PPPSEQ");
        }
        // Constant-first equals: a null word simply does not match instead of throwing.
        if (flags.useBoundarySequences
                && CoNLLDocumentReaderAndWriter.BOUNDARY.equals(getWord(p))) {
            features.add("BNDRY-SPAN-PPPSEQ");
        }
    }

    return features;
}

From source file:de.iisys.ocr.pos.CustomNERFeatureFactory.java

License:Open Source License

/**
 * Collects the long-sequence and boundary features for the token at {@code loc}.
 *
 * <p>Nothing is emitted unless {@code flags.maxLeft} is at least 4. Otherwise
 * {@code "PPPPSEQ"} is added when {@code flags.useLongSequences} is set, and
 * {@code "BNDRY-SPAN-PPPPSEQ"} is added when {@code flags.useBoundarySequences} is set
 * and the word of the token at {@code loc - 1} equals
 * {@code CoNLLDocumentReaderAndWriter.BOUNDARY}.
 *
 * @param cInfo token list the previous token is read from
 * @param loc   index of the current token in {@code cInfo}
 * @return a newly created collection holding the features, possibly empty
 */
protected Collection<String> featuresCpCp2Cp3Cp4C(PaddedList<IN> cInfo, int loc) {
    Collection<String> features = new ArrayList<String>();

    if (flags.maxLeft < 4) {
        return features;
    }

    if (flags.useLongSequences) {
        features.add("PPPPSEQ");
    }

    // The previous token is only looked up when the boundary check is actually needed.
    // Constant-first equals: a null word simply does not match instead of throwing.
    if (flags.useBoundarySequences
            && CoNLLDocumentReaderAndWriter.BOUNDARY.equals(getWord(cInfo.get(loc - 1)))) {
        features.add("BNDRY-SPAN-PPPPSEQ");
    }

    return features;
}