Example usage for edu.stanford.nlp.pipeline Annotation toString

Introduction

This page shows example usage of the toString() method of edu.stanford.nlp.pipeline.Annotation.

Prototype

@Override
public String toString()

Source Link

Document

The basic toString() method of an Annotation simply prints out the text over which any annotations have been made (TextAnnotation).

Usage

From source file:org.aksw.simba.cetus.annotator.CetusSurfaceFormExtractor.java

License:Open Source License

/**
 * Builds a parseable representation of the document around a named entity and hands it to the
 * type extractor.
 *
 * <p>Every token that lies completely outside the character span
 * {@code [neStartPos, neEndPos)} is written as {@code text_lemma_POS}; all tokens overlapping
 * that span are collapsed into a single {@link TypeExtractor#ENTITY_MARKING}. Tokens are
 * separated by single spaces. For each token, the character offsets in the original text and
 * in the generated text are recorded in a position table that is passed on to
 * {@code generateNEsForTypes} together with the extracted type strings.
 *
 * <p>Nothing is extracted (and an error is logged) if the document carries no tokens or if no
 * token overlaps the given entity span. A warning is logged if the extractor returns
 * {@code null} for the generated text.
 *
 * @param document   the annotated document; expected to carry token, lemma and part-of-speech
 *                   annotations
 * @param neStartPos start (inclusive) of the named entity in the original text
 * @param neEndPos   end (exclusive) of the named entity in the original text
 * @param results    passed through unchanged to {@code generateNEsForTypes}, which presumably
 *                   adds the entities it creates (not visible here)
 */
private void getTypeSurfaceForms(Annotation document, int neStartPos, int neEndPos,
        List<ExtendedTypedNamedEntity> results) {
    List<CoreLabel> tokens = document.get(TokensAnnotation.class);
    if (tokens == null) {
        // An untokenized document would otherwise fail below with a bare NullPointerException.
        LOGGER.error("Couldn't find any tokens inside the document \"" + document.toString() + "\".");
        return;
    }

    boolean entityFound = false;
    StringBuilder parseableTextBuilder = new StringBuilder();
    // One row per token: original start/end offsets and generated-text start/end offsets.
    int[][] tokenPositions = new int[tokens.size()][4];
    int id = 0;
    for (CoreLabel token : tokens) {
        int[] positions = tokenPositions[id];
        positions[ORIG_TEXT_START] = token.get(CharacterOffsetBeginAnnotation.class);
        positions[ORIG_TEXT_END] = token.get(CharacterOffsetEndAnnotation.class);
        boolean outsideEntity = (positions[ORIG_TEXT_END] <= neStartPos)
                || (positions[ORIG_TEXT_START] >= neEndPos);

        // A separator is needed before every regular token and before the entity marking, but
        // not before the 2nd..nth token of the entity, since those add no text of their own.
        if ((outsideEntity || !entityFound) && (parseableTextBuilder.length() > 0)) {
            parseableTextBuilder.append(' ');
        }
        positions[GEN_TEXT_START] = parseableTextBuilder.length();
        if (outsideEntity) {
            parseableTextBuilder.append(token.getString(TextAnnotation.class));
            parseableTextBuilder.append('_');
            parseableTextBuilder.append(token.getString(LemmaAnnotation.class));
            parseableTextBuilder.append('_');
            parseableTextBuilder.append(token.getString(PartOfSpeechAnnotation.class));
        } else if (!entityFound) {
            // The first token of the entity stands in for the whole entity.
            parseableTextBuilder.append(TypeExtractor.ENTITY_MARKING);
            entityFound = true;
        }
        // Follow-up entity tokens get an empty range located directly behind the marking.
        positions[GEN_TEXT_END] = parseableTextBuilder.length();
        ++id;
    }

    if (!entityFound) {
        LOGGER.error("Couldn't find the named entity (" + neStartPos + ", " + neEndPos
                + ") inside the document \"" + document.toString() + "\".");
        return;
    }

    String parseableText = parseableTextBuilder.toString();
    List<String> types = extractor.extractTypeStrings(parseableText);
    if (types == null) {
        LOGGER.warn("Extractor was not able to process the text \"" + parseableText + "\".");
        return;
    }
    if (LOGGER.isInfoEnabled()) {
        LOGGER.info("Found types " + Arrays.toString(types.toArray()) + " inside the sentence \""
                + parseableText + "\".");
    }
    // Reuse the token list fetched above instead of looking it up in the document again.
    generateNEsForTypes(tokens, tokenPositions, parseableText, types, results);
}