Example usage for edu.stanford.nlp.ling IndexedWord originalText

Introduction

In this page you can find the example usage for edu.stanford.nlp.ling IndexedWord originalText.

Prototype

@Override
    public String originalText()

Source Link

Usage

From source file:edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler.java

License:Open Source License

@Override
public void addView(TextAnnotation textAnnotation) throws AnnotatorException {
    // If the sentence is longer than STFRD_MAX_SENTENCE_LENGTH there is no point in trying to
    // parse/*w  w w.jav a  2  s  .c o  m*/
    StanfordParseHandler.checkLength(textAnnotation, throwExceptionOnSentenceLengthCheck,
            maxParseSentenceLength);

    TreeView treeView = new TreeView(ViewNames.DEPENDENCY_STANFORD, "StanfordDepHandler", textAnnotation, 1d);
    // The (tokenized) sentence offset in case we have more than one sentences in the record
    List<CoreMap> sentences = StanfordParseHandler.buildStanfordSentences(textAnnotation);
    Annotation document = new Annotation(sentences);
    posAnnotator.annotate(document);
    parseAnnotator.annotate(document);
    sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    if (sentences.get(0).get(TreeCoreAnnotations.TreeAnnotation.class).nodeString().equals("X")) {
        // This is most like because we ran out of time
        throw new AnnotatorException("Unable to parse TextAnnotation " + textAnnotation.getId() + ". "
                + "This is most likely due to a timeout.");
    }

    for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
        boolean runtimeExceptionWasThrown = false;
        CoreMap sentence = sentences.get(sentenceId);
        if (maxParseSentenceLength > 0 && sentence.size() > maxParseSentenceLength) {
            logger.warn(HandlerUtils.getSentenceLengthError(textAnnotation.getId(), sentence.toString(),
                    maxParseSentenceLength));
        } else {
            SemanticGraph depGraph = sentence
                    .get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
            IndexedWord root = null;

            try {
                root = depGraph.getFirstRoot();
            } catch (RuntimeException e) {
                String msg = "ERROR in getting root of dep graph for sentence.  Sentence is:\n"
                        + sentence.toString() + "'\nDependency graph is:\n" + depGraph.toCompactString()
                        + "\nText is:\n" + textAnnotation.getText();
                logger.error(msg);
                System.err.println(msg);
                e.printStackTrace();
                if (throwExceptionOnSentenceLengthCheck)
                    throw e;
                else
                    runtimeExceptionWasThrown = true;
            }

            if (!runtimeExceptionWasThrown) {
                int tokenStart = getNodePosition(textAnnotation, root, sentenceId);
                Pair<String, Integer> nodePair = new Pair<>(root.originalText(), tokenStart);
                Tree<Pair<String, Integer>> tree = new Tree<>(nodePair);
                populateChildren(depGraph, root, tree, textAnnotation, sentenceId);
                treeView.setDependencyTree(sentenceId, tree);
            }
        }
    }
    textAnnotation.addView(getViewName(), treeView);
}

From source file:edu.illinois.cs.cogcomp.pipeline.handlers.StanfordDepHandler.java

License:Open Source License

private void populateChildren(SemanticGraph depGraph, IndexedWord root, Tree<Pair<String, Integer>> tree,
        TextAnnotation ta, int sentId) {
    if (depGraph.getChildren(root).size() == 0)
        return;//from   www .j  av  a 2 s  .  c  om
    for (IndexedWord child : depGraph.getChildren(root)) {
        int childPosition = getNodePosition(ta, child, sentId);
        Pair<String, Integer> nodePair = new Pair<>(child.originalText(), childPosition);
        Tree<Pair<String, Integer>> childTree = new Tree<>(nodePair);
        tree.addSubtree(childTree, new Pair<>(depGraph.getEdge(root, child).toString(), childPosition));
        populateChildren(depGraph, child, childTree, ta, sentId);
    }
}

From source file:eu.ubipol.opinionmining.nlp_engine.Sentence.java

License:Open Source License

protected Sentence(SemanticGraph dependencies, int indexStart, DatabaseAdapter adp, int beginPosition) {
    IndexedWord rootWord = dependencies.getFirstRoot();
    sentenceRoot = new Token(rootWord.originalText(), rootWord.lemma(), rootWord.tag(), null, null,
            rootWord.index() + indexStart, rootWord.beginPosition(), rootWord.endPosition(), adp,
            beginPosition);/*w  ww. j  a  v  a  2 s .c o  m*/
    addChildTokens(sentenceRoot, rootWord, dependencies, indexStart, adp, beginPosition);
    sentenceRoot.transferScores();
    if (sentenceRoot.isAKeyword())
        sentenceRoot.addAspectScore(sentenceRoot.getScore(), sentenceRoot.getWeight(),
                sentenceRoot.getAspect());
    indexStart += dependencies.size();
}

From source file:eu.ubipol.opinionmining.nlp_engine.Sentence.java

License:Open Source License

private void addChildTokens(Token rootToken, IndexedWord currentRoot, SemanticGraph dependencies,
        int indexStart, DatabaseAdapter adp, int beginPosition) {
    for (IndexedWord child : dependencies.getChildren(currentRoot)) {
        Token childToken = new Token(child.originalText(), child.lemma(), child.tag(), rootToken,
                dependencies.getEdge(currentRoot, child).toString(), child.index() + indexStart,
                child.beginPosition(), child.endPosition(), adp, beginPosition);
        rootToken.addChildToken(childToken);
        addChildTokens(childToken, child, dependencies, indexStart, adp, beginPosition);
    }/* w  w  w  . java  2 s  . c o m*/
}

From source file:featureExtractor.NLPFeatures.java

static void processLine(String text, int lineId) throws IOException {
    bw_root.write(Integer.toString(lineId));
    bw_subj.write(Integer.toString(lineId));
    bw_underRoot.write(Integer.toString(lineId));
    bw_nerType.write(Integer.toString(lineId));

    //text = "A gigantic Hong Kong set was constructed in downtown Detroit. The set was so big that the Detroit People Mover track ended up becoming part of the set and shooting had to be adjusted to allow the track to move through the set.  ";//"One of three new television series scheduled for release in 2014 based on DC Comics characters. The others being Constantine (2014) and The Flash (2014).  ";
    HashMap<String, Integer> nerCount = new HashMap<>();
    int superlativePOS = 0;

    try {//from  ww w .  j ava2s. c o m
        Annotation document = new Annotation(text);
        pipeline.annotate(document);

        List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

        for (CoreMap sentence : sentences) {
            SemanticGraph dependencies = sentence
                    .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
            // getting root words
            for (IndexedWord rword : dependencies.getRoots()) {
                //System.out.println(rword.lemma());
                //System.out.println(rword.ner());
                if (rword.ner().equals("O"))
                    bw_root.write("\t" + rword.ner() + ":" + rword.lemma());
                //else if(rword.ner().equals("PERSON"))
                else
                    bw_root.write("\t" + rword.ner() + ":" + rword.originalText());
                /*
                else
                bw_root.write(" entity_" + rword.ner());
                */
                // under root
                for (IndexedWord child : dependencies.getChildren(rword)) {
                    //System.out.println("here: " + child.originalText());
                    /*
                    if(child.ner().equals("PERSON"))
                    bw_underRoot.write(" " + child.originalText());
                    else*/
                    if (!child.ner().equals("O"))
                        bw_underRoot.write("\t" + child.ner() + ":" + child.originalText());
                }

                // nsubj | nsubpass words
                GrammaticalRelation[] subjects = { EnglishGrammaticalRelations.NOMINAL_SUBJECT,
                        EnglishGrammaticalRelations.NOMINAL_PASSIVE_SUBJECT };
                for (IndexedWord current : dependencies.descendants(rword))
                    for (IndexedWord nsubWord : dependencies.getChildrenWithRelns(current,
                            Arrays.asList(subjects))) {
                        //System.out.println("wow: " + nsubWord.originalText());
                        if (!nsubWord.ner().equals("O"))
                            bw_subj.write("\t" + nsubWord.ner() + ":" + nsubWord.originalText());
                        else {
                            //System.out.println(nsubWord.lemma());
                            bw_subj.write("\t" + nsubWord.ner() + ":" + nsubWord.lemma());
                        } /*
                          else
                          bw_subj.write(" entity_"+nsubWord.ner());
                          */
                    }
            }

            // NER Types frequency
            for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
                String pos = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
                String ne = token.get(CoreAnnotations.NamedEntityTagAnnotation.class);

                if (pos.equals("JJS") || pos.equals("RBS"))
                    superlativePOS++;

                nerCount.putIfAbsent(ne, 0);
                nerCount.put(ne, nerCount.get(ne) + 1);
            }

            //System.out.println("dependency graph:\n" + dependencies);
        }
    } catch (Exception e) {
        System.out.println("IGNORED:");
    }

    bw_nerType.write("\t" + Integer.toString(superlativePOS));

    for (String ne : ners) {
        if (nerCount.containsKey(ne))
            bw_nerType.write("\t" + nerCount.get(ne).toString());
        else
            bw_nerType.write("\t0");
    }
    bw_root.write("\n");
    bw_underRoot.write("\n");
    bw_nerType.write("\n");
    bw_subj.write("\n");
    if (lineId % 25 == 0) {
        bw_root.flush();
        bw_underRoot.flush();
        bw_nerType.flush();
        bw_subj.flush();
    }
}