Example usage for edu.stanford.nlp.semgraph SemanticGraph typedDependencies

List of usage examples for edu.stanford.nlp.semgraph SemanticGraph typedDependencies

Introduction

In this page you can find the example usage for edu.stanford.nlp.semgraph SemanticGraph typedDependencies.

Prototype

public Collection<TypedDependency> typedDependencies() 

Source Link

Document

Returns a collection of the TypedDependency relations in the graph.

Usage

From source file:com.project.NLP.Requirement.ParserTreeGenerator.java

/**
 * method to identify the passive sentence using the dependencies generated by stanford coreNLP
 * for a particular sentence /*from  w  w w  .  j  ava  2  s .  c  o m*/
 * 
 * @param sentence
 * @param tree 
 */
public void passiveSentenceIdentification(CoreMap sentence, Tree tree) {
    boolean passive = false;

    /*sentence dependency annotation*/
    SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    Collection collection = dependencies.typedDependencies();

    Iterator iterator = collection.iterator();
    while (iterator.hasNext()) {
        String dependency = iterator.next().toString();
        String dependencyArray[] = dependency.split("\\(");
        if (dependencyArray[0].equalsIgnoreCase("nsubjpass")) {
            passiveSentenceMap.put(tree, true);
        }
    }

}

From source file:coreferenceresolver.util.StanfordUtil.java

/**
 * Initialises the CoreNLP pipeline, reads {@code documentFile} one review per
 * line, annotates each review, and builds the {@code reviews} model while
 * writing a token/POS dump to {@code ./input.txt.pos}.
 *
 * @param simpleInit when {@code true}, the sentiment annotator is skipped and
 *                   sentiment, dependency and comparative processing are omitted
 * @throws FileNotFoundException if {@code documentFile} cannot be opened
 * @throws IOException           on any read or write failure
 */
public void init(boolean simpleInit) throws FileNotFoundException, IOException {
    String outPosFilePath = "./input.txt.pos";
    props = new Properties();
    if (simpleInit) {
        props.put("annotators", "tokenize, ssplit, pos, parse");
    } else {
        props.put("annotators", "tokenize, ssplit, pos, parse, sentiment");
    }
    pipeline = new StanfordCoreNLP(props);

    reviews = new ArrayList<>();

    // try-with-resources: the previous version leaked the writer on exception
    // and never closed the reader at all.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(outPosFilePath)));
            BufferedReader bufferedReader = new BufferedReader(new FileReader(documentFile))) {

        String reviewLine;
        int reviewId = 0;
        int sentenceId;
        // Read the input file line by line; each line is one review.
        while ((reviewLine = bufferedReader.readLine()) != null) {
            sentenceId = 0;
            Review newReview = new Review();

            // Add to reviews list
            newReview.setRawContent(reviewLine);

            // Create an empty Annotation just with the given text and run all
            // annotators on it.
            document = new Annotation(reviewLine);
            pipeline.annotate(document);
            List<CoreMap> sentences = document.get(SentencesAnnotation.class);

            // Begin extracting from paragraphs
            for (CoreMap sentence : sentences) {
                int sentenceOffsetBegin = sentence.get(CharacterOffsetBeginAnnotation.class);
                int sentenceOffsetEnd = sentence.get(CharacterOffsetEndAnnotation.class);
                Sentence newSentence = new Sentence();
                newSentence.setReviewId(reviewId);
                newSentence.setRawContent(sentence.toString());
                newSentence.setOffsetBegin(sentenceOffsetBegin);
                newSentence.setOffsetEnd(sentenceOffsetEnd);

                if (!simpleInit) {
                    int sentimentLevel = RNNCoreAnnotations
                            .getPredictedClass(sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class));
                    newSentence.setSentimentLevel(sentimentLevel);

                    // Dependency parsing
                    SemanticGraph collCCDeps = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
                    Collection<TypedDependency> typedDeps = collCCDeps.typedDependencies();
                    newSentence.setDependencies(typedDeps);
                }

                List<Tree> sentenceTreeLeaves = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).getLeaves();

                int i = 0;
                for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                    Token newToken = new Token();

                    Tree tokenTree = sentenceTreeLeaves.get(i);
                    newToken.setTokenTree(tokenTree);

                    String word = token.get(TextAnnotation.class);
                    newToken.setWord(word);

                    String pos = token.get(PartOfSpeechAnnotation.class);
                    newToken.setPOS(pos);

                    int offsetBegin = token.get(CharacterOffsetBeginAnnotation.class);
                    newToken.setOffsetBegin(offsetBegin);

                    int offsetEnd = token.get(CharacterOffsetEndAnnotation.class);
                    newToken.setOffsetEnd(offsetEnd);

                    if (!simpleInit) {
                        // Check NP relative clause: a token two levels below a
                        // WHNP node (and not "who"/"what") is a relative pronoun.
                        Tree twoLevelsAncestor = tokenTree.ancestor(2,
                                sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
                        if (twoLevelsAncestor.value().equals("WHNP") && !word.toLowerCase().equals("who")
                                && !word.toLowerCase().equals("what")) {
                            newToken.setRelativePronoun(true);
                        }

                        // Calculate sentiment for this token
                        int newTokenSentiment = Util.retrieveOriginalSentiment(newToken.getWord());
                        newToken.setSentimentOrientation(newTokenSentiment, newSentence.getDependencies());
                    }

                    newSentence.addToken(newToken);
                    bw.write(token.word() + "/" + token.tag() + " ");
                    ++i;
                }
                bw.newLine();

                if (!simpleInit) {
                    // Check if this sentence contains a comparative indicator.
                    // If yes, it is a comparative sentence. Identify which NP is
                    // superior or inferior in this sentence.
                    List<Token> comparativeTokens = FeatureExtractor.findComparativeIndicator(newSentence, null,
                            null);
                    // TODO: check special comparative samples
                    if (!comparativeTokens.isEmpty()) {
                        newSentence.initComparatives(comparativeTokens);
                    }
                }

                newReview.addSentence(newSentence);

                // NOTE(review): sentenceId is incremented but never read —
                // possibly meant to be stored on newSentence; confirm.
                ++sentenceId;
            }

            // Sentence separator expected by the .pos consumer.
            bw.write("./.");
            bw.newLine();

            reviews.add(newReview);
            ++reviewId;
        }
    }
}

From source file:ims.cs.corenlp.TokenAligner.java

License:Open Source License

/**
 * Strictly aligns the PARC gold tokens of one sentence with the CoreNLP tokens
 * of the same sentence, producing combined tokens with back-pointers into the
 * dependency graph, the parse tree, and the TreeGraphNode list.
 *
 * Alignment is driven by character offsets: tokens whose end offsets coincide
 * are merged 1:1; a longer CoreNLP token may be split against a shorter PARC
 * token; a longer PARC token may absorb several CoreNLP tokens. Unmatchable
 * tokens are dropped (with optional recovery for parser-predicted quotes).
 *
 * @param pTokens   the PARC gold tokens of the sentence
 * @param cSentence the corresponding CoreNLP sentence annotation
 */
private void alignTokensStrict(List<Token> pTokens, CoreMap cSentence) {

    // CoreNLP views of the sentence: constituency tree, dependency graph, tokens.
    Tree tree = cSentence.get(TreeAnnotation.class);
    SemanticGraph dependencies = cSentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    List<CoreLabel> cTokens = cSentence.get(CoreAnnotations.TokensAnnotation.class);

    Iterator<IndexedWord> depIterator = new IndexedWordIterator(dependencies.vertexListSorted().iterator());
    pcTokenList = new ArrayList<Token>(cTokens.size());
    List<Tree> leaves = tree.getLeaves();
    Iterator<Tree> leafIterator = leaves.iterator();

    indexedWord2CoreLabel = new HashMap<IndexedWord, Token>();
    tree2CoreLabel = new HashMap<Tree, Token>();

    // state variables
    Token prevCombinedToken = null;

    Iterator<CoreLabel> cTokenIter = cTokens.iterator();
    Iterator<Token> pTokenIter = pTokens.iterator();
    int currentCoreNlpSentenceIndex = 0;

    // Current token from each side; prevPToken backs the quote-recovery path.
    CoreLabel cToken = cTokenIter.next();
    Token pToken = pTokenIter.next();
    Token prevPToken = null;

    // End offsets of the last token on each side; the main loop runs until
    // both sides have consumed up to these offsets.
    int pFinal = pTokens.get(pTokens.size() - 1).goldByteCount.getEnd();
    int cFinal = cTokens.get(cTokens.size() - 1).endPosition();

    int pBegin = pToken.goldByteCount.getBegin();
    int pEnd = pToken.goldByteCount.getEnd();

    int cBegin = cToken.beginPosition();
    int cEnd = cToken.endPosition();

    // for compatibility: TreeGraphNode bookkeeping
    Collection<TypedDependency> dependencyEdges = dependencies.typedDependencies();
    List<TreeGraphNode> tgnList = new ArrayList<TreeGraphNode>(cTokens.size());

    // Dependency node indices are 1-based (0 appears reserved for the root),
    // hence size()+1 null slots before scattering governors/dependents in.
    for (int i = 0; i < cTokens.size() + 1; i++)
        tgnList.add(null);

    for (TypedDependency edge : dependencyEdges) {
        tgnList.set(edge.gov().index(), edge.gov());
        tgnList.set(edge.dep().index(), edge.dep());
    }

    Iterator<TreeGraphNode> tgnIterator = tgnList.iterator();

    // Parse-side cursors advanced in lockstep with the CoreNLP token cursor.
    IndexedWord dep = null;
    Tree leaf = null;
    TreeGraphNode tgn = null;

    // move dep and tree iterators forward by 1
    if (depIterator.hasNext())
        dep = depIterator.next();
    if (leafIterator.hasNext())
        leaf = leafIterator.next();
    if (tgnIterator.hasNext())
        tgn = tgnIterator.next();

    // guess a pSentence for debug messages -- may be null if there is no sentence annotation
    Sentence pSentence = pTokens.get(pTokens.size() - 1).sentence;
    String pSentenceId;

    if (pSentence != null) {
        SentenceId id = pSentence.sentenceId;
        pSentenceId = id == null ? "null" : id.toString();
    } else {
        pSentenceId = null;
    }

    // True while the current PARC token has already been combined once (i.e.
    // it spans multiple CoreNLP tokens).
    boolean usedPToken = false;

    // loop until we reach the end of either sentence
    while ((pFinal != pEnd) || (cFinal != cEnd)) {
        // Check for unwanted conditions:

        //   1. No PARC tokens left?
        //      this happens when the raw text contained tokens that are missing in the PARC data. these are mostly
        //      sentence-final punctuation marks.
        if (pToken == null) {
            // try to recover here for final quotes that the parser predicted. This may be good or bad.
            if (useCoreNlpQuoteCompletion && Helper.isQuote(cToken)) {
                Token combinedToken = combineTokens(prevPToken, cToken, currentCoreNlpSentenceIndex);

                // NOTE(review): prevCombinedToken may still be null here if the
                // very first PARC token was missing — possible NPE; confirm.
                prevCombinedToken.dependencyBackpointer = dep;
                prevCombinedToken.treeBackpointer = leaf;

                // bookkeeping with new token
                if (usedPToken) {
                    // avoid making subsequent tokens start tokens!
                    combinedToken.paragraphBegins = false;
                }

                addNewWord(combinedToken, prevCombinedToken);
            } else {
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " " + "(PARC tokens: "
                            + pTokens + " )");
            }

            // stop processing this sentence, drop remaining CoreNLP data -- in practice, these will never be needed
            break;
        }

        //   2. No CoreNLP tokens left
        if (cToken == null) {
            if (StaticConfig.verbose)
                System.out.println("Unaligned Token(s) in " + pSentenceId + " " + pToken);

            break;
        }

        // check whether tokens at least overlap before continuing processing ...
        pBegin = pToken.goldByteCount.getBegin();
        pEnd = pToken.goldByteCount.getEnd();

        cBegin = cToken.beginPosition();
        cEnd = cToken.endPosition();

        // ... if they don't, try to recover by syncing up
        if (cBegin > pEnd) {
            if (usedPToken) {
                if (StaticConfig.verbose)
                    System.out.println(
                            pSentenceId + " out of sync " + pToken + " " + cToken + " -- trying to fix");

                if (pTokenIter.hasNext()) {
                    prevPToken = pToken;
                    pToken = pTokenIter.next();
                    continue; // restart the iteration
                } else {
                    if (StaticConfig.verbose)
                        System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                                + "(PARC tokens: " + pTokens + " )");
                    break;
                }
            } else { /* this may happen when tokens from previous iterations have a wrong byte count -- skip */
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " " + "(PARC tokens: "
                            + pTokens + " )");
                break;
            }
        }

        // Now the main part. There are three conditions which could occur.
        if (pEnd == cEnd) {
            // 1. Tokens have identical end points
            //    In this case, just combine the tokens and move on
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);

            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }

            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // move iterators
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            usedPToken = false;

            // add parse information
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();

        } else if (cEnd > pEnd) {
            // 2. The CoreNLP token is longer than the PARC token
            //    split the CoreNLP token into two parts

            Token combinedToken;
            CoreLabel[] splitCToken = null;

            if (splitType == SplitType.SPLIT) {
                splitCToken = splitToken(cToken, pEnd);

                combinedToken = combineTokens(pToken, splitCToken[0], currentCoreNlpSentenceIndex);
            } else if (splitType == SplitType.NONE_CORENLP) {
                // not supported in strict alignment
                throw new Error();
            } else {
                combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            }

            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }

            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new pToken to match the remaining bit
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }

            // keep the remainder of the split CoreNLP token for the next round
            if (splitType == SplitType.SPLIT)
                cToken = splitCToken[1];

            usedPToken = false;

        } else { // cEnd < pEnd
            // 3. The PARC token is longer than the CoreNLP token
            //    Attach the PARC token to multiple CoreNLP tokens

            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);

            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }

            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new cToken and other CoreNLP data
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            usedPToken = true;

            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        }
    }
}

From source file:jnetention.nlp.TextParse.java

/**
 * Builds a single SemanticGraph aggregating the typed dependencies of every
 * sentence in this text.
 *
 * @param b passed through to {@code getDependencies(CoreMap, boolean)} —
 *          semantics defined there
 * @return a new SemanticGraph over all sentences' typed dependencies
 */
public SemanticGraph getDependencies(boolean b) {
    // Use the diamond operator instead of a raw ArrayList (unchecked warning).
    List<TypedDependency> allDeps = new ArrayList<>();
    for (CoreMap sentence : getSentences()) {
        allDeps.addAll(getDependencies(sentence, b).typedDependencies());
    }
    return new SemanticGraph(allDeps);
}

From source file:qa.StanfordDepParser.java

/**
 * Annotates {@code documentText} and returns the dependency tree of its first
 * sentence, built by round-tripping through a CoNLL file on disk ("temp.dep").
 *
 * @param documentText the text to parse; must yield at least one sentence
 * @return the dependency tree of the first sentence
 * @throws IOException if the temporary CoNLL file cannot be written or read
 */
public synchronized DependencyTree parse(String documentText) throws IOException {
    // Create an empty Annotation just with the given text and run all annotators.
    Annotation document = new Annotation(documentText);
    this.pipeline.annotate(document);

    // Collapsed CC-processed dependencies of the first sentence.
    // NOTE(review): this result is never used below — retained because the
    // lookup also fails fast when annotation produced no sentences; confirm
    // whether it can be removed.
    SemanticGraph ccProcessed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0)
            .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    Collection<TypedDependency> dependencies = ccProcessed.typedDependencies();

    // try-with-resources: the previous version never closed this stream.
    try (FileOutputStream out = new FileOutputStream(new File("temp.dep"))) {
        CoNLLOutputter.conllPrint(document, out);
    }
    String conllString = FileUtil.readCoNLLFormat("temp.dep");

    return DependencyTree.fromCoNLLFormatString(conllString);
}

From source file:qa.StanfordDepParser.java

/**
 * Annotates {@code documentText} and returns its parse in CoNLL format,
 * round-tripped through a temporary file on disk ("temp.dep").
 *
 * @param documentText the text to parse; must yield at least one sentence
 * @return the CoNLL-format string for the annotated document
 * @throws IOException if the temporary CoNLL file cannot be written or read
 */
public String parseCoNLL(String documentText) throws IOException {
    // Create an empty Annotation just with the given text and run all annotators.
    Annotation document = new Annotation(documentText);
    this.pipeline.annotate(document);

    // Collapsed CC-processed dependencies of the first sentence.
    // NOTE(review): this result is never used below — retained because the
    // lookup also fails fast when annotation produced no sentences; confirm
    // whether it can be removed.
    SemanticGraph ccProcessed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0)
            .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
    Collection<TypedDependency> dependencies = ccProcessed.typedDependencies();

    // try-with-resources: the previous version never closed this stream.
    try (FileOutputStream out = new FileOutputStream(new File("temp.dep"))) {
        CoNLLOutputter.conllPrint(document, out);
    }
    return FileUtil.readCoNLLFormat("temp.dep");
}