List of usage examples for edu.stanford.nlp.semgraph.SemanticGraph.vertexListSorted()
public List<IndexedWord> vertexListSorted()
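Returns the vertices of the graph as a list sorted by the words' sentence positions (token index). Before the indexed examples, here is a minimal, hypothetical sketch of the method in use; the class name VertexListSortedDemo and the pipeline configuration are illustrative assumptions (CoreNLP 3.5+ with the depparse annotator), not taken from the projects below.

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class VertexListSortedDemo {
    public static void main(String[] args) {
        // hypothetical minimal pipeline: tokenize, tag, and dependency-parse
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation doc = new Annotation("The quick brown fox jumps over the lazy dog.");
        pipeline.annotate(doc);

        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
            // vertexListSorted() returns the vertices ordered by token index
            for (IndexedWord w : sg.vertexListSorted()) {
                System.out.println(w.index() + "\t" + w.word() + "\t" + w.tag());
            }
        }
    }
}

Because the list is sorted by token index, iteration follows surface order, whereas vertexSet() gives no ordering guarantee.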
From source file: ims.cs.corenlp.TokenAligner.java
License: Open Source License
/**
 * Aligns the tokens of a sentence
 * @param pTokens
 * @param cSentence
 */
private void alignTokensStrict(List<Token> pTokens, CoreMap cSentence) {
    Tree tree = cSentence.get(TreeAnnotation.class);
    SemanticGraph dependencies = cSentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    List<CoreLabel> cTokens = cSentence.get(CoreAnnotations.TokensAnnotation.class);
    Iterator<IndexedWord> depIterator = new IndexedWordIterator(dependencies.vertexListSorted().iterator());
    pcTokenList = new ArrayList<Token>(cTokens.size());
    List<Tree> leaves = tree.getLeaves();
    Iterator<Tree> leafIterator = leaves.iterator();
    indexedWord2CoreLabel = new HashMap<IndexedWord, Token>();
    tree2CoreLabel = new HashMap<Tree, Token>();

    // state variables
    Token prevCombinedToken = null;
    Iterator<CoreLabel> cTokenIter = cTokens.iterator();
    Iterator<Token> pTokenIter = pTokens.iterator();
    int currentCoreNlpSentenceIndex = 0;
    CoreLabel cToken = cTokenIter.next();
    Token pToken = pTokenIter.next();
    Token prevPToken = null;
    int pFinal = pTokens.get(pTokens.size() - 1).goldByteCount.getEnd();
    int cFinal = cTokens.get(cTokens.size() - 1).endPosition();
    int pBegin = pToken.goldByteCount.getBegin();
    int pEnd = pToken.goldByteCount.getEnd();
    int cBegin = cToken.beginPosition();
    int cEnd = cToken.endPosition();

    // for compatibility: TreeGraphNode bookkeeping
    Collection<TypedDependency> dependencyEdges = dependencies.typedDependencies();
    List<TreeGraphNode> tgnList = new ArrayList<TreeGraphNode>(cTokens.size());
    for (int i = 0; i < cTokens.size() + 1; i++)
        tgnList.add(null);
    for (TypedDependency edge : dependencyEdges) {
        tgnList.set(edge.gov().index(), edge.gov());
        tgnList.set(edge.dep().index(), edge.dep());
    }
    Iterator<TreeGraphNode> tgnIterator = tgnList.iterator();

    IndexedWord dep = null;
    Tree leaf = null;
    TreeGraphNode tgn = null;

    // move dep and tree iterators forward by 1
    if (depIterator.hasNext())
        dep = depIterator.next();
    if (leafIterator.hasNext())
        leaf = leafIterator.next();
    if (tgnIterator.hasNext())
        tgn = tgnIterator.next();

    // guess a pSentence for debug messages -- may be null if there is no sentence annotation
    Sentence pSentence = pTokens.get(pTokens.size() - 1).sentence;
    String pSentenceId;
    if (pSentence != null) {
        SentenceId id = pSentence.sentenceId;
        pSentenceId = id == null ? "null" : id.toString();
    } else {
        pSentenceId = null;
    }

    boolean usedPToken = false;

    // loop until we reach the end of either sentence
    while ((pFinal != pEnd) || (cFinal != cEnd)) {
        // Check for unwanted conditions:
        // 1. No PARC tokens left?
        //    this happens when the raw text contained tokens that are missing in the PARC data.
        //    these are mostly sentence-final punctuation marks.
        if (pToken == null) {
            // try to recover here for final quotes that the parser predicted. This may be good or bad.
            if (useCoreNlpQuoteCompletion && Helper.isQuote(cToken)) {
                Token combinedToken = combineTokens(prevPToken, cToken, currentCoreNlpSentenceIndex);
                prevCombinedToken.dependencyBackpointer = dep;
                prevCombinedToken.treeBackpointer = leaf;

                // bookkeeping with new token
                if (usedPToken) { // avoid making subsequent tokens start tokens!
                    combinedToken.paragraphBegins = false;
                }
                addNewWord(combinedToken, prevCombinedToken);
            } else {
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                            + "(PARC tokens: " + pTokens + " )");
            }
            // stop processing this sentence, drop remaining CoreNLP data -- in practice, these will never be needed
            break;
        }

        // 2. No CoreNLP tokens left
        if (cToken == null) {
            if (StaticConfig.verbose)
                System.out.println("Unaligned Token(s) in " + pSentenceId + " " + pToken);
            break;
        }

        // check whether tokens at least overlap before continuing processing ...
        pBegin = pToken.goldByteCount.getBegin();
        pEnd = pToken.goldByteCount.getEnd();
        cBegin = cToken.beginPosition();
        cEnd = cToken.endPosition();

        // ... if they don't, try to recover by syncing up
        if (cBegin > pEnd) {
            if (usedPToken) {
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " out of sync " + pToken + " " + cToken + " -- trying to fix");
                if (pTokenIter.hasNext()) {
                    prevPToken = pToken;
                    pToken = pTokenIter.next();
                    continue; // restart the iteration
                } else {
                    if (StaticConfig.verbose)
                        System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                                + "(PARC tokens: " + pTokens + " )");
                    break;
                }
            } else {
                /* this may happen when tokens from previous iterations have a wrong byte count -- skip */
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                            + "(PARC tokens: " + pTokens + " )");
                break;
            }
        }

        // Now the main part. There are three conditions which could occur.
        if (pEnd == cEnd) {
            // 1. Tokens have identical end points
            //    In this case, just combine the tokens and move on
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // move iterators
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            usedPToken = false;

            // add parse information
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        } else if (cEnd > pEnd) {
            // 2. The CoreNLP token is longer than the PARC token
            //    split the CoreNLP token into two parts
            Token combinedToken;
            CoreLabel[] splitCToken = null;
            if (splitType == SplitType.SPLIT) {
                splitCToken = splitToken(cToken, pEnd);
                combinedToken = combineTokens(pToken, splitCToken[0], currentCoreNlpSentenceIndex);
            } else if (splitType == SplitType.NONE_CORENLP) {
                throw new Error();
            } else {
                combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            }
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new pToken to match the remaining bit
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            if (splitType == SplitType.SPLIT)
                cToken = splitCToken[1];
            usedPToken = false;
        } else { // cEnd < pEnd
            // 3. The PARC token is longer than the CoreNLP token
            //    Attach the PARC token to multiple CoreNLP tokens
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new cToken and other CoreNLP data
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            usedPToken = true;
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        }
    }
}
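The aligner above consumes vertexListSorted() through an iterator, advancing it in lockstep with the parse-tree leaves so each combined token can carry both a dependency and a tree backpointer. Below is a condensed sketch of just that pattern; walkInLockstep is a hypothetical helper, not part of TokenAligner. Note that collapsed, CC-processed graphs may omit some tokens as vertices, so the two sequences can drift apart, which is one reason the full aligner needs its recovery logic.

import java.util.Iterator;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class LockstepSketch {
    /** Walk dependency vertices and parse-tree leaves side by side. */
    static void walkInLockstep(CoreMap sentence) {
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        SemanticGraph deps = sentence.get(
                SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);

        Iterator<IndexedWord> depIt = deps.vertexListSorted().iterator();
        Iterator<Tree> leafIt = tree.getLeaves().iterator();

        // Caution: the graph may contain fewer vertices than the tree has
        // leaves, so the pairing below is only approximate.
        while (depIt.hasNext() && leafIt.hasNext()) {
            IndexedWord dep = depIt.next();
            Tree leaf = leafIt.next();
            System.out.println(dep.index() + "\t" + dep.word() + "\t" + leaf.value());
        }
    }
}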
From source file: it.uniroma2.sag.kelp.input.parser.impl.StanfordParserWrapper.java
License: Apache License
@Override
public DependencyGraph parse(String sentenceString) {
    Annotation document = new Annotation(sentenceString);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    CoreMap sentence = sentences.get(0);
    DependencyGraph graph = new DependencyGraph();
    graph.setSentence(sentenceString);
    graph.setParserName("StanfordParser");
    graph.setParserVersion("3.6.0");
    graph.setNodes(new ArrayList<DGNode>());
    int nId = 1;
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        DGNode node = new DGNode();
        Map<String, Object> nodeProps = new HashMap<String, Object>();
        nodeProps.put("surface", token.originalText());
        nodeProps.put("lemma", token.lemma());
        nodeProps.put("pos", token.tag());
        nodeProps.put("start", token.beginPosition());
        nodeProps.put("end", token.endPosition());
        nodeProps.put("id", nId);
        nId++;
        graph.getNodes().add(node);
        node.setProperties(nodeProps);
    }
    SemanticGraph dependencies = null;
    switch (dependencyType) {
    case BASIC:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    case COLLAPSED:
        dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
        break;
    case COLLAPSED_CCPROCESSED:
        dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
        break;
    default:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    }
    dependencies.edgeListSorted();
    List<DGRelation> relations = new ArrayList<DGRelation>();
    for (IndexedWord node : dependencies.vertexListSorted()) {
        DGRelation relation = new DGRelation();
        relation.setProperties(new HashMap<String, Object>());
        DGNode child = graph.getDGNodeById(node.index());
        relation.setTarget(child);
        Collection<IndexedWord> parentsTmp = dependencies.getParents(node);
        ArrayList<IndexedWord> parents = new ArrayList<IndexedWord>();
        for (IndexedWord par : parentsTmp) {
            SemanticGraphEdge edge = dependencies.getEdge(par, node);
            DGNode parent = graph.getDGNodeById(edge.getGovernor().index());
            if (parent.getProperties().get("id") != child.getProperties().get("id"))
                parents.add(par);
        }
        if (parents.isEmpty()) {
            relation.getProperties().put("type", "root");
            relation.getProperties().put("fromId", new Integer(0));
            relation.setSource(null);
            graph.setRoot(relation);
        } else {
            Iterator<IndexedWord> it = parents.iterator();
            while (it.hasNext()) {
                IndexedWord par = it.next();
                SemanticGraphEdge edge = dependencies.getEdge(par, node);
                DGNode parent = graph.getDGNodeById(edge.getGovernor().index());
                relation.setSource(parent);
                relation.getProperties().put("fromId", parent.getProperties().get("id"));
                relation.getProperties().put("type", edge.getRelation().toString());
            }
        }
        relations.add(relation);
    }
    graph.setRelations(relations);
    return graph;
}
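The heart of this wrapper is the per-vertex parent lookup: vertexListSorted() supplies the nodes in sentence order, getParents(node) yields the governors, and getEdge(gov, node) recovers the labeled relation; a vertex with no parents is treated as the root. Here is a self-contained sketch of that pattern; printIncomingEdges is a hypothetical helper, not part of StanfordParserWrapper.

import java.util.Collection;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

public class IncomingEdgeSketch {
    /** Print each vertex in sentence order with its incoming relations. */
    static void printIncomingEdges(SemanticGraph deps) {
        for (IndexedWord node : deps.vertexListSorted()) {
            Collection<IndexedWord> parents = deps.getParents(node);
            if (parents.isEmpty()) {
                // no incoming edge: treat as root, as the wrapper above does
                System.out.println(node.word() + "\troot");
                continue;
            }
            for (IndexedWord gov : parents) {
                SemanticGraphEdge edge = deps.getEdge(gov, node);
                System.out.println(node.word() + "\t" + edge.getRelation() + "\t<- " + gov.word());
            }
        }
    }
}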
From source file: opendial.bn.values.RelationalVal.java
License: Open Source License
public void addGraph(SemanticGraph newGraph) {
    int oldGraphSize = graph.size();
    for (IndexedWord iw : newGraph.vertexListSorted()) {
        IndexedWord copy = new IndexedWord(iw);
        copy.setIndex(graph.size());
        graph.addVertex(copy);
    }
    for (SemanticGraphEdge edge : newGraph.edgeListSorted()) {
        int dep = edge.getDependent().index() + oldGraphSize;
        int gov = edge.getGovernor().index() + oldGraphSize;
        GrammaticalRelation rel = edge.getRelation();
        addEdge(gov, dep, rel.getLongName());
    }
    cachedHashCode = 0;
}
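This method merges another SemanticGraph into the value's internal graph field by copying each vertex with a shifted index and then re-adding the edges. Below is a standalone sketch of the same reindex-and-copy pattern between two SemanticGraph instances; appendGraph is a hypothetical helper, and the new IndexedWord(iw) copy follows the example above (its exact copy semantics vary across CoreNLP versions).

import java.util.HashMap;
import java.util.Map;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

public class GraphMergeSketch {
    /** Append every vertex and edge of extra to base, shifting indices. */
    static void appendGraph(SemanticGraph base, SemanticGraph extra) {
        int offset = base.size(); // number of vertices already in base
        Map<Integer, IndexedWord> copies = new HashMap<Integer, IndexedWord>();
        for (IndexedWord iw : extra.vertexListSorted()) {
            IndexedWord copy = new IndexedWord(iw); // copy vertex, as in the example above
            copy.setIndex(iw.index() + offset);     // keep indices unique in base
            base.addVertex(copy);
            copies.put(iw.index(), copy);
        }
        for (SemanticGraphEdge e : extra.edgeListSorted()) {
            IndexedWord gov = copies.get(e.getGovernor().index());
            IndexedWord dep = copies.get(e.getDependent().index());
            base.addEdge(gov, dep, e.getRelation(), e.getWeight(), e.isExtra());
        }
    }
}

Mapping old indices to the copied vertices, rather than recomputing offsets when re-adding edges, avoids assuming the source graph's indices are contiguous.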