Example usage for edu.stanford.nlp.semgraph SemanticGraph vertexListSorted

Introduction

This page collects example usages of the edu.stanford.nlp.semgraph SemanticGraph method vertexListSorted().

Prototype

public List<IndexedWord> vertexListSorted() 

Document

This returns an ordered list of vertices (based upon their indices in the sentence).
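
For orientation, the following minimal sketch shows one common way to reach this method from a StanfordCoreNLP pipeline; the pipeline configuration, sentence text, and class name are illustrative assumptions rather than part of the usage examples below.

import java.util.List;
import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations.BasicDependenciesAnnotation;
import edu.stanford.nlp.util.CoreMap;

public class VertexListSortedSketch {

    public static void main(String[] args) {
        // assumption: the standard annotators needed for dependency parsing
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize, ssplit, pos, lemma, depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation document = new Annotation("The quick brown fox jumps over the lazy dog.");
        pipeline.annotate(document);

        CoreMap sentence = document.get(CoreAnnotations.SentencesAnnotation.class).get(0);
        SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class);

        // the vertices come back ordered by their indices in the sentence
        List<IndexedWord> vertices = dependencies.vertexListSorted();
        for (IndexedWord word : vertices) {
            System.out.println(word.index() + "\t" + word.word());
        }
    }
}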

Usage

From source file: ims.cs.corenlp.TokenAligner.java

License: Open Source License
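
This example interleaves vertexListSorted() with parse-tree leaf and token iterators to align pre-tokenized PARC tokens with CoreNLP's tokens, attaching a dependency vertex and tree leaf to each combined token.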

/**
 * Aligns the tokens of a sentence.
 * @param pTokens
 * @param cSentence
 */
private void alignTokensStrict(List<Token> pTokens, CoreMap cSentence) {

    Tree tree = cSentence.get(TreeAnnotation.class);
    SemanticGraph dependencies = cSentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    List<CoreLabel> cTokens = cSentence.get(CoreAnnotations.TokensAnnotation.class);

    Iterator<IndexedWord> depIterator = new IndexedWordIterator(dependencies.vertexListSorted().iterator());
    pcTokenList = new ArrayList<Token>(cTokens.size());
    List<Tree> leaves = tree.getLeaves();
    Iterator<Tree> leafIterator = leaves.iterator();

    indexedWord2CoreLabel = new HashMap<IndexedWord, Token>();
    tree2CoreLabel = new HashMap<Tree, Token>();

    // state variables
    Token prevCombinedToken = null;

    Iterator<CoreLabel> cTokenIter = cTokens.iterator();
    Iterator<Token> pTokenIter = pTokens.iterator();
    int currentCoreNlpSentenceIndex = 0;

    CoreLabel cToken = cTokenIter.next();
    Token pToken = pTokenIter.next();
    Token prevPToken = null;

    int pFinal = pTokens.get(pTokens.size() - 1).goldByteCount.getEnd();
    int cFinal = cTokens.get(cTokens.size() - 1).endPosition();

    int pBegin = pToken.goldByteCount.getBegin();
    int pEnd = pToken.goldByteCount.getEnd();

    int cBegin = cToken.beginPosition();
    int cEnd = cToken.endPosition();

    // for compatibility: TreeGraphNode bookkeeping
    Collection<TypedDependency> dependencyEdges = dependencies.typedDependencies();
    List<TreeGraphNode> tgnList = new ArrayList<TreeGraphNode>(cTokens.size());

    for (int i = 0; i < cTokens.size() + 1; i++)
        tgnList.add(null);

    for (TypedDependency edge : dependencyEdges) {
        tgnList.set(edge.gov().index(), edge.gov());
        tgnList.set(edge.dep().index(), edge.dep());
    }

    Iterator<TreeGraphNode> tgnIterator = tgnList.iterator();

    IndexedWord dep = null;
    Tree leaf = null;
    TreeGraphNode tgn = null;

    // move dep and tree iterators forward by 1
    if (depIterator.hasNext())
        dep = depIterator.next();
    if (leafIterator.hasNext())
        leaf = leafIterator.next();
    if (tgnIterator.hasNext())
        tgn = tgnIterator.next();

    // guess a pSentence for debug messages -- may be null if there is no sentence annotation
    Sentence pSentence = pTokens.get(pTokens.size() - 1).sentence;
    String pSentenceId;

    if (pSentence != null) {
        SentenceId id = pSentence.sentenceId;
        pSentenceId = id == null ? "null" : id.toString();
    } else {
        pSentenceId = null;
    }

    boolean usedPToken = false;

    // loop until we reach the end of either sentence
    while ((pFinal != pEnd) || (cFinal != cEnd)) {
        // Check for unwanted conditions:

        //   1. No PARC tokens left?
        //      this happens when the raw text contained tokens that are missing in the PARC data. these are mostly
        //      sentence-final punctuation marks.
        if (pToken == null) {
            // try to recover here for final quotes that the parser predicted. This may be good or bad.
            if (useCoreNlpQuoteCompletion && Helper.isQuote(cToken)) {
                Token combinedToken = combineTokens(prevPToken, cToken, currentCoreNlpSentenceIndex);

                // set the backpointers on the newly combined token, matching the handling below
                combinedToken.dependencyBackpointer = dep;
                combinedToken.treeBackpointer = leaf;

                // bookkeeping with new token
                if (usedPToken) {
                    // avoid making subsequent tokens start tokens!
                    combinedToken.paragraphBegins = false;
                }

                addNewWord(combinedToken, prevCombinedToken);
            } else {
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " " + "(PARC tokens: "
                            + pTokens + " )");
            }

            // stop processing this sentence, drop remaining CoreNLP data -- in practice, these will never be needed
            break;
        }

        //   2. No CoreNLP tokens left
        if (cToken == null) {
            if (StaticConfig.verbose)
                System.out.println("Unaligned Token(s) in " + pSentenceId + " " + pToken);

            break;
        }

        // check whether tokens at least overlap before continuing processing ...
        pBegin = pToken.goldByteCount.getBegin();
        pEnd = pToken.goldByteCount.getEnd();

        cBegin = cToken.beginPosition();
        cEnd = cToken.endPosition();

        // ... if they don't, try to recover by syncing up
        if (cBegin > pEnd) {
            if (usedPToken) {
                if (StaticConfig.verbose)
                    System.out.println(
                            pSentenceId + " out of sync " + pToken + " " + cToken + " -- trying to fix");

                if (pTokenIter.hasNext()) {
                    prevPToken = pToken;
                    pToken = pTokenIter.next();
                    continue; // restart the iteration
                } else {
                    if (StaticConfig.verbose)
                        System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                                + "(PARC tokens: " + pTokens + " )");
                    break;
                }
            } else { /* this may happen when tokens from previous iterations have a wrong byte count -- skip */
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " " + "(PARC tokens: "
                            + pTokens + " )");
                break;
            }
        }

        // Now the main part. There are three conditions which could occur.
        if (pEnd == cEnd) {
            // 1. Tokens have identical end points
            //    In this case, just combine the tokens and move on
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);

            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }

            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // move iterators
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            usedPToken = false;

            // add parse information
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();

        } else if (cEnd > pEnd) {
            // 2. The CoreNLP token is longer than the PARC token
            //    split the CoreNLP token into two parts

            Token combinedToken;
            CoreLabel[] splitCToken = null;

            if (splitType == SplitType.SPLIT) {
                splitCToken = splitToken(cToken, pEnd);

                combinedToken = combineTokens(pToken, splitCToken[0], currentCoreNlpSentenceIndex);
            } else if (splitType == SplitType.NONE_CORENLP) {
                throw new Error();
            } else {
                combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            }

            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }

            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new pToken to match the remaining bit
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }

            if (splitType == SplitType.SPLIT)
                cToken = splitCToken[1];

            usedPToken = false;

        } else { // cEnd < pEnd
            // 3. The PARC token is longer than the CoreNLP token
            //    Attach the PARC token to multiple CoreNLP tokens

            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);

            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }

            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new cToken and other CoreNLP data
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            usedPToken = true;

            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        }
    }
}

From source file: it.uniroma2.sag.kelp.input.parser.impl.StanfordParserWrapper.java

License: Apache License
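
This example converts a CoreNLP dependency parse into KeLP's DependencyGraph representation, iterating over vertexListSorted() to build one relation per vertex.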

@Override
public DependencyGraph parse(String sentenceString) {
    Annotation document = new Annotation(sentenceString);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    CoreMap sentence = sentences.get(0);
    DependencyGraph graph = new DependencyGraph();
    graph.setSentence(sentenceString);
    graph.setParserName("StanfordParser");
    graph.setParserVersion("3.6.0");
    graph.setNodes(new ArrayList<DGNode>());
    int nId = 1;
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        DGNode node = new DGNode();
        Map<String, Object> nodeProps = new HashMap<String, Object>();
        nodeProps.put("surface", token.originalText());
        nodeProps.put("lemma", token.lemma());
        nodeProps.put("pos", token.tag());
        nodeProps.put("start", token.beginPosition());
        nodeProps.put("end", token.endPosition());
        nodeProps.put("id", nId);
        nId++;
        graph.getNodes().add(node);
        node.setProperties(nodeProps);
    }

    SemanticGraph dependencies = null;

    switch (dependencyType) {
    case BASIC:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    case COLLAPSED:
        dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
        break;
    case COLLAPSED_CCPROCESSED:
        dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
        break;
    default:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    }
    dependencies.edgeListSorted(); // note: the sorted edge list is discarded; this call has no side effects
    List<DGRelation> relations = new ArrayList<DGRelation>();
    for (IndexedWord node : dependencies.vertexListSorted()) {
        DGRelation relation = new DGRelation();
        relation.setProperties(new HashMap<String, Object>());
        DGNode child = graph.getDGNodeById(node.index());
        relation.setTarget(child);

        Collection<IndexedWord> parentsTmp = dependencies.getParents(node);
        ArrayList<IndexedWord> parents = new ArrayList<IndexedWord>();
        for (IndexedWord par : parentsTmp) {
            SemanticGraphEdge edge = dependencies.getEdge(par, node);
            DGNode parent = graph.getDGNodeById(edge.getGovernor().index());
            // compare the boxed Integer ids with equals(); != would compare references
            if (!parent.getProperties().get("id").equals(child.getProperties().get("id")))
                parents.add(par);
        }

        if (parents.isEmpty()) {
            relation.getProperties().put("type", "root");
            relation.getProperties().put("fromId", new Integer(0));
            relation.setSource(null);
            graph.setRoot(relation);
        } else {
            Iterator<IndexedWord> it = parents.iterator();
            while (it.hasNext()) {
                IndexedWord par = it.next();
                SemanticGraphEdge edge = dependencies.getEdge(par, node);
                DGNode parent = graph.getDGNodeById(edge.getGovernor().index());

                relation.setSource(parent);
                relation.getProperties().put("fromId", parent.getProperties().get("id"));
                relation.getProperties().put("type", edge.getRelation().toString());
            }
        }
        relations.add(relation);
    }

    graph.setRelations(relations);
    return graph;
}

From source file: opendial.bn.values.RelationalVal.java

License: Open Source License
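
This example merges one SemanticGraph into another, using vertexListSorted() and edgeListSorted() to copy vertices and edges in a deterministic order.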

public void addGraph(SemanticGraph newGraph) {
    int oldGraphSize = graph.size();
    for (IndexedWord iw : newGraph.vertexListSorted()) {
        IndexedWord copy = new IndexedWord(iw);
        copy.setIndex(graph.size());
        graph.addVertex(copy);
    }
    for (SemanticGraphEdge edge : newGraph.edgeListSorted()) {
        int dep = edge.getDependent().index() + oldGraphSize;
        int gov = edge.getGovernor().index() + oldGraphSize;
        GrammaticalRelation rel = edge.getRelation();
        addEdge(gov, dep, rel.getLongName());
    }
    cachedHashCode = 0;
}