Example usage for the edu.stanford.nlp.semgraph.SemanticGraph constructor

List of usage examples for the edu.stanford.nlp.semgraph.SemanticGraph constructor

Introduction

On this page you can find example usages of the edu.stanford.nlp.semgraph.SemanticGraph constructor.

Prototype

public SemanticGraph(Collection<TypedDependency> dependencies) 

Source Link

Document

This is the constructor used by the parser.

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.DKPro2CoreNlp.java

License:Open Source License

/**
 * Populates a CoreNLP {@link Annotation} from the contents of a UIMA {@link JCas}.
 *
 * <p>The document text is copied first. Then, for every non-blank {@code Sentence} in the CAS, a
 * sentence-level annotation is built that carries character offsets, the sentence index, the
 * tokens (with POS, lemma, stem and a token-level named-entity tag where available), a
 * constituent tree for each covered {@code ROOT}, and a {@link SemanticGraph} built from the
 * covered {@code Dependency} annotations.
 *
 * @param aSource the CAS to read from
 * @param aTarget the annotation to fill
 * @return {@code aTarget}, after the document text and sentence list have been set on it
 */
public Annotation convert(JCas aSource, Annotation aTarget) {
    // The document text is carried over verbatim.
    aTarget.set(CoreAnnotations.TextAnnotation.class, aSource.getDocumentText());

    // Covered text is only re-decoded when a charset other than UTF-8 is configured.
    final boolean reencode = encoding != null && !"UTF-8".equals(encoding.name());

    List<CoreMap> sentences = new ArrayList<>();
    for (Sentence s : select(aSource, Sentence.class)) {
        String sentenceText = s.getCoveredText();
        if (StringUtils.isBlank(sentenceText)) {
            // Blank sentences are skipped entirely and do not consume a sentence index.
            continue;
        }
        if (reencode) {
            sentenceText = new String(sentenceText.getBytes(StandardCharsets.UTF_8), encoding);
        }

        // Index of the sentence currently being built; the list only grows at the end of
        // this iteration, so the value is stable for all tokens below.
        final int sentenceIndex = sentences.size();

        Annotation sentence = new Annotation(sentenceText);
        sentence.set(CharacterOffsetBeginAnnotation.class, s.getBegin());
        sentence.set(CharacterOffsetEndAnnotation.class, s.getEnd());
        sentence.set(SentenceIndexAnnotation.class, sentenceIndex);

        // Tokens, plus a lookup from each UIMA token to its CoreNLP counterpart for the
        // tree and dependency conversion further down.
        Map<Token, IndexedWord> idxTokens = new HashMap<>();
        List<CoreLabel> tokens = new ArrayList<>();
        for (Token t : selectCovered(Token.class, s)) {
            String tokenText = t.getCoveredText();
            if (reencode) {
                tokenText = new String(tokenText.getBytes(StandardCharsets.UTF_8), encoding);
            }

            final int begin = t.getBegin();
            CoreLabel token = tokenFactory.makeToken(tokenText, begin, t.getEnd() - begin);

            // Add the token first: IndexAnnotation is 1-based, so the list size after
            // insertion is exactly the index this token must carry.
            tokens.add(token);
            token.set(SentenceIndexAnnotation.class, sentenceIndex);
            token.set(IndexAnnotation.class, tokens.size());
            token.set(TokenKey.class, t);
            idxTokens.put(t, new IndexedWord(token));

            // Part-of-speech tag, only when reading POS is enabled and one is present.
            if (readPos && t.getPos() != null) {
                token.set(PartOfSpeechAnnotation.class, t.getPos().getPosValue());
            }

            // Lemma, when present.
            if (t.getLemma() != null) {
                token.set(LemmaAnnotation.class, t.getLemma().getValue());
            }

            // Stem, when present.
            if (t.getStem() != null) {
                token.set(StemAnnotation.class, t.getStem().getValue());
            }

            // Named entity tag.
            // TODO: only token-based NEs are supported, but not multi-token NEs
            // Supporting multi-token NEs via selectCovering would be very slow. To support
            // them, another approach would need to be implemented, e.g. via indexCovering.
            List<NamedEntity> nes = selectCovered(NamedEntity.class, t);
            token.set(NamedEntityTagAnnotation.class, nes.isEmpty() ? "O" : nes.get(0).getValue());
        }

        // Constituent trees; if several ROOTs are covered, the last one set wins.
        for (ROOT r : selectCovered(ROOT.class, s)) {
            Tree tree = createStanfordTree(r, idxTokens);
            tree.indexSpans();
            sentence.set(TreeAnnotation.class, tree);
        }

        // Dependencies: each one becomes a TypedDependency between the mapped tokens;
        // those with the ENHANCED flavor are additionally flagged as "extra".
        List<TypedDependency> dependencies = new ArrayList<>();
        for (Dependency dependency : selectCovered(Dependency.class, s)) {
            GrammaticalRelation relation = GrammaticalRelation.valueOf(dependency.getDependencyType());
            TypedDependency typedDep = new TypedDependency(relation,
                    idxTokens.get(dependency.getGovernor()), idxTokens.get(dependency.getDependent()));
            if (DependencyFlavor.ENHANCED.equals(dependency.getFlavor())) {
                typedDep.setExtra();
            }
            dependencies.add(typedDep);
        }
        sentence.set(EnhancedDependenciesAnnotation.class, new SemanticGraph(dependencies));

        // Optional PTB escaping is applied only after trees and dependencies were built.
        if (ptb3Escaping) {
            tokens = applyPtbEscaping(tokens, quoteBegin, quoteEnd);
        }

        sentence.set(TokensAnnotation.class, tokens);
        sentences.add(sentence);
    }
    aTarget.set(SentencesAnnotation.class, sentences);

    return aTarget;
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License:Open Source License

/**
 * Tags, parses and dependency-analyses every sentence read from a BNC XML stream and renders
 * the result as text.
 *
 * <p>The output wraps each outer group in {@code <p>...</p>} and each sentence in
 * {@code <s>...</s>}. Inside a sentence there is one line per token, produced by
 * {@code line(...)}, carrying the 1-based token index, the tagged word, its head index and its
 * dependency relation. A token that is a root of the dependency graph gets head {@code 0} and
 * relation {@code "ROOT"}; a token without a graph node, or with neither an incoming edge nor
 * root status, gets head {@code -1} and a {@code null} relation.
 *
 * <p>A dot is printed to standard output for every 20th sentence as a progress indicator. The
 * input stream is always closed before this method returns, including when an exception is
 * thrown.
 *
 * @param inputStreamFile stream handed to {@code parseBNCXMLTokenized}; closed by this method
 * @param morphology passed through to {@code line(...)} for every token
 * @param posTagger tagger applied to each tokenized sentence
 * @param parser parser used to build the parse tree and the grammatical structure
 * @return the accumulated textual output for the whole stream
 * @throws Exception if tokenizing, tagging, parsing or closing the stream fails
 */
public static StringBuilder parseBNCXML(InputStream inputStreamFile, Morphology morphology,
        MaxentTagger posTagger, ParserGrammar parser) throws Exception {
    StringBuilder results = new StringBuilder();
    int counterSent = 0;
    // try-with-resources guarantees the stream is released even if processing fails midway.
    try (InputStream in = inputStreamFile) {
        List<List<List<WordLemmaTag>>> paragraphs = parseBNCXMLTokenized(in);
        for (List<List<WordLemmaTag>> paragraph : paragraphs) {
            results.append("<p>\n");
            for (List<WordLemmaTag> sentenceTokens : paragraph) {
                if (counterSent++ % 20 == 0) {
                    System.out.print(".");
                }
                results.append("<s>\n");
                List<TaggedWord> tagSentence = posTagger.tagSentence(sentenceTokens, true);

                Tree parseTree = parser.parse(tagSentence);

                GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
                        parser.treebankLanguagePack().punctuationWordRejectFilter(),
                        parser.getTLPParams().typedDependencyHeadFinder());

                Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
                SemanticGraph depTree = new SemanticGraph(deps);

                // The roots do not change while tokens are written, so collect them once per
                // sentence instead of once per token.
                Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index)
                        .collect(Collectors.toSet());

                for (int i = 0; i < tagSentence.size(); ++i) {
                    // Graph node indices are 1-based, list positions are 0-based.
                    int tokenIndex = i + 1;
                    int head = -1;
                    String deprel = null;

                    IndexedWord node = depTree.getNodeByIndexSafe(tokenIndex);
                    if (node != null) {
                        List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
                        if (!edgeList.isEmpty()) {
                            assert edgeList.size() == 1;
                            SemanticGraphEdge incoming = edgeList.get(0);
                            head = incoming.getGovernor().index();
                            deprel = incoming.getRelation().toString();
                        } else if (rootSet.contains(tokenIndex)) {
                            head = 0;
                            deprel = "ROOT";
                        }
                    }

                    // Write the token; the lexical head is only resolvable for real heads
                    // (head > 0), not for roots (0) or unattached tokens (-1).
                    TaggedWord lexHead = null;
                    if (head > 0) {
                        lexHead = tagSentence.get(head - 1);
                    }
                    results.append(line(tokenIndex, tagSentence.get(i), morphology, head, deprel, lexHead))
                            .append("\n");
                }
                results.append("</s>\n");
            }
            results.append("</p>\n");
        }
        // Terminate the line of progress dots.
        System.out.println("");
    }

    return results;
}

From source file:ie.pars.bnc.preprocess.ProcessNLP.java

License:Open Source License

/**
 * Tokenizes, tags, parses and dependency-analyses a single sentence and renders it as text.
 *
 * <p>The output starts with {@code <s id='SID'>}, followed by one line per token produced by
 * {@code line(...)}, and ends with {@code </s>}. A token that is a root of the dependency graph
 * gets head {@code 0} and relation {@code "ROOT"}; a token without a graph node, or with neither
 * an incoming edge nor root status, gets head {@code -1} and a {@code null} relation.
 *
 * @param sentence raw sentence text to tokenize
 * @param morphology passed through to {@code line(...)} for every token
 * @param posTagger tagger applied to the tokenized sentence
 * @param parser parser used to build the parse tree and the grammatical structure
 * @param sid value written into the {@code id} attribute of the opening tag, as-is
 *            (NOTE(review): not escaped - confirm callers never pass quotes or markup)
 * @return the textual rendering of the sentence
 */
private static StringBuilder parseTheSentence(String sentence, Morphology morphology, MaxentTagger posTagger,
        ParserGrammar parser, String sid) {
    TokenizerFactory<Word> tokenizerFactory = PTBTokenizerFactory.newTokenizerFactory();

    StringBuilder results = new StringBuilder();
    results.append("<s id='").append(sid).append("'>\n");

    Tokenizer<Word> tokenizer = tokenizerFactory.getTokenizer(new StringReader(sentence));
    List<Word> words = tokenizer.tokenize();

    List<TaggedWord> tagSentence = posTagger.tagSentence(words);

    Tree parseTree = parser.parse(tagSentence);

    GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(parseTree,
            parser.treebankLanguagePack().punctuationWordRejectFilter(),
            parser.getTLPParams().typedDependencyHeadFinder());

    Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
    SemanticGraph depTree = new SemanticGraph(deps);

    // The roots do not change while tokens are written, so collect them once instead of once
    // per token.
    Set<Integer> rootSet = depTree.getRoots().stream().map(IndexedWord::index).collect(Collectors.toSet());

    for (int i = 0; i < tagSentence.size(); ++i) {
        // Graph node indices are 1-based, list positions are 0-based.
        int tokenIndex = i + 1;
        int head = -1;
        String deprel = null;

        IndexedWord node = depTree.getNodeByIndexSafe(tokenIndex);
        if (node != null) {
            List<SemanticGraphEdge> edgeList = depTree.getIncomingEdgesSorted(node);
            if (!edgeList.isEmpty()) {
                assert edgeList.size() == 1;
                SemanticGraphEdge incoming = edgeList.get(0);
                head = incoming.getGovernor().index();
                deprel = incoming.getRelation().toString();
            } else if (rootSet.contains(tokenIndex)) {
                head = 0;
                deprel = "ROOT";
            }
        }

        // Write the token; the lexical head is only resolvable for real heads (head > 0),
        // not for roots (0) or unattached tokens (-1).
        TaggedWord lexHead = null;
        if (head > 0) {
            lexHead = tagSentence.get(head - 1);
        }
        results.append(line(tokenIndex, tagSentence.get(i), morphology, head, deprel, lexHead)).append("\n");
    }
    results.append("</s>\n");
    return results;
}

From source file:jnetention.nlp.TextParse.java

/**
 * Builds one graph from the typed dependencies of all sentences.
 *
 * <p>For every sentence returned by {@code getSentences()}, the per-sentence graph is obtained
 * via {@code getDependencies(sentence, b)} and its typed dependencies are appended to a single
 * list, from which the resulting graph is constructed.
 *
 * @param b forwarded unchanged to the per-sentence {@code getDependencies(CoreMap, boolean)}
 * @return a new graph built from the concatenated typed dependencies of all sentences
 */
public SemanticGraph getDependencies(boolean b) {
    // Diamond instead of the raw ArrayList avoids an unchecked conversion.
    List<TypedDependency> merged = new ArrayList<>();
    for (CoreMap sentence : getSentences()) {
        merged.addAll(getDependencies(sentence, b).typedDependencies());
    }
    return new SemanticGraph(merged);
}

From source file:opendial.bn.values.RelationalVal.java

License:Open Source License

/**
 * Creates a new value whose graph is constructed from this value's graph via the
 * {@link SemanticGraph} copy constructor.
 *
 * @return a fresh {@code RelationalVal} holding the copied graph
 */
@Override
public RelationalVal copy() {
    RelationalVal duplicate = new RelationalVal();
    SemanticGraph clonedGraph = new SemanticGraph(graph);
    duplicate.graph = clonedGraph;
    return duplicate;
}

From source file:opendial.bn.values.RelationalVal.java

License:Open Source License

/**
 * Creates a new value holding a copy of this value's graph whose root is set to the node
 * with the given index. This value's own graph is not modified.
 *
 * @param i index of the node to use as root, looked up in the copied graph via
 *          {@code getNodeByIndex}
 * @return a fresh {@code RelationalVal} with the re-rooted copy
 */
public RelationalVal getSubGraph(int i) {
    // Work on a copy so the root change never touches this value's graph.
    SemanticGraph rerooted = new SemanticGraph(graph);
    rerooted.setRoot(rerooted.getNodeByIndex(i));

    RelationalVal result = new RelationalVal();
    result.graph = rerooted;
    return result;
}