Example usage for edu.stanford.nlp.ling IndexedWord value

List of usage examples for edu.stanford.nlp.ling IndexedWord value

Introduction

In this page you can find the example usage for edu.stanford.nlp.ling IndexedWord value.

Prototype

@Override
    public String value() 

Source Link

Usage

From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.StanfordCoreferenceResolver.java

License:Open Source License

/**
 * Runs Stanford coreference resolution over the document in the given CAS and
 * writes {@code CoreferenceChain}/{@code CoreferenceLink} annotations back to it.
 * Expects the CAS to already contain ROOT (constituent parse) and Token annotations.
 *
 * @param aJCas the CAS to process.
 * @throws AnalysisEngineProcessException if mention arrangement or the coref system fails.
 */
@Override
public void process(JCas aJCas) throws AnalysisEngineProcessException {
    modelProvider.configure(aJCas.getCas());

    List<Tree> trees = new ArrayList<Tree>();
    List<CoreMap> sentences = new ArrayList<CoreMap>();
    List<List<CoreLabel>> sentenceTokens = new ArrayList<List<CoreLabel>>();
    for (ROOT root : select(aJCas, ROOT.class)) {
        // Copy all relevant information from the tokens
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        for (Token token : selectCovered(Token.class, root)) {
            tokens.add(tokenToWord(token));
        }
        sentenceTokens.add(tokens);

        // SemanticHeadFinder (nonTerminalInfo) does not know about PRN0, so we have to replace
        // it with PRN to avoid NPEs.
        TreeFactory tFact = new LabeledScoredTreeFactory(CoreLabel.factory()) {
            @Override
            public Tree newTreeNode(String aParent, List<Tree> aChildren) {
                String parent = aParent;
                if ("PRN0".equals(parent)) {
                    parent = "PRN";
                }
                Tree node = super.newTreeNode(parent, aChildren);
                return node;
            }
        };

        // deep copy of the tree. These are modified inside coref!
        Tree treeCopy = TreeUtils.createStanfordTree(root, tFact).treeSkeletonCopy();
        treeCopy.indexSpans();
        trees.add(treeCopy);

        // Build the sentence
        // NOTE(review): a CoreLabel is used as the CoreMap implementation here;
        // it is a valid CoreMap, presumably chosen for convenience — confirm.
        CoreMap sentence = new CoreLabel();
        sentence.set(TreeAnnotation.class, treeCopy);
        sentence.set(TokensAnnotation.class, tokens);
        sentence.set(RootKey.class, root);
        sentences.add(sentence);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=590
        // We currently do not copy over dependencies from the CAS. This is supposed to fill
        // in the dependencies so we do not get NPEs.
        TreebankLanguagePack tlp = new PennTreebankLanguagePack();
        GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory(tlp.punctuationWordRejectFilter(),
                tlp.typedDependencyHeadFinder());
        ParserAnnotatorUtils.fillInParseAnnotations(false, true, gsf, sentence, treeCopy,
                GrammaticalStructure.Extras.NONE);

        // https://code.google.com/p/dkpro-core-asl/issues/detail?id=582
        // Copy each vertex's value into its word field so downstream code that
        // reads word() does not see null.
        SemanticGraph deps = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        for (IndexedWord vertex : deps.vertexSet()) {
            vertex.setWord(vertex.value());
        }

        // merge the new CoreLabels with the tree leaves
        MentionExtractor.mergeLabels(treeCopy, tokens);
        MentionExtractor.initializeUtterance(tokens);
    }

    Annotation document = new Annotation(aJCas.getDocumentText());
    document.set(SentencesAnnotation.class, sentences);

    Coreferencer coref = modelProvider.getResource();

    // extract all possible mentions
    // Reparsing only works when the full CoreNLP pipeline system is set up! Passing false here
    // disables reparsing.
    RuleBasedCorefMentionFinder finder = new RuleBasedCorefMentionFinder(false);
    List<List<Mention>> allUnprocessedMentions = finder.extractPredictedMentions(document, 0,
            coref.corefSystem.dictionaries());

    // add the relevant info to mentions and order them for coref
    Map<Integer, CorefChain> result;
    try {
        Document doc = coref.mentionExtractor.arrange(document, sentenceTokens, trees, allUnprocessedMentions);
        result = coref.corefSystem.coref(doc);
    } catch (Exception e) {
        throw new AnalysisEngineProcessException(e);
    }

    // Convert each CoreNLP coref chain into a linked list of CoreferenceLinks.
    for (CorefChain chain : result.values()) {
        CoreferenceLink last = null;
        for (CorefMention mention : chain.getMentionsInTextualOrder()) {
            // CoreNLP sentence/token indices are 1-based; endIndex is exclusive,
            // hence the -2 to get the last covered token.
            CoreLabel beginLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.startIndex - 1);
            CoreLabel endLabel = sentences.get(mention.sentNum - 1).get(TokensAnnotation.class)
                    .get(mention.endIndex - 2);
            // TokenKey maps the CoreLabel back to the original CAS Token, from
            // which we take the character offsets.
            CoreferenceLink link = new CoreferenceLink(aJCas, beginLabel.get(TokenKey.class).getBegin(),
                    endLabel.get(TokenKey.class).getEnd());

            if (mention.mentionType != null) {
                link.setReferenceType(mention.mentionType.toString());
            }

            if (last == null) {
                // This is the first mention. Here we'll initialize the chain
                CoreferenceChain corefChain = new CoreferenceChain(aJCas);
                corefChain.setFirst(link);
                corefChain.addToIndexes();
            } else {
                // For the other mentions, we'll add them to the chain.
                last.setNext(link);
            }
            last = link;

            link.addToIndexes();
        }
    }
}

From source file:edu.anu.spice.SpiceParser.java

License:Open Source License

/**
 * Attaches particles to the main predicate.
 *//*  w  w w .  j a v a 2 s .  c  o m*/
protected String getPredicate(SemanticGraph sg, IndexedWord mainPred) {
    if (sg.hasChildWithReln(mainPred, UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE)) {
        IndexedWord part = sg.getChildWithReln(mainPred,
                UniversalEnglishGrammaticalRelations.PHRASAL_VERB_PARTICLE);
        return String.format("%s %s", mainPred.lemma().equals("be") ? "" : mainPred.lemma(), part.value());
    }
    return mainPred.lemma();
}

From source file:edu.anu.spice.SpiceParser.java

License:Open Source License

/**
 * Extracts SPICE proposal tuples from an annotated document by running a cascade
 * of semgrex patterns over each sentence's collapsed-CC-processed dependency graph.
 *
 * @param ann the CoreNLP annotation holding the parsed sentences.
 * @return the collected tuples (subject/object/predicate triples, pairs, and singletons).
 */
protected ProposedTuples parseAnnotation(Annotation ann) {
    ProposedTuples tuples = new ProposedTuples();
    ArrayList<SemanticGraph> sgs = new ArrayList<SemanticGraph>();
    for (CoreMap sentence : ann.get(CoreAnnotations.SentencesAnnotation.class)) {
        SemanticGraph sg = sentence
                .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
        sgs.add(sg);
    }
    for (SemanticGraph sg : sgs) {
        // Everything from RuleBasedParser except resolvePlurals(sg);
        // NOTE(review): "Quanftification" looks misspelled, but the name must
        // match the SemanticGraphEnhancer API — verify against that class.
        SemanticGraphEnhancer.processQuanftificationModifiers(sg);
        SemanticGraphEnhancer.collapseCompounds(sg);
        SemanticGraphEnhancer.collapseParticles(sg);
        SemanticGraphEnhancer.resolvePronouns(sg);

        // Subject-predicate-object triples; nmod: relations (except poss/agent)
        // are folded into the predicate string.
        SemgrexMatcher matcher = SUBJ_PRED_OBJ_TRIPLET_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            String reln = matcher.getRelnString("objreln");
            String predicate = getPredicate(sg, pred);
            if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) {
                predicate += reln.replace("nmod:", " ").replace("_", " ");
            }
            tuples.addTuple(subj, obj, predicate);
        }

        // Same treatment for adjectival-clause (acl) matches.
        matcher = ACL_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            String reln = matcher.getRelnString("objreln");
            String predicate = getPredicate(sg, pred);
            if (reln.startsWith("nmod:") && !reln.equals("nmod:poss") && !reln.equals("nmod:agent")) {
                predicate += reln.replace("nmod:", " ").replace("_", " ");
            }
            tuples.addTuple(subj, obj, predicate);
        }

        // Subject-predicate pairs (plain and copular). A case marker on the
        // predicate (plus any multi-word extension) becomes the relation string.
        SemgrexPattern[] subjPredPatterns = { SUBJ_PRED_PAIR_PATTERN, COPULAR_PATTERN };
        for (SemgrexPattern p : subjPredPatterns) {
            matcher = p.matcher(sg);
            while (matcher.find()) {
                IndexedWord subj = matcher.getNode("subj");
                IndexedWord pred = matcher.getNode("pred");
                if (sg.hasChildWithReln(pred, UniversalEnglishGrammaticalRelations.CASE_MARKER)) {
                    IndexedWord caseMarker = sg.getChildWithReln(pred,
                            UniversalEnglishGrammaticalRelations.CASE_MARKER);
                    String prep = caseMarker.value();
                    if (sg.hasChildWithReln(caseMarker,
                            UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                        for (IndexedWord additionalCaseMarker : sg.getChildrenWithReln(caseMarker,
                                UniversalEnglishGrammaticalRelations.MULTI_WORD_EXPRESSION)) {
                            prep = prep + " " + additionalCaseMarker.value();
                        }
                    }
                    tuples.addTuple(subj, pred, prep);
                } else {
                    // Bare copulas ("X is Y") carry no content; skip them.
                    if (!pred.lemma().equals("be")) {
                        tuples.addTuple(subj, pred);
                    }
                }
            }
        }

        matcher = ADJ_MOD_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord adj = matcher.getNode("adj");
            tuples.addTuple(obj, adj);
        }

        matcher = ADJ_PRED_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord adj = matcher.getNode("adj");
            tuples.addTuple(obj, adj);
        }

        // Prepositional modifiers, again skipping possessive/agent nmods.
        matcher = PP_MOD_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord gov = matcher.getNode("gov");
            IndexedWord mod = matcher.getNode("mod");
            String reln = matcher.getRelnString("reln");
            String predicate = reln.replace("nmod:", "").replace("_", " ");
            if (predicate.equals("poss") || predicate.equals("agent")) {
                continue;
            }
            tuples.addTuple(gov, mod, predicate);
        }

        // Possessives become explicit "have" triples.
        matcher = POSS_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord gov = matcher.getNode("gov");
            IndexedWord mod = matcher.getNode("mod");
            tuples.addTuple(mod, gov, "have");
        }

        matcher = AGENT_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            IndexedWord pred = matcher.getNode("pred");
            tuples.addTuple(subj, obj, getPredicate(sg, pred));
        }

        matcher = PLURAL_SUBJECT_OBJECT_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord subj = matcher.getNode("subj");
            IndexedWord obj = matcher.getNode("obj");
            checkForNumericAttribute(tuples, sg, subj);
            checkForNumericAttribute(tuples, sg, obj);
        }

        matcher = PLURAL_SUBJECT_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord subj = matcher.getNode("subj");
            checkForNumericAttribute(tuples, sg, subj);
        }

        matcher = PLURAL_OTHER_PATTERN.matcher(sg);
        while (matcher.findNextMatchingNode()) {
            IndexedWord word = matcher.getNode("word");
            checkForNumericAttribute(tuples, sg, word);
        }

        matcher = COMPOUND_NOUN_PATTERN.matcher(sg);
        Set<IndexedWord> compoundNouns = new HashSet<IndexedWord>();
        while (matcher.find()) {
            IndexedWord tail = matcher.getNode("tail");
            IndexedWord head = matcher.getNode("head");
            compoundNouns.add(tail);
            compoundNouns.add(head);
            tuples.addTuple(tail, head);
        }

        // Must happen last, since it will reuse existing parts of the scene
        // graph
        matcher = NOUN_CONJ_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord tail = matcher.getNode("tail");
            IndexedWord head = matcher.getNode("head");
            int original_length = tuples.tuples.size();
            for (int i = 0; i < original_length; ++i) {
                ArrayList<String> prop = tuples.tuples.get(i);
                // NOTE(review): prop holds Strings while head/tail are
                // IndexedWords, so String.equals(IndexedWord) is always false
                // and these branches can never fire. Likely the comparison was
                // meant against the word's string form (and the second branch
                // probably meant prop.get(0) / substituting tail for head).
                // Confirm intent against the upstream SPICE sources.
                if (prop.size() == 3 && prop.get(0).equals(head)) {
                    tuples.addTuple(head, prop.get(1), prop.get(2));
                }
                if (prop.size() == 3 && prop.get(1).equals(tail)) {
                    tuples.addTuple(tail, prop.get(1), prop.get(2));
                }
            }
        }

        // Emit remaining standalone nouns not already covered by a compound.
        matcher = NOUN_PATTERN.matcher(sg);
        while (matcher.find()) {
            IndexedWord word = matcher.getNode("word");
            if (!compoundNouns.contains(word)) {
                tuples.addTuple(word);
            }
        }
    }
    return tuples;
}

From source file:edu.nus.comp.nlp.stanford.UtilParser.java

License:Open Source License

/**
 * Recursively converts a dependency (sub)graph into a Swing tree model node.
 * The node label is the word's surface form, prefixed with the incoming
 * grammatical relation when one exists.
 *
 * @param root the node to start from; when {@code null}, the graph's first root is used
 * @param dependencies the dependency graph to walk
 * @return the tree node for {@code root} with all dependents attached as children
 */
public static DefaultMutableTreeNode toDMTree(IndexedWord root, SemanticGraph dependencies) {

    IndexedWord current = (root == null) ? dependencies.getFirstRoot() : root;

    DefaultMutableTreeNode node = new DefaultMutableTreeNode();
    String label = current.value();

    // Find the (first) edge pointing at this node and prepend its relation.
    for (SemanticGraphEdge edge : dependencies.edgeIterable()) {
        if (edge.getDependent().equals(current)) {
            label = "<-" + edge.getRelation() + "- " + label;
            break;
        }
    }
    node.setUserObject(label);

    for (IndexedWord child : dependencies.getChildList(current)) {
        node.add(toDMTree(child, dependencies));
    }
    return node;
}

From source file:main.java.parsers.StanfordParser.java

/**
 * Gets a map that links words on one of the ends of the dependency paths to the dependency paths
 * //  www .  j a  v  a2 s  .  c  o  m
 * @param offsets1
 * @param offsets2
 * @param first
 * @param startOffsetIndexedWord
 * @param graph
 * @return 
 */
/**
 * Gets a map that links words on one of the ends of the dependency paths to the
 * dependency paths.
 *
 * @param offsets1 {@code [start, end)} character offsets of the first element
 * @param offsets2 {@code [start, end)} character offsets of the second element
 * @param first if {@code true}, key the map by the first word, otherwise by the second
 * @param startOffsetIndexedWord map from token start offset to its {@link IndexedWord}
 * @param graph dependency graph used to compute paths between the two words
 * @return map from word to the distinct dependency paths it participates in
 */
public static Map<String, List<String>> getWordLinkedDependencyPaths(int[] offsets1, int[] offsets2,
        boolean first, Map<Integer, IndexedWord> startOffsetIndexedWord, SemanticGraph graph) {
    Map<String, List<String>> wordLinkedDependencyPaths = new HashMap<>();

    for (int startOffset1 = offsets1[0]; startOffset1 < offsets1[1]; startOffset1++) {
        if (!startOffsetIndexedWord.containsKey(startOffset1))
            continue;
        IndexedWord iw1 = startOffsetIndexedWord.get(startOffset1);
        for (int startOffset2 = offsets2[0]; startOffset2 < offsets2[1]; startOffset2++) {
            if (!startOffsetIndexedWord.containsKey(startOffset2))
                continue;
            IndexedWord iw2 = startOffsetIndexedWord.get(startOffset2);

            String path = getPath(iw1, iw2, graph);
            if (path.isEmpty())
                continue;

            String word = first ? iw1.value() : iw2.value();
            // computeIfAbsent replaces the original get/null-check/put dance.
            List<String> dependencyPaths = wordLinkedDependencyPaths.computeIfAbsent(word,
                    k -> new ArrayList<>());
            if (!dependencyPaths.contains(path))
                dependencyPaths.add(path);
        }
    }

    return wordLinkedDependencyPaths;
}

From source file:main.java.spatialrelex.ling.Features.java

/**
 * Gets lexical pattern containing spatial element roles and words in between.
 * //from  w  w w . ja  va2s .c  o  m
 * @param startOffsetIndexedWord is a map containing all tokens of the document
 * to which the spatial element belongs.
 * @param startOffsetSpatialElement is a sorted map linking the starting offset positions
 * of the two or three spatial elements to the spatial elements.
 * @return String which is the lexical pattern.
 */
/**
 * Gets lexical pattern containing spatial element roles and words in between.
 *
 * <p>Relies on the iteration order of {@code startOffsetSpatialElement}; per the
 * contract it is a sorted map keyed by start offset.
 *
 * @param startOffsetIndexedWord is a map containing all tokens of the document
 * to which the spatial element belongs.
 * @param startOffsetSpatialElement is a sorted map linking the starting offset positions
 * of the two or three spatial elements to the spatial elements.
 * @return String which is the lexical pattern.
 */
public static String getLexicalPatternStr(Map<Integer, IndexedWord> startOffsetIndexedWord,
        Map<Integer, SpatialElement> startOffsetSpatialElement) {
    // StringBuilder replaces the original String += concatenation in the loop.
    StringBuilder lexicalPattern = new StringBuilder();

    int start = -1;
    int end = -1;

    for (int startOffset : startOffsetSpatialElement.keySet()) {
        SpatialElement se = startOffsetSpatialElement.get(startOffset);
        if (start == -1)
            start = se.end;
        else if (end == -1) {
            end = se.start;

            // Collect surface forms of tokens strictly between the previous
            // element's end and this element's start.
            StringBuilder between = new StringBuilder();
            for (int i = start; i < end; i++) {
                if (!startOffsetIndexedWord.containsKey(i)) {
                    continue;
                }
                between.append(startOffsetIndexedWord.get(i).value()).append(' ');
            }
            String substring = between.toString().trim();

            if (!substring.isEmpty())
                lexicalPattern.append('_').append(substring);
            start = se.end;
            end = -1;
        }
        // Append this element's role, "_"-separated.
        if (lexicalPattern.length() == 0) {
            lexicalPattern.append(se.role);
        } else {
            lexicalPattern.append('_').append(se.role);
        }
    }

    return lexicalPattern.toString();
}

From source file:main.java.spatialrelex.markup.SpatialElement.java

/**
 * Populates token-derived features (lemma text, token span, lexicon categories,
 * WordNet/VerbNet info, and SRL roles) on a spatial element from the document's
 * token map, and returns the same element.
 *
 * @param document the document whose startOffsetIndexedWord/startOffsetSRLRoles maps are read.
 * @param se the spatial element to enrich; mutated in place.
 * @return the enriched element (same instance as {@code se}).
 */
public static SpatialElement setSpatialElementFeatures(Doc document, SpatialElement se) {
    // NOTE(review): if se.start has no token entry this is null and the next
    // line throws an NPE — confirm callers guarantee a token at se.start.
    IndexedWord iw = document.startOffsetIndexedWord.get(se.start);
    se.lemmaText = iw.lemma();
    se.startToken = iw.index();
    se.endToken = iw.index();
    int i = se.start + 1;
    while (i < se.end) {
        // Offsets without a token (e.g. whitespace positions) are skipped.
        if (!document.startOffsetIndexedWord.containsKey(i)) {
            i++;
            continue;
        }

        iw = document.startOffsetIndexedWord.get(i);
        se.endToken = iw.index();
        se.lemmaText += " " + iw.lemma();
        // contains("NN") also matches NNP/NNPS/NNS; contains("VB") matches all
        // verb tags — presumably intentional, treating them as coarse classes.
        if (iw.tag().contains("NN")) {
            se.generalInquirerCategories = GeneralInquirer
                    .getGeneralInquirerCategories(se.generalInquirerCategories, iw.value().toLowerCase());
            se = WordNet.setWordNetSynsetsAndHypernyms(se, iw.tag(), "NN");
        } else if (iw.tag().contains("VB")) {
            se.verbNetClasses = VerbNet.getVerbNetClasses(se.verbNetClasses, iw.value().toLowerCase());
            se = WordNet.setWordNetSynsetsAndHypernyms(se, iw.tag(), "VB");
        }
        List<String> tokenSRLs = document.startOffsetSRLRoles.get(i);
        i++;

        if (tokenSRLs == null)
            continue;
        // Accumulate distinct SRL roles for this element.
        for (String tokenSRL : tokenSRLs) {
            if (se.srls.contains(tokenSRL))
                continue;
            se.srls.add(tokenSRL);
        }
    }

    return se;
}

From source file:me.aatma.languagetologic.graph.pattern.KBNLPatternEventAdvclEvent.java

/**
 * Checks whether this edge represents an inter-event relation (advcl/vmod,
 * a prepositional relation, or an xcomp with a "to" auxiliary), and records
 * the advcl marker and xcomp-to flag as side effects.
 *
 * @return true if both endpoints are event node clouds and the relation qualifies.
 */
public boolean check() {
    //    String mark = null;
    GrammaticalRelation markGR = NLPTools.getGR("mark", null);
    // Side effect: remember the subordinating marker (e.g. "because") if present.
    if (this.dependencies.hasChildWithReln(this.toNl, markGR)) {
        this.advclMark = this.dependencies.getChildWithReln(this.toNl, markGR).value();
        log.info("Marker of advcl: " + this.advclMark);
    }

    if (edgeNlRelation.equals(NLPConstants.xcomp)) {
        if (this.dependencies.hasChildWithReln(this.toNl, NLPConstants.aux)) {
            Set<IndexedWord> auxs = this.dependencies.getChildrenWithReln(this.toNl, NLPConstants.aux);
            for (IndexedWord aux : auxs) {
                // NOTE(review): comparing a token's surface form to "toKbNl"
                // looks like a scraping/merge artifact — an aux token can
                // hardly be "toKbNl"; the original was presumably "to"
                // (infinitival marker). Verify against the upstream source
                // before relying on xcompTo ever becoming true.
                if (aux.value().equalsIgnoreCase("toKbNl")) {
                    this.xcompTo = true;
                }
            }
        }
    }

    // This is inter event relationship
    return this.fromKbNl instanceof KBNLEventNodeCloud && this.toKbNl instanceof KBNLEventNodeCloud
            && (this.edgeNlRelation.toString().equals("advcl") || this.edgeNlRelation.toString().equals("vmod")
                    || // TODO: Check when it relates two events, if its always a purpose..
                    prepGRs.contains(this.edgeNlRelation) || this.xcompTo);
}

From source file:org.sam_agent.csparser.ContinuousParser.java

License:Open Source License

/**
 * Renders the given root words as a JSON-style array of quoted
 * "word-index" strings, e.g. ["ran-2","jumped-5"].
 *
 * @param roots the root words to render
 * @return the bracketed, comma-separated string
 */
public String stringify(Collection<IndexedWord> roots) {
    StringBuilder out = new StringBuilder("[");
    boolean isFirst = true;
    for (IndexedWord root : roots) {
        if (!isFirst) {
            out.append(',');
        }
        out.append(String.format("\"%s-%d\"", esc(root.value()), root.index()));
        isFirst = false;
    }
    return out.append(']').toString();
}

From source file:semRewrite.datesandnumber.DateAndNumbersGeneration.java

License:Open Source License

/** ***************************************************************
 *//*from  w  w  w.  j ava2s  .c  o  m*/
private String lemmatizeWord(IndexedWord measuredEntity) {

    String value = measuredEntity.value();
    if (!measuredEntity.tag().equals("NNP") || !measuredEntity.tag().equals("NNPS")) {
        value = measuredEntity.lemma();
    }
    return value;
}