List of usage examples for the edu.stanford.nlp.ling.IndexedWord constructor
public IndexedWord(CoreLabel w)
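The constructor wraps an existing CoreLabel so the token can be used wherever CoreNLP expects an IndexedWord, most commonly as a vertex in a SemanticGraph. Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed source files; the class and variable names are illustrative) that builds a CoreLabel by hand and wraps it:

import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;

public class IndexedWordConstructorSketch {
    public static void main(String[] args) {
        // Build a CoreLabel by hand; in the examples below it usually comes from a
        // sentence's TokensAnnotation produced by a CoreNLP pipeline instead.
        CoreLabel label = new CoreLabel();
        label.setWord("Stanford");
        label.setValue("Stanford");
        label.setIndex(1);

        // Wrap the CoreLabel so it can serve as a node in a dependency graph.
        IndexedWord word = new IndexedWord(label);
        System.out.println(word.word() + " / index " + word.index());
    }
}

The wrapped label's annotations (word form, tag, lemma, index, offsets) remain accessible through the IndexedWord's accessor methods, which is why the examples below construct IndexedWords directly from pipeline tokens.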
From source file:count_dep.Count_dep.java
public LinkedList<Event> GetEvents(SemanticGraph dependencies, CoreMap sentence) {
    LinkedList<Event> res = new LinkedList<>();
    LinkedList<IndexedWord> roots = new LinkedList<>();
    List<CoreLabel> words = sentence.get(TokensAnnotation.class);
    List<GrammaticalRelation> senserel = new LinkedList<>();
    senserel.add(GrammaticalRelation.valueOf("nsubj"));
    senserel.add(GrammaticalRelation.valueOf("dobj"));
    // collect nouns and verbs as candidate event triggers
    for (CoreLabel word : words) {
        if (word.tag().length() >= 2
                && ("VB".equals(word.tag().substring(0, 2)) || "NN".equals(word.tag().substring(0, 2)))) {
            IndexedWord iword = new IndexedWord(word);
            roots.add(iword);
        }
    }
    // build one Event per trigger, using its dependents (and selected siblings) as arguments
    for (IndexedWord word : roots) {
        Event e = new Event();
        e.trigger = word.word();
        try {
            Set<IndexedWord> children = dependencies.getChildren(word);
            children.stream().forEach((iw) -> {
                e.arguments.add(new EventArgument(iw.word(), ""));
            });
            if (dependencies.inDegree(word) > 0) {
                IndexedWord parent = dependencies.getParent(word);
                if (parent.tag().length() >= 2 && "VB".equals(parent.tag().substring(0, 2))) {
                    Set<IndexedWord> children1 = dependencies.getChildrenWithRelns(parent, senserel);
                    children1.remove(word);
                    children1.stream().forEach((iw) -> {
                        e.arguments.add(new EventArgument(iw.word(), ""));
                    });
                } else {
                    e.arguments.add(new EventArgument(dependencies.getParent(word).word(), ""));
                }
            }
        } catch (java.lang.IllegalArgumentException error) {
            continue;
        }
        res.add(e);
    }
    return res;
}
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.DKPro2CoreNlp.java
License: Open Source License
public Annotation convert(JCas aSource, Annotation aTarget) {
    // Document annotation
    aTarget.set(CoreAnnotations.TextAnnotation.class, aSource.getDocumentText());

    // Sentences
    List<CoreMap> sentences = new ArrayList<>();
    for (Sentence s : select(aSource, Sentence.class)) {
        if (StringUtils.isBlank(s.getCoveredText())) {
            continue;
        }

        String sentenceText = s.getCoveredText();
        if (encoding != null && !"UTF-8".equals(encoding.name())) {
            sentenceText = new String(sentenceText.getBytes(StandardCharsets.UTF_8), encoding);
        }

        Annotation sentence = new Annotation(sentenceText);
        sentence.set(CharacterOffsetBeginAnnotation.class, s.getBegin());
        sentence.set(CharacterOffsetEndAnnotation.class, s.getEnd());
        sentence.set(SentenceIndexAnnotation.class, sentences.size());

        // Tokens
        Map<Token, IndexedWord> idxTokens = new HashMap<>();
        List<CoreLabel> tokens = new ArrayList<>();
        for (Token t : selectCovered(Token.class, s)) {
            String tokenText = t.getCoveredText();
            if (encoding != null && !"UTF-8".equals(encoding.name())) {
                tokenText = new String(tokenText.getBytes(StandardCharsets.UTF_8), encoding);
            }
            CoreLabel token = tokenFactory.makeToken(tokenText, t.getBegin(), t.getEnd() - t.getBegin());
            // First add token so that tokens.size() returns a 1-based counting as required
            // by IndexAnnotation
            tokens.add(token);
            token.set(SentenceIndexAnnotation.class, sentences.size());
            token.set(IndexAnnotation.class, tokens.size());
            token.set(TokenKey.class, t);
            idxTokens.put(t, new IndexedWord(token));

            // POS tags
            if (readPos && t.getPos() != null) {
                token.set(PartOfSpeechAnnotation.class, t.getPos().getPosValue());
            }

            // Lemma
            if (t.getLemma() != null) {
                token.set(LemmaAnnotation.class, t.getLemma().getValue());
            }

            // Stem
            if (t.getStem() != null) {
                token.set(StemAnnotation.class, t.getStem().getValue());
            }

            // NamedEntity
            // TODO: only token-based NEs are supported, but not multi-token NEs
            // Supporting multi-token NEs via selectCovering would be very slow. To support
            // them, another approach would need to be implemented, e.g. via indexCovering.
            List<NamedEntity> nes = selectCovered(NamedEntity.class, t);
            if (nes.size() > 0) {
                token.set(NamedEntityTagAnnotation.class, nes.get(0).getValue());
            } else {
                token.set(NamedEntityTagAnnotation.class, "O");
            }
        }

        // Constituents
        for (ROOT r : selectCovered(ROOT.class, s)) {
            Tree tree = createStanfordTree(r, idxTokens);
            tree.indexSpans();
            sentence.set(TreeAnnotation.class, tree);
        }

        // Dependencies
        List<TypedDependency> dependencies = new ArrayList<>();
        for (Dependency d : selectCovered(Dependency.class, s)) {
            TypedDependency dep = new TypedDependency(GrammaticalRelation.valueOf(d.getDependencyType()),
                    idxTokens.get(d.getGovernor()), idxTokens.get(d.getDependent()));
            if (DependencyFlavor.ENHANCED.equals(d.getFlavor())) {
                dep.setExtra();
            }
            dependencies.add(dep);
        }
        sentence.set(EnhancedDependenciesAnnotation.class, new SemanticGraph(dependencies));

        if (ptb3Escaping) {
            tokens = applyPtbEscaping(tokens, quoteBegin, quoteEnd);
        }

        sentence.set(TokensAnnotation.class, tokens);
        sentences.add(sentence);
    }
    aTarget.set(SentencesAnnotation.class, sentences);

    return aTarget;
}
From source file:main.java.parsers.StanfordParser.java
/**
 * Parses a given input text document using the Stanford CoreNLP parser.
 *
 * @param document
 * @throws java.io.UnsupportedEncodingException
 * @throws java.io.IOException
 * @throws java.lang.InterruptedException
 */
public static void parse(Doc document) throws UnsupportedEncodingException, IOException, InterruptedException {
    // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor.
    Annotation annotation = new Annotation(new String(document.text.getBytes("UTF-8"), "UTF-8"));
    // run all the selected Annotators on this text
    pipeline.annotate(annotation);

    // An Annotation is a Map and you can get and use the various analyses individually.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
    // return if the annotation is empty.
    if (sentences == null || sentences.isEmpty())
        return;

    // map linking token offsets with their token annotations from the Stanford tool.
    for (CoreMap sentence : sentences) {
        String sentenceStr = "";
        int sentenceNum = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);

        Map<Integer, Integer> tokenNumStartOffset = document.sentenceTokenNumStartOffset.get(sentenceNum);
        if (tokenNumStartOffset == null)
            document.sentenceTokenNumStartOffset.put(sentenceNum, tokenNumStartOffset = new HashMap<>());

        Map<Integer, List<String>> startOffsetSRLRoles = new TreeMap<>();

        // extracting tokenized information from the Stanford parser output.
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            sentenceStr += token.value() + " ";
            document.startOffsetIndexedWord.put(token.beginPosition(), new IndexedWord(token));
            tokenNumStartOffset.put(token.index(), token.beginPosition());
            startOffsetSRLRoles.put(token.beginPosition(), null);
        }

        // write the tokenized sentence to an output file
        FileOutputStream output = new FileOutputStream(Main.RESOURCES_DIR + "\\senna\\log.txt");
        output.write(sentenceStr.getBytes());

        // the semantic role labels for the sentence are obtained by applying SENNA
        startOffsetSRLRoles = SENNASrl.getSRLRoles(startOffsetSRLRoles);
        // set the SRL tags
        document.startOffsetSRLRoles.putAll(startOffsetSRLRoles);

        // parse tree of the sentence
        String stanfordParseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).toString();
        ParseTree parseTree = new ParseTree(stanfordParseTree);
        parseTree.convertParseTree();
        document.setSentenceParseTree(sentenceNum, parseTree);

        // dependency graph of the sentence
        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        document.setSentenceDependencyGraph(sentenceNum, graph);
    }
}
From source file:opendial.bn.values.RelationalVal.java
License: Open Source License
public void addGraph(SemanticGraph newGraph) {
    int oldGraphSize = graph.size();
    // copy the vertices of the incoming graph, re-indexing them after the existing vertices
    for (IndexedWord iw : newGraph.vertexListSorted()) {
        IndexedWord copy = new IndexedWord(iw);
        copy.setIndex(graph.size());
        graph.addVertex(copy);
    }
    // copy the edges, shifting governor and dependent indices by the old graph size
    for (SemanticGraphEdge edge : newGraph.edgeListSorted()) {
        int dep = edge.getDependent().index() + oldGraphSize;
        int gov = edge.getGovernor().index() + oldGraphSize;
        GrammaticalRelation rel = edge.getRelation();
        addEdge(gov, dep, rel.getLongName());
    }
    cachedHashCode = 0;
}
From source file:opendial.bn.values.RelationalVal.java
License: Open Source License
public int addNode(String value) {
    CoreLabel label = new CoreLabel();
    label.setWord(value);
    label.setValue(value);
    IndexedWord fword = new IndexedWord(label);
    fword.setIndex(graph.size());
    graph.addVertex(fword);
    cachedHashCode = 0;
    return fword.index();
}
From source file:sleventextraction.SLEntity.java
public SLEntity(AceMention m, CoreMap senCM, SemanticGraph senSG) {
    this();
    // copy basic attributes from the ACE mention
    isArg = m.isArg;
    argProb = m.argProb;
    role = m.role;
    if (m.getParent() instanceof AceJet.AceEntity) {
        this.entitytype = ((AceEntity) m.getParent()).type;
        this.entitysubtype = ((AceEntity) m.getParent()).subtype;
    } else if (m.getParent() instanceof AceJet.AceTimex) {
        this.entitytype = "";
        this.entitysubtype = "";
    } else if (m.getParent() instanceof AceJet.AceValue) {
        this.entitytype = ((AceValue) m.getParent()).type;
        this.entitysubtype = ((AceValue) m.getParent()).subtype;
    } else {
        this.entitytype = "";
        this.entitysubtype = "";
    }
    this.mentiontype = m.getType();
    System.arraycopy(m.roleProb, 0, roleProb, 0, m.roleProb.length);
    ground = m.ground;
    span = senCM;
    SemanticGraph totaldep = span.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
    // normalise a handful of truncated or garbled mention strings
    this.content = m.text.trim();
    if (m.text.charAt(0) == '\"') {
        this.content = m.text.substring(1).trim();
    }
    if ("s\nb".equals(this.content)) {
        this.content = "his brother";
    } else if (" f".equals(this.content)) {
        this.content = "foreign";
    } else if ("-l".equals(this.content)) {
        this.content = "US-led";
    } else if ("s a".equals(this.content)) {
        if (span.toString().contains("Arafat's administration")) {
            this.content = "Arafat's administration";
        } else if (span.toString().contains("bus attack")) {
            this.content = "bus attack";
        }
    } else if ("33-month".equals(this.content)) {
        this.content = "33-month-old";
    } else if ("U.S".equals(this.content)) {
        this.content = "U.S.";
    } else if ("four-day".equals(this.content)) {
        this.content = "four-day-old";
    } else if ("U.N".equals(this.content)) {
        this.content = "U.N.";
    } else if ("33-year".equals(this.content)) {
        this.content = "33-year-old";
    }
    // re-parse the mention text and align its tokens with the sentence tokens to find the head word
    Annotation document = ParseSentence(this.content);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
    CoreMap cm = sentences.get(0);
    int pathlength = -1, imin = 1000;
    for (int i = 0; i < senCM.get(TokensAnnotation.class).size(); i++) {
        IndexedWord debug = new IndexedWord(senCM.get(TokensAnnotation.class).get(i));
        boolean canmatch = true;
        for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
            IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
            IndexedWord shortiw = new IndexedWord(cm.get(TokensAnnotation.class).get(j));
            if (!iw.word().equals(shortiw.word())) {
                if (SLEventExtraction.overlap(iw.word(), shortiw.word()) <= 0
                        || Double.isNaN(SLEventExtraction.overlap(iw.word(), shortiw.word()))) {
                    canmatch = false;
                    break;
                }
            }
        }
        if (canmatch) {
            // the mention token closest to the dependency root becomes the head
            for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
                IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
                this.ContentIws.add(iw);
                try {
                    pathlength = totaldep.getPathToRoot(iw).size();
                } catch (java.lang.IllegalArgumentException err) {
                    pathlength = 100;
                }
                if (imin > pathlength) {
                    imin = pathlength;
                    this.head = iw;
                }
            }
            break;
        }
    }
    if (this.head == null) {
        return;
    }
    // the head's parent in the dependency graph becomes the predicate
    this.predicate = totaldep.getParent(this.head);
    if (this.predicate == null) {
        this.predicate = this.head;
    } else {
        IndexedWord curr = head;
        dep = totaldep.getEdge(predicate, curr).getRelation().getShortName();
        if (totaldep.getEdge(predicate, curr).getRelation().getSpecific() != null) {
            dep += "_" + totaldep.getEdge(predicate, curr).getRelation().getSpecific();
        }
    }
}
From source file:sleventextraction.SLEventTypeClassifier.java
public static IndexedWord GetCorrespondingIndexedWord(String anchor, CoreMap cm) {
    // normalise the anchor string and split it into candidate tokens
    anchor = anchor.replaceAll("[.|,|\n|\"]", " ");
    String[] split = anchor.split("[ |']");
    IndexedWord iw = null;
    // scan the anchor tokens from right to left, looking for a matching sentence token
    for (int i = split.length - 1; i >= 0; i--) {
        for (CoreLabel token : cm.get(TokensAnnotation.class)) {
            iw = new IndexedWord(token);
            if (split[i].contains(iw.word()) || iw.word().contains(split[i])) {
                if (Math.abs(split[i].length() - iw.word().length()) <= 3) {
                    return iw;
                } else if (iw.word().contains("-")) {
                    String[] split1 = iw.word().split("-");
                    boolean match = false;
                    for (int j = 0; j < split1.length; j++) {
                        if (split1[j].equals(split[i])) {
                            match = true;
                        }
                    }
                    if (match) {
                        return iw;
                    }
                }
            }
            if (split[i].contains(iw.lemma())) {
                if (Math.abs(split[i].length() - iw.lemma().length()) <= 2) {
                    return iw;
                }
            }
        }
    }
    return null;
}
From source file:sleventextraction.SLEventTypeClassifier.java
private LinkedList<String> GetCandidateTriggers(Annotation parsedsen) {
    LinkedList<String> res = new LinkedList<>();
    List<CoreMap> sentences = parsedsen.get(CoreAnnotations.SentencesAnnotation.class);
    assert sentences.size() == 1;
    CoreMap cm = sentences.get(0);
    // candidate triggers are the nouns and verbs of the (single) parsed sentence
    for (CoreLabel token : cm.get(TokensAnnotation.class)) {
        IndexedWord iw = new IndexedWord(token);
        if (iw.tag().contains("NN") || iw.tag().contains("VB")) {
            res.add(iw.word());
        }
    }
    return res;
}
From source file:slvectormodel.SLVectorModel.java
private LinkedList<Double> LexicalChain(SLEntity e) {
    LinkedList<Double> res = new LinkedList<>();
    res = SLMath.Vector_0(SLEventExtraction.dim);
    CoreMap sentence = e.span;
    List<CoreLabel> cmwords = sentence.get(TokensAnnotation.class);
    List<IndexedWord> iwwords = new LinkedList<>();
    // wrap the sentence tokens and locate the position of the entity head
    int pos = -1;
    for (int i = 0; i < cmwords.size(); i++) {
        IndexedWord iw = new IndexedWord(cmwords.get(i));
        iwwords.add(iw);
        if (iw == e.head) {
            pos = i;
        }
    }
    // sum word vectors of verbs/adjectives in a window to the left of the head
    int count = 0;
    for (int i = pos - 1; i >= 0; i--) {
        if (iwwords.get(i).tag().contains("VB") || iwwords.get(i).tag().contains("JJ")) {
            count++;
            if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).word()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word().toLowerCase())) {
                res = SLMath.Vector_add(res,
                        SLEventExtraction.word2vec.get(iwwords.get(i).word().toLowerCase()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).lemma())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).lemma()));
            }
        }
        if (count >= halfwindow) {
            break;
        }
    }
    // sum word vectors of verbs/adjectives in a window to the right of the head
    count = 0;
    for (int i = pos + 1; i < iwwords.size(); i++) {
        if (iwwords.get(i).tag().contains("VB") || iwwords.get(i).tag().contains("JJ")) {
            count++;
            if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).word()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).word().toLowerCase())) {
                res = SLMath.Vector_add(res,
                        SLEventExtraction.word2vec.get(iwwords.get(i).word().toLowerCase()));
            } else if (SLEventExtraction.word2vec.containsKey(iwwords.get(i).lemma())) {
                res = SLMath.Vector_add(res, SLEventExtraction.word2vec.get(iwwords.get(i).lemma()));
            }
        }
        if (count >= halfwindow) {
            break;
        }
    }
    return res;
}