Example usage for edu.stanford.nlp.ling IndexedWord word

List of usage examples for edu.stanford.nlp.ling IndexedWord word

Introduction

On this page you can find example usages of edu.stanford.nlp.ling IndexedWord word.

Prototype

@Override
    public String word() 

Source Link

Usage

From source file:ca.ualberta.exemplar.core.RelationExtraction.java

License:Open Source License

/**
 * Extracts n-ary relation instances from one sentence by matching three
 * Semgrex templates (A, B, C) against its collapsed-CC-processed dependency
 * graph. Each match yields a {@code RelationInstance} with a normalized and
 * an original (surface) relation name, which is then handed to
 * {@code argExtractor} to attach arguments.
 *
 * @param sentence an annotated sentence; must carry a
 *        CollapsedCCProcessedDependenciesAnnotation to produce any output
 * @return the relation instances found (possibly empty, never null)
 */
public List<RelationInstance> extractNAryRelations(CoreMap sentence) {

    List<RelationInstance> ret = new ArrayList<RelationInstance>();

    SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    if (dependencies == null) {
        return ret;
    }

    // Template A: a verbal trigger, or a nominalized one ("relnom") when present.
    {
        SemgrexMatcher matcher = patternA.matcher(dependencies);
        while (matcher.find()) {
            IndexedWord rel = matcher.getNode("rel");
            IndexedWord relnom = matcher.getNode("relnom");
            boolean nom = relnom != null;
            // The nominalized form, when matched, takes precedence over the verb.
            if (nom)
                rel = relnom;
            // Only keep triggers that govern at least one named-entity child.
            if (!hasNerChild(rel, dependencies))
                continue;

            RelationInstance instance = new RelationInstance();
            List<IndexedWord> triggers = new ArrayList<IndexedWord>(2);

            if (nom) {
                // Map the nominalization back to its verb via the lookup table
                // (e.g. "acquisition" -> "acquire").
                instance.setNormalizedRelation(nominalizedVerbMap.get(relnom.word()));
                instance.setOriginalRelation(relnom.word());
                triggers.add(relnom);
            } else {
                instance.setNormalizedRelation(produceRelationName(rel, null, dependencies, true));
                instance.setOriginalRelation(produceRelationName(rel, null, dependencies, false));
                triggers.add(rel);
            }

            // IndexedWord.index() is 1-based; the stored trigger index is 0-based.
            instance.setTriggerIndex(triggers.get(0).index() - 1);
            argExtractor.extractArgumentsTemplateA(sentence, dependencies, triggers, instance);
            if (shouldAddInstance(instance)) {
                ret.add(instance);
            }

        }
    }

    // Template B: a verb plus (optionally) its direct object form the trigger.
    {
        SemgrexMatcher matcher = patternB.matcher(dependencies);
        while (matcher.find()) {
            IndexedWord rel = matcher.getNode("rel");
            if (!hasNerChild(rel, dependencies))
                continue;
            List<IndexedWord> triggers = new ArrayList<IndexedWord>();
            triggers.add(rel);
            IndexedWord dobj = matcher.getNode("dobj");
            if (dobj != null) {
                triggers.add(dobj);
            }

            RelationInstance instance = new RelationInstance();
            instance.setTriggerIndex(triggers.get(0).index() - 1);
            instance.setNormalizedRelation(produceRelationName(rel, dobj, dependencies, true));
            instance.setOriginalRelation(produceRelationName(rel, dobj, dependencies, false));

            argExtractor.extractArgumentsTemplateB(sentence, dependencies, triggers, instance);
            if (shouldAddInstance(instance)) {
                ret.add(instance);
            }
        }
    }

    // Template C: copular construction; a missing copula defaults to "be" in
    // the normalized name and to the empty string in the surface name.
    {
        SemgrexMatcher matcher = patternC.matcher(dependencies);
        while (matcher.find()) {
            IndexedWord copula = matcher.getNode("copula");
            IndexedWord rel = matcher.getNode("rel");
            List<IndexedWord> triggers = new ArrayList<IndexedWord>();
            triggers.add(rel);

            RelationInstance instance = new RelationInstance();

            if (copula != null) {
                triggers.add(copula);
                instance.setNormalizedRelation(produceRelationName(copula, rel, dependencies, true));
                instance.setOriginalRelation(produceRelationName(copula, rel, dependencies, false));
            } else {
                instance.setNormalizedRelation(produceRelationName("be", rel, dependencies, true));
                instance.setOriginalRelation(produceRelationName("", rel, dependencies, false));
            }
            instance.setTriggerIndex(triggers.get(0).index() - 1);
            argExtractor.extractArgumentsTemplateC(sentence, dependencies, triggers, instance);
            if (shouldAddInstance(instance)) {
                ret.add(instance);
            }
        }
    }
    return ret;

}

From source file:ca.ualberta.exemplar.core.RelationExtraction.java

License:Open Source License

/**
 * Builds a relation name from a verb and an optional noun. When normalizing,
 * lemmas of the bare words are used; otherwise surface forms plus their
 * modifiers (via {@code addModifiers}) are included. Words in each phrase are
 * emitted in sentence order, verb phrase first, separated by single spaces.
 *
 * @param verb trigger verb; may be null
 * @param noun accompanying noun; may be null
 * @param dependencies dependency graph used to collect modifiers
 * @param shouldNormalize true to emit lemmas without modifiers
 * @return the space-joined relation name (possibly empty, never null)
 */
private static String produceRelationName(IndexedWord verb, IndexedWord noun, SemanticGraph dependencies,
        boolean shouldNormalize) {
    List<IndexedWord> verbPhrase = new ArrayList<IndexedWord>();
    List<IndexedWord> nounPhrase = new ArrayList<IndexedWord>();

    if (verb != null)
        verbPhrase.add(verb);
    if (noun != null)
        nounPhrase.add(noun);

    // Surface form keeps modifiers; the normalized form is just the head lemmas.
    if (!shouldNormalize) {
        if (noun != null)
            addModifiers(nounPhrase, noun, dependencies);
        if (verb != null)
            addModifiers(verbPhrase, verb, dependencies);
    }

    sortWordsByIndex(verbPhrase);
    sortWordsByIndex(nounPhrase);

    StringBuilder rel = new StringBuilder();
    appendPhraseWords(rel, verbPhrase, shouldNormalize);
    appendPhraseWords(rel, nounPhrase, shouldNormalize);

    return rel.toString().trim();
}

/**
 * Appends each word of the phrase (lemma when normalizing, surface form
 * otherwise) followed by a single space. Shared by both phrase loops above.
 */
private static void appendPhraseWords(StringBuilder rel, List<IndexedWord> phrase, boolean shouldNormalize) {
    for (IndexedWord word : phrase) {
        if (shouldNormalize)
            rel.append(word.lemma());
        else
            rel.append(word.word());

        rel.append(' ');
    }
}

From source file:ca.ualberta.exemplar.core.RelationExtraction.java

License:Open Source License

/**
 * Builds a relation name from a literal verb string (e.g. "be" or "") and an
 * optional noun. When normalizing, the noun's lemma is used; otherwise the
 * noun's surface form plus its modifiers (via {@code addModifiers}) are
 * included, in sentence order, space-separated.
 *
 * @param verb literal verb text prepended to the name; may be empty
 * @param noun accompanying noun; may be null
 * @param dependencies dependency graph used to collect modifiers
 * @param shouldNormalize true to emit the lemma without modifiers
 * @return the space-joined relation name (trimmed, never null)
 */
private static String produceRelationName(String verb, IndexedWord noun, SemanticGraph dependencies,
        boolean shouldNormalize) {
    StringBuilder rel = new StringBuilder();
    List<IndexedWord> nounPhrase = new ArrayList<IndexedWord>();
    // Chain appends instead of building an intermediate String via concatenation.
    rel.append(verb).append(' ');

    if (noun != null)
        nounPhrase.add(noun);

    if (!shouldNormalize)
        if (noun != null)
            addModifiers(nounPhrase, noun, dependencies);

    sortWordsByIndex(nounPhrase);

    for (IndexedWord word : nounPhrase) {
        if (shouldNormalize)
            rel.append(word.lemma());
        else
            rel.append(word.word());

        rel.append(' ');
    }

    return rel.toString().trim();
}

From source file:org.sam_agent.csparser.ContinuousParser.java

License:Open Source License

/**
 * Serializes a dependency graph's edges as a JSON-style fragment:
 * {@code "dependencies":[{"rel":…,"arg0":"word-index","arg1":"word-index"},…]}.
 * NOTE(review): word text is not JSON-escaped, so tokens containing '"' or
 * '\\' would produce malformed output — confirm tokenizer never emits these.
 *
 * @param dependencies the graph whose edges are listed
 * @return the serialized fragment (empty array when the graph has no edges)
 */
public String stringify(SemanticGraph dependencies) {
    List<String> depsList = new ArrayList<String>();

    for (SemanticGraphEdge edge : dependencies.edgeIterable()) {
        String rel = edge.getRelation().toString();
        IndexedWord gov = edge.getGovernor();
        IndexedWord dep = edge.getDependent();
        // word() already returns String; the original's .toString() was redundant.
        String arg0 = gov.word() + "-" + gov.index();
        String arg1 = dep.word() + "-" + dep.index();
        depsList.add(String.format("{\"rel\":\"%s\",\"arg0\":\"%s\",\"arg1\":\"%s\"}", rel, arg0, arg1));
    }

    return String.format("\"dependencies\":[%s]", String.join(",", depsList));
}

From source file:semRewrite.datesandnumber.DateAndNumbersGeneration.java

License:Open Source License

/** ***************************************************************
 * Generates SUMO terms corresponding to measure functions. Identifies the
 * unit of measurement, the measured value, and the measured entity by
 * walking the Stanford dependency graph up from the numeric token, tracking
 * visited nodes to avoid revisiting and using {@code flag} to stop the climb.
 *
 * @param token the numeric token being interpreted as a measured value
 * @param count sequence number used to name this measure (e.g. "measure2")
 * @param utilities carries the dependency graph and collects output sumoTerms
 */
private void measureFn(Tokens token, int count, Utilities utilities) {

    IndexedWord tokenNode = utilities.StanfordDependencies.getNodeByIndex(token.getId());
    // The number's parent in the graph is taken as the unit ("5 [miles]").
    IndexedWord unitOfMeasurementNode = utilities.StanfordDependencies.getParent(tokenNode);
    IndexedWord measuredEntity = null;
    String posTagRemover = null;
    String unitOfMeasurementStr = "";
    String sumoUnitOfMeasure = "";
    List<String> visitedNodes = new ArrayList<String>();
    Matcher posTagRemoverMatcher = null;
    String measuredEntityStr = null;
    boolean flag = false;
    // fix to remove comma in numbers (e.g. "1,000" -> "1000")
    if (token.getWord().contains(",")) {
        token.setWord(token.getWord().replaceAll(",", ""));
    }
    if (unitOfMeasurementNode != null) {
        unitOfMeasurementStr = unitOfMeasurementNode.word();
        // Candidate measured entity: the unit's own governor.
        measuredEntity = utilities.StanfordDependencies.getParent(unitOfMeasurementNode);
        visitedNodes.add(unitOfMeasurementNode.toString() + "-" + unitOfMeasurementNode.index());
    }
    if ((measuredEntity == null) && (unitOfMeasurementNode != null)) {
        // Unit is a root-ish node: fall back to its nsubj/dobj dependent.
        for (SemanticGraphEdge e : utilities.StanfordDependencies.getOutEdgesSorted(unitOfMeasurementNode)) {
            if ((e.getRelation().toString().equals("nsubj")) || (e.getRelation().toString().equals("dobj"))) {
                measuredEntity = e.getDependent();
                flag = true;
                break;
            }
        }
    } else if ((measuredEntity == null) && (unitOfMeasurementNode == null)) {
        // Number has no parent at all: nothing to emit.
        return;
    }
    // Climb the graph until a noun-tagged entity is found or the search settles.
    while ((measuredEntity != null) && (!flag)) {
        measuredEntityStr = measuredEntity.value() + "-" + measuredEntity.index();
        if (!visitedNodes.contains(measuredEntityStr)) {
            visitedNodes.add(measuredEntityStr);
        }
        // IndexedWord.toString() includes the POS tag; extract it via regex.
        posTagRemoverMatcher = POS_TAG_REMOVER.matcher(measuredEntity.toString());
        if (posTagRemoverMatcher.find()) {
            posTagRemover = posTagRemoverMatcher.group(1);
            if (Utilities.nounTags.contains(posTagRemover)) {
                break;
            }
            if (utilities.StanfordDependencies.getParent(measuredEntity) == null) {
                Set<IndexedWord> childrenSet = utilities.StanfordDependencies.getChildren(measuredEntity);
                //which means it is unitOfMeasurementNode. Hence remove infinite looping condition
                if ((childrenSet.size() == 1)) {
                    measuredEntity = unitOfMeasurementNode;
                    utilities.sumoTerms.add("measure(" + measuredEntity.word() + "-" + measuredEntity.index()
                            + ", measure" + count + ")");
                    utilities.sumoTerms.add("unit(measure" + count + ", " + "memberCount" + ")");
                    utilities.sumoTerms.add("value(measure" + count + ", " + token.getWord() + ")");
                    utilities.sumoTerms.add("valueToken(" + token.getWord() + "," + token.getWord() + "-"
                            + token.getId() + ")");
                    flag = true;
                    return;
                }
                // Root with several children: pick an unvisited noun child,
                // preferring one attached via nsubj.
                IndexedWord measuredEntity_temp = null;
                for (IndexedWord child : childrenSet) {
                    String childPosTagRemover = null;
                    posTagRemoverMatcher = POS_TAG_REMOVER.matcher(child.toString());
                    if (posTagRemoverMatcher.find()) {
                        childPosTagRemover = posTagRemoverMatcher.group(1);
                    }
                    // NOTE(review): childPosTagRemover stays null when the regex
                    // does not match, which would NPE on replaceFirst — confirm
                    // POS_TAG_REMOVER always matches IndexedWord.toString().
                    if (!(visitedNodes.contains(child.toString() + "-" + child.index()))
                            && (Utilities.nounTags.contains(childPosTagRemover.replaceFirst("\\/", "")))) {
                        if ((utilities.StanfordDependencies.reln(measuredEntity, child) != null)
                                && (utilities.StanfordDependencies.reln(measuredEntity, child).getShortName()
                                        .equals("nsubj"))) {
                            measuredEntity = child;
                            visitedNodes.add(child.toString() + "-" + child.index());
                            flag = true;
                            break;
                        }
                        measuredEntity_temp = child;
                        visitedNodes.add(child.toString() + "-" + child.index());
                    }
                }
                if (!flag) {
                    measuredEntity = measuredEntity_temp;
                    flag = true;
                }

            } else {
                // Keep climbing toward the root.
                measuredEntity = utilities.StanfordDependencies.getParent(measuredEntity);
            }
        }
    }
    if (measuredEntity != null) {
        String lemmatizedWord = lemmatizeWord(measuredEntity);
        utilities.sumoTerms
                .add("measure(" + lemmatizedWord + "-" + measuredEntity.index() + ", measure" + count + ")");
    }
    // Map the unit to a SUMO sense; fall back to the raw word (or "memberCount"
    // when unit and entity coincide) if WSD finds nothing.
    sumoUnitOfMeasure = lemmatizeWord(unitOfMeasurementNode);
    sumoUnitOfMeasure = WSD.getBestDefaultSUMOsense(sumoUnitOfMeasure, 1);
    if ((sumoUnitOfMeasure != null) && (!sumoUnitOfMeasure.isEmpty())) {
        sumoUnitOfMeasure = sumoUnitOfMeasure.replaceAll("[^\\p{Alpha}\\p{Digit}]+", "");
    } else {
        if ((measuredEntity != null) && (unitOfMeasurementStr.equals(measuredEntity.value()))) {
            unitOfMeasurementStr = "memberCount";
        }
        sumoUnitOfMeasure = unitOfMeasurementStr;
    }
    utilities.sumoTerms.add("unit(measure" + count + ", " + sumoUnitOfMeasure + ")");
    utilities.sumoTerms.add("value(measure" + count + ", " + token.getWord() + ")");
    utilities.sumoTerms
            .add("valueToken(" + token.getWord() + "," + token.getWord() + "-" + token.getId() + ")");
    WordNet.wn.initOnce();
}

From source file:semRewrite.datesandnumber.Utilities.java

License:Open Source License

/** ***************************************************************
  *//*from   w ww  .  j  a v a 2s.  c  o  m*/
/** ***************************************************************
 * Walks from the word at {@code wordIndex} up toward the dependency root and
 * returns the first ancestor whose POS tag satisfies
 * {@code containsIndexWord()}, formatted as "word-index".
 *
 * @param wordIndex 1-based index of the starting word in the sentence
 * @return "word-index" of the first qualifying ancestor, or null when none
 *         exists or the node is detached from the root path
 */
public String populateRootWord(int wordIndex) {

    IndexedWord tempParent = StanfordDependencies.getNodeByIndex(wordIndex);
    while (tempParent != null && !tempParent.equals(StanfordDependencies.getFirstRoot())) {
        tempParent = StanfordDependencies.getParent(tempParent);
        // Original NPE'd here when getParent() returned null (node not on a
        // path to the root); bail out instead.
        if (tempParent == null) {
            break;
        }
        if (containsIndexWord(tempParent.tag())) {
            return tempParent.word() + "-" + tempParent.index();
        }
    }
    return null;
}

From source file:sleventextraction.SLEntity.java

/**
 * Builds an entity from an ACE mention and the CoreNLP annotation of its
 * sentence: copies type/role metadata from the mention, patches a fixed set
 * of known-broken mention strings, re-parses the mention text, aligns its
 * tokens against the sentence tokens, and selects as head the aligned token
 * closest to the dependency root (recording its governor and relation).
 */
public SLEntity(AceMention m, CoreMap senCM, SemanticGraph senSG) {
    this();
    isArg = m.isArg;
    argProb = m.argProb;
    role = m.role;
    // Entity type/subtype depend on the concrete mention parent class.
    if (m.getParent() instanceof AceJet.AceEntity) {
        this.entitytype = ((AceEntity) m.getParent()).type;
        this.entitysubtype = ((AceEntity) m.getParent()).subtype;
    } else if (m.getParent() instanceof AceJet.AceTimex) {
        this.entitytype = "";
        this.entitysubtype = "";
    } else if (m.getParent() instanceof AceJet.AceValue) {
        this.entitytype = ((AceValue) m.getParent()).type;
        this.entitysubtype = ((AceValue) m.getParent()).subtype;
    } else {
        this.entitytype = "";
        this.entitysubtype = "";
    }
    this.mentiontype = m.getType();

    System.arraycopy(m.roleProb, 0, roleProb, 0, m.roleProb.length);
    ground = m.ground;
    span = senCM;
    SemanticGraph totaldep = span.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);

    // Hard-coded repairs for mention strings known to be truncated/garbled in
    // the source corpus; presumably specific to one dataset — verify if reused.
    this.content = m.text.trim();
    if (m.text.charAt(0) == '\"') {
        this.content = m.text.substring(1).trim();
    }
    if ("s\nb".equals(this.content)) {
        this.content = "his brother";
    } else if (" f".equals(this.content)) {
        this.content = "foreign";
    } else if ("-l".equals(this.content)) {
        this.content = "US-led";
    } else if ("s a".equals(this.content)) {
        if (span.toString().contains("Arafat's administration")) {
            this.content = "Arafat's administration";
        } else if (span.toString().contains("bus attack")) {
            this.content = "bus attack";
        }
    } else if ("33-month".equals(this.content)) {
        this.content = "33-month-old";
    } else if ("U.S".equals(this.content)) {
        this.content = "U.S.";
    } else if ("four-day".equals(this.content)) {
        this.content = "four-day-old";
    } else if ("U.N".equals(this.content)) {
        this.content = "U.N.";
    } else if ("33-year".equals(this.content)) {
        this.content = "33-year-old";
    }
    // Re-parse the (patched) mention text on its own.
    Annotation document = ParseSentence(this.content);
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    CoreMap cm = sentences.get(0);
    // Slide the mention's token sequence over the sentence tokens to find the
    // first aligned position; among aligned tokens, pick as head the one with
    // the shortest path to the dependency root.
    int pathlength = -1, imin = 1000;
    for (int i = 0; i < senCM.get(TokensAnnotation.class).size(); i++) {
        IndexedWord debug = new IndexedWord(senCM.get(TokensAnnotation.class).get(i));
        boolean canmatch = true;
        for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
            // NOTE(review): i + j can exceed the sentence token count near the
            // end of the sentence, which would throw IndexOutOfBoundsException
            // — confirm mentions never extend past the sentence boundary.
            IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
            IndexedWord shortiw = new IndexedWord(cm.get(TokensAnnotation.class).get(j));
            if (!iw.word().equals(shortiw.word())) {
                // Tolerate near-matches via the overlap score.
                if (SLEventExtraction.overlap(iw.word(), shortiw.word()) <= 0
                        || Double.isNaN(SLEventExtraction.overlap(iw.word(), shortiw.word()))) {
                    canmatch = false;
                    break;
                }
            }
        }
        if (canmatch) {
            for (int j = 0; j < cm.get(TokensAnnotation.class).size(); j++) {
                IndexedWord iw = new IndexedWord(senCM.get(TokensAnnotation.class).get(i + j));
                this.ContentIws.add(iw);
                try {
                    pathlength = totaldep.getPathToRoot(iw).size();
                } catch (java.lang.IllegalArgumentException err) {
                    // Token absent from the graph: treat as maximally deep.
                    pathlength = 100;
                }
                if (imin > pathlength) {
                    imin = pathlength;
                    this.head = iw;
                }
            }
            break;
        }
    }
    if (this.head == null) {
        return;
    }
    // Record the head's governor and the (possibly specialized) relation name.
    this.predicate = totaldep.getParent(this.head);
    if (this.predicate == null) {
        this.predicate = this.head;
    } else {
        IndexedWord curr = head;
        dep = totaldep.getEdge(predicate, curr).getRelation().getShortName();
        if (totaldep.getEdge(predicate, curr).getRelation().getSpecific() != null) {
            dep += "_" + totaldep.getEdge(predicate, curr).getRelation().getSpecific();
        }
    }

}

From source file:sleventextraction.SLEventTypeClassifier.java

/**
 * Finds the sentence token best corresponding to an anchor string. Scans the
 * anchor's pieces right-to-left against every sentence token, accepting a
 * token whose surface form (length-tolerance 3), hyphen component (exact), or
 * lemma (length-tolerance 2) matches the piece.
 *
 * @param anchor the anchor text; punctuation is replaced by spaces first
 *               (note: the regex character class treats '|' literally too)
 * @param cm the sentence whose tokens are searched
 * @return the matching token, or null when nothing matches
 */
public static IndexedWord GetCorrespondingIndexedWord(String anchor, CoreMap cm) {
    anchor = anchor.replaceAll("[.|,|\n|\"]", " ");
    String[] split = anchor.split("[ |']");
    IndexedWord iw = null;
    for (int i = split.length - 1; i >= 0; i--) {
        for (CoreLabel token : cm.get(TokensAnnotation.class)) {
            iw = new IndexedWord(token);

            if (split[i].contains(iw.word()) || iw.word().contains(split[i])) {
                if (Math.abs(split[i].length() - iw.word().length()) <= 3) {
                    return iw;
                } else if (iw.word().contains("-")) {
                    // Lengths differ too much, but one hyphen component may
                    // still match the anchor piece exactly.
                    for (String part : iw.word().split("-")) {
                        if (part.equals(split[i])) {
                            return iw;
                        }
                    }
                }
            }

            // lemma() returns null when the lemma annotation is absent; the
            // original NPE'd inside contains() in that case.
            String lemma = iw.lemma();
            if (lemma != null && split[i].contains(lemma)) {
                if (Math.abs(split[i].length() - lemma.length()) <= 2) {
                    return iw;
                }
            }
        }
    }
    return null;
}

From source file:sleventextraction.SLEventTypeClassifier.java

/**
 * Builds the feature vector for an anchor: the word2vec embedding of its
 * trigger (or of the raw anchor when no trigger parses), concatenated with
 * the mean head-word embedding of the extracted entities. Returns null when
 * no embedding is available for the trigger/anchor or no entity has one.
 */
public static LinkedList<Double> GetFeatures(String anchor, Annotation document) {
    LinkedList<SLEntity> entities = new LinkedList<>();
    IndexedWord trigger = ParseSentenceAndGetEntities(anchor, entities, document, pipeline);

    LinkedList<Double> features = new LinkedList<>();
    if (trigger == null) {
        // No trigger found: fall back to the raw anchor string.
        String key = anchor.toLowerCase();
        if (!word2vec.containsKey(key)) {
            return null;
        }
        features.addAll(word2vec.get(key));
    } else {
        // Prefer the trigger's surface form, then its lemma.
        String surface = trigger.word().toLowerCase();
        if (word2vec.containsKey(surface)) {
            features.addAll(word2vec.get(surface));
        } else if (word2vec.containsKey(trigger.lemma())) {
            features.addAll(word2vec.get(trigger.lemma()));
        } else {
            return null;
        }
    }

    // Average the head-word vectors of all entities that have one.
    Lookup_All_wordvec(entities);
    LinkedList<Double> headSum = SLMath.Vector_0(SLEventExtraction.dim);
    int validCount = 0;
    for (SLEntity entity : entities) {
        if (entity.headwordvec.isEmpty()) {
            continue;
        }
        validCount++;
        headSum = SLMath.Vector_add(headSum, entity.headwordvec);
    }
    if (validCount == 0) {
        return null;
    }
    features.addAll(SLMath.Vector_divide_num(headSum, validCount));

    return features;
}

From source file:sleventextraction.SLEventTypeClassifier.java

/**
 * Collects candidate trigger words from a single-sentence annotation: the
 * surface form of every token whose POS tag marks it as a noun (NN*) or a
 * verb (VB*), in token order.
 */
private LinkedList<String> GetCandidateTriggers(Annotation parsedsen) {
    List<CoreMap> sentences = parsedsen.get(CoreAnnotations.SentencesAnnotation.class);

    assert sentences.size() == 1;

    LinkedList<String> candidates = new LinkedList<>();
    for (CoreLabel token : sentences.get(0).get(TokensAnnotation.class)) {
        IndexedWord word = new IndexedWord(token);
        String tag = word.tag();
        if (tag.contains("NN") || tag.contains("VB")) {
            candidates.add(word.word());
        }
    }
    return candidates;
}