Example usage for edu.stanford.nlp.ling CoreLabel value

List of usage examples for edu.stanford.nlp.ling CoreLabel value

Introduction

On this page you can find example usages of the value() method of edu.stanford.nlp.ling.CoreLabel.

Prototype

@Override
public final String value() 

Source Link

Usage

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Returns the terminal label for the given position.
 *
 * <p>If an original label is stored at that position it is returned as-is,
 * except that a missing value is first filled in from its word. Otherwise a
 * new label is assembled from the indexed word string, the begin/end offsets
 * and, when available, the original tag.
 *
 * @param labelIndex position used to index the per-token arrays
 * @return the stored label, or a newly built one when none is stored
 */
private CoreLabel getCoreLabel(int labelIndex) {
    final CoreLabel existing = originalCoreLabels[labelIndex];

    if (existing == null) {
        // Nothing stored for this position: build a label from scratch.
        final String surface = wordIndex.get(words[labelIndex]);
        final CoreLabel built = new CoreLabel();
        built.setValue(surface);
        built.setWord(surface);
        built.setBeginPosition(beginOffsets[labelIndex]);
        built.setEndPosition(endOffsets[labelIndex]);
        if (originalTags[labelIndex] != null) {
            built.setTag(originalTags[labelIndex].tag());
        }
        return built;
    }

    // Reuse the stored label, backfilling its value from the word if needed.
    if (existing.value() == null) {
        final String word = existing.word();
        if (word != null) {
            existing.setValue(word);
        }
    }
    return existing;
}

From source file:de.uni_stuttgart.ims.comparatives.nlp.SentenceSplitterStanford.java

License:Creative Commons License

/**
 * Split the string into sentences with Stanford.
 *
 * <p>The covered text of each span is rebuilt from the token values, each
 * followed by a single space (so it ends with a trailing space and may
 * differ from the original substring).
 *
 * @param document the text to split
 * @return array of spans with the start/end positions of each sentence
 */
public TextSpan[] split(String document) {
    StringReader reader = new StringReader(document);
    DocumentPreprocessor dp = new DocumentPreprocessor(reader);
    dp.setTokenizerFactory(ptbTokenizerFactory);

    List<TextSpan> sentenceSpansList = new ArrayList<TextSpan>();
    for (List<HasWord> sent : dp) {
        // Guard against an empty sentence: get(0) below would throw.
        if (sent.isEmpty()) {
            continue;
        }
        CoreLabel firstword = (CoreLabel) sent.get(0);
        CoreLabel lastword = (CoreLabel) sent.get(sent.size() - 1);

        // StringBuilder avoids quadratic copying from += in a loop.
        StringBuilder coveredText = new StringBuilder();
        for (HasWord token : sent) {
            CoreLabel word = (CoreLabel) token;
            coveredText.append(word.value()).append(' ');
        }
        sentenceSpansList.add(
                new TextSpan(firstword.beginPosition(), lastword.endPosition(), coveredText.toString()));
    }

    return sentenceSpansList.toArray(new TextSpan[0]);
}

From source file:edu.ucla.cs.scai.qa.questionclassifier.SyntacticTreeNode.java

/**
 * Builds a node (and, recursively, its subtree) from a parse tree.
 *
 * <p>Leaves consume tokens from the front of {@code tokens} in order, so the
 * list is modified by this constructor. Inner nodes span from the smallest
 * child begin to the largest child end and record which kinds of noun-phrase
 * children they have.
 *
 * @param t      the parse tree node to convert
 * @param tokens remaining tokens, consumed left to right by the leaves
 * @param parent the parent node, or {@code null} presumably for the root
 * @throws Exception if no token is left for a leaf, the token is null, or
 *                   the token value differs from the leaf value
 */
public SyntacticTreeNode(Tree t, ArrayList<CoreLabel> tokens, SyntacticTreeNode parent) throws Exception {
    this.parent = parent;
    value = t.value();
    if (t.isLeaf()) {
        // Check for a missing token BEFORE dereferencing it; an exhausted
        // list is reported as the same mapping failure.
        CoreLabel c = tokens.isEmpty() ? null : tokens.remove(0);
        if (c == null) {
            throw new Exception("Mapping between TreeNode and CoreLabel not found");
        }
        begin = c.beginPosition();
        end = c.endPosition();
        lemma = c.lemma();
        ner = c.ner();
        if (!value.equals(c.value())) {
            throw new Exception("Different words have been matched!");
        }
    } else {
        boolean hasNPchildren = false;
        boolean hasWHNPchildren = false;
        boolean hasQPchildren = false;
        // Start with an inverted range so the first child always narrows it.
        begin = Integer.MAX_VALUE;
        end = Integer.MIN_VALUE;
        for (Tree c : t.children()) {
            SyntacticTreeNode child = new SyntacticTreeNode(c, tokens, this);
            children.add(child);
            if (child.value.equals("NP")) {
                hasNPchildren = true;
            } else if (child.value.equals("QP")) {
                hasQPchildren = true;
            } else if (child.value.equals("WHNP")) {
                hasWHNPchildren = true;
            }
            begin = Math.min(begin, child.begin);
            end = Math.max(end, child.end);
        }
        if (value.equals("NP")) {
            if (hasNPchildren) {
                npCompound = true;
            } else if (hasQPchildren) {
                npQp = true;
            } else {
                npSimple = true;
            }
        } else if (value.equals("WHNP")) { //can a WHNP node have QP children?
            if (hasNPchildren || hasWHNPchildren) {
                whnpCompound = true;
            } else if (!hasQPchildren) {
                whnpSimple = true;
            }
        }
    }
}

From source file:main.java.parsers.StanfordParser.java

/** 
 * Parses a given input text document using the Stanford CoreNLP parser.
 * /*from   ww w. ja v a2 s.  c om*/
 * @param document
 * @throws java.io.UnsupportedEncodingException 
 * @throws java.lang.InterruptedException 
 */
public static void parse(Doc document) throws UnsupportedEncodingException, IOException, InterruptedException {

    // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor.
    Annotation annotation = new Annotation(new String(document.text.getBytes("UTF-8"), "UTF-8"));
    // run all the selected Annotators on this text
    pipeline.annotate(annotation);

    // An Annotation is a Map and you can get and use the various analyses individually.
    List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);

    //returns if the annotation is empty.
    if (sentences == null || sentences.isEmpty())
        return;

    //map linking token offsets with their tokens annotation from the Stanford tool.        
    for (CoreMap sentence : sentences) {
        String sentenceStr = "";
        int sentenceNum = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);

        Map<Integer, Integer> tokenNumStartOffset = document.sentenceTokenNumStartOffset.get(sentenceNum);
        if (tokenNumStartOffset == null)
            document.sentenceTokenNumStartOffset.put(sentenceNum, tokenNumStartOffset = new HashMap<>());

        Map<Integer, List<String>> startOffsetSRLRoles = new TreeMap<>();
        //extracting tokenized information from the stanford parser output.
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            sentenceStr += token.value() + " ";
            document.startOffsetIndexedWord.put(token.beginPosition(), new IndexedWord(token));
            tokenNumStartOffset.put(token.index(), token.beginPosition());
            startOffsetSRLRoles.put(token.beginPosition(), null);
        }

        //write the tokenized sentence to an output file
        FileOutputStream output = new FileOutputStream(Main.RESOURCES_DIR + "\\senna\\log.txt");
        output.write(sentenceStr.getBytes());
        //the semantic roles labels for the sentence are obtained by applying SENNA
        startOffsetSRLRoles = SENNASrl.getSRLRoles(startOffsetSRLRoles);
        //set the srl tags
        document.startOffsetSRLRoles.putAll(startOffsetSRLRoles);

        //parse tree of the sentence
        String stanfordParseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).toString();
        ParseTree parseTree = new ParseTree(stanfordParseTree);
        parseTree.convertParseTree();
        document.setSentenceParseTree(sentenceNum, parseTree);

        //dependency graph of the sentence
        SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        document.setSentenceDependencyGraph(sentenceNum, graph);
    }
}

From source file:nlp.prototype.POSToken.java

/**
 * Creates a token wrapper with no successor and no children.
 *
 * <p>The part of speech is derived from the token's {@code value()}.
 *
 * @param token the CoreLabel to wrap
 */
public POSToken(CoreLabel token) {
    this.nextToken = null;
    this.token = token;
    this.pos = PartOfSpeech.toPartOfSpeech(token.value());
    // Plain ArrayList: the former "new ArrayList<POSToken>() {}" created an
    // anonymous subclass that kept a hidden reference to this instance.
    this.childTokens = new ArrayList<POSToken>();
}

From source file:opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor.java

License:Apache License

/**
 * Renders a word-like object as a string.
 *
 * <p>For a {@code CoreLabel} the text is its value, falling back to its word
 * when the value is null; unless only the value was requested, a non-null
 * tag is appended after the separator ({@code CoreLabel.TAG_SEPARATOR} when
 * {@code separator} is null). Other {@code Label}s yield their value when
 * {@code justValue} is set. The remaining word types are formatted with the
 * separator when one is given; anything else uses {@code toString()}.
 *
 * @param o         the object to render
 * @param justValue whether labels should be rendered as their bare value
 * @param separator text placed between word and tag, may be null
 * @return the string form of {@code o}
 */
public static <T> String wordToString(T o, final boolean justValue, final String separator) {
    final boolean valueOnly = justValue && o instanceof Label;

    if (o instanceof CoreLabel) {
        final CoreLabel core = (CoreLabel) o;
        String text = core.value();
        if (text == null) {
            text = core.word();
        }
        if (valueOnly || core.tag() == null) {
            return text;
        }
        if (separator == null) {
            return text + CoreLabel.TAG_SEPARATOR + core.tag();
        }
        return text + separator + core.tag();
    }

    if (valueOnly) {
        return (((Label) o).value());
    }

    // an interface that covered these next four cases would be
    // nice, but we're moving away from these data types anyway
    if (separator != null) {
        if (o instanceof TaggedWord) {
            return ((TaggedWord) o).toString(separator);
        }
        if (o instanceof LabeledWord) {
            // NOTE(review): unlike its siblings this branch does not pass the
            // separator on - confirm whether that is intentional.
            return ((LabeledWord) o).toString();
        }
        if (o instanceof WordLemmaTag) {
            return ((WordLemmaTag) o).toString(separator);
        }
        if (o instanceof WordTag) {
            return ((WordTag) o).toString(separator);
        }
    }
    return (o.toString());
}

From source file:semRewrite.substitutor.CoreLabelSequence.java

License:Open Source License

/** ***************************************************************
 * Generate a String where the CoreLabel values are separated by
 * spaces and do not have a token number suffix
 *
 * @return the label values joined by single spaces
 */
@Override
public String toString() {

    StringBuilder sb = new StringBuilder();
    for (CoreLabel cl : labels) {
        // Separate values with a space once something has been written.
        if (sb.length() > 0) {
            sb.append(' ');
        }
        sb.append(cl.value());
    }
    // Return the string built above; it used to be discarded in favour of
    // labels.toString(), contradicting the documented format.
    return sb.toString();
}

From source file:semRewrite.substitutor.CoreLabelSequence.java

License:Open Source License

/** *************************************************************
 * Checks if label is part of current sequence. A label matches when
 * its value equals the given text and its sentence index equals
 * sentIndex (or sentIndex is IGNORE_SENTENCE).
 * @param sentIndex can be CoreLabelSequence.IGNORE_SENTENCE
 * @param text the label to be checked for in the sequence
 * @param index currently not used in the comparison
 * @return true if a matching label is found, false otherwise
 */
public boolean containsLabel(int sentIndex, String text, int index) {

    for (CoreLabel candidate : labels) {
        final boolean sentenceMatches =
                sentIndex == candidate.sentIndex() || sentIndex == IGNORE_SENTENCE;
        if (!sentenceMatches) {
            continue;
        }
        // FIXME: total hack! The token index is deliberately not compared.
        if (text.equals(candidate.value())) {
            return true;
        }
    }
    return false;
}

From source file:semRewrite.substitutor.CoreLabelSequence.java

License:Open Source License

/** *************************************************************
 * Create a new sequence whose CoreLabels carry the value() of each
 * original label in all caps, together with its index(). Other
 * fields of the original labels are not copied, and the original
 * labels are left unchanged.
 *
 * @return a new sequence of upper-cased labels
 */
public semRewrite.substitutor.CoreLabelSequence toUpperCase() {

    List<CoreLabel> upperCased = new ArrayList<>();
    for (CoreLabel cl : labels) {
        CoreLabel copy = new CoreLabel();
        String value = cl.value();
        // Locale.ROOT keeps the result independent of the JVM default locale
        // (e.g. Turkish dotted/dotless i); a null value stays null.
        copy.setValue(value == null ? null : value.toUpperCase(java.util.Locale.ROOT));
        copy.setIndex(cl.index());
        upperCased.add(copy);
    }
    return new semRewrite.substitutor.CoreLabelSequence(upperCased);
}

From source file:semRewrite.substitutor.CoreLabelSequence.java

License:Open Source License

/** *************************************************************
 * Blank out punctuation tokens: every label whose value consists of
 * exactly one punctuation character gets the empty string as value.
 * Labels with a null value are skipped.
 *
 * NOTE(review): the labels of this sequence are modified in place and
 * the returned sequence is constructed from those same labels -
 * confirm that callers expect this.
 *
 * @return a sequence constructed from this sequence's labels
 */
public semRewrite.substitutor.CoreLabelSequence removePunctuation() {

    // Loop-invariant, so defined once outside the loop.
    final String puncRE = "[\\.\\,\\;\\:\\[\\]\\{\\}\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\-\\=\\_\\+\\`\\~\\<\\>\\/\\?]";
    semRewrite.substitutor.CoreLabelSequence cls = new semRewrite.substitutor.CoreLabelSequence(labels);
    for (CoreLabel cl : labels) {
        String value = cl.value();
        if (value != null && value.matches(puncRE)) {
            // replaceAll interprets puncRE as a regex; String.replace treated
            // it as literal text, so nothing was ever removed.
            cl.setValue(value.replaceAll(puncRE, ""));
        }
    }
    return cls;
}