List of usage examples for edu.stanford.nlp.ling CoreLabel value
@Override public final String value()
From source file:conditionalCFG.ConditionalCFGParser.java
License:Open Source License
/**
 * Returns the terminal label for the word at the given position.
 *
 * <p>If an original label was stored for that position it is returned as is, except that
 * a missing value is first filled in from the label's word. Otherwise a new label is
 * assembled from the word index, the stored begin/end offsets and, when available, the
 * original tag.
 *
 * @param labelIndex position of the word in the sentence arrays
 * @return the stored label for that position, or a newly built one
 */
private CoreLabel getCoreLabel(int labelIndex) {
    CoreLabel existing = originalCoreLabels[labelIndex];
    if (existing != null) {
        boolean valueMissing = existing.value() == null;
        if (valueMissing && existing.word() != null) {
            existing.setValue(existing.word());
        }
        return existing;
    }

    // No stored label: rebuild one from the parallel arrays.
    String surface = wordIndex.get(words[labelIndex]);
    CoreLabel fresh = new CoreLabel();
    fresh.setValue(surface);
    fresh.setWord(surface);
    fresh.setBeginPosition(beginOffsets[labelIndex]);
    fresh.setEndPosition(endOffsets[labelIndex]);
    if (originalTags[labelIndex] != null) {
        fresh.setTag(originalTags[labelIndex].tag());
    }
    return fresh;
}
From source file:de.uni_stuttgart.ims.comparatives.nlp.SentenceSplitterStanford.java
License:Creative Commons License
/**
 * Splits the string into sentences with Stanford's {@code DocumentPreprocessor}.
 *
 * @param document the text to split
 * @return one span per non-empty sentence, built from the begin position of its first
 *         token, the end position of its last token, and the token values each followed
 *         by a single space (so the covered text keeps its trailing space)
 */
public TextSpan[] split(String document) {
    DocumentPreprocessor dp = new DocumentPreprocessor(new StringReader(document));
    dp.setTokenizerFactory(ptbTokenizerFactory);

    List<TextSpan> sentenceSpans = new ArrayList<TextSpan>();
    for (List<HasWord> sent : dp) {
        // Guard against an empty token list: get(0) below would otherwise throw.
        if (sent.isEmpty()) {
            continue;
        }
        CoreLabel firstWord = (CoreLabel) sent.get(0);
        CoreLabel lastWord = (CoreLabel) sent.get(sent.size() - 1);

        // StringBuilder avoids re-copying the whole sentence for every token.
        StringBuilder coveredText = new StringBuilder();
        for (HasWord token : sent) {
            coveredText.append(((CoreLabel) token).value()).append(' ');
        }

        sentenceSpans.add(
                new TextSpan(firstWord.beginPosition(), lastWord.endPosition(), coveredText.toString()));
    }
    return sentenceSpans.toArray(new TextSpan[0]);
}
From source file:edu.ucla.cs.scai.qa.questionclassifier.SyntacticTreeNode.java
public SyntacticTreeNode(Tree t, ArrayList<CoreLabel> tokens, SyntacticTreeNode parent) throws Exception { this.parent = parent; value = t.value();//from ww w. ja v a 2s . co m if (t.isLeaf()) { CoreLabel c = tokens.remove(0); begin = c.beginPosition(); end = c.endPosition(); if (c == null) { throw new Exception("Mapping between TreeNode and CoreLabel not found"); } else { lemma = c.lemma(); ner = c.ner(); //System.out.println(value + " -> " + c.value()); if (!value.equals(c.value())) { throw new Exception("Different words have been matched!"); } } } else { boolean hasNPchildren = false; boolean hasWHNPchildren = false; boolean hasQPchildren = false; begin = Integer.MAX_VALUE; end = Integer.MIN_VALUE; for (Tree c : t.children()) { SyntacticTreeNode child = new SyntacticTreeNode(c, tokens, this); children.add(child); if (child.value.equals("NP")) { hasNPchildren = true; } else if (child.value.equals("QP")) { hasQPchildren = true; } else if (child.value.equals("WHNP")) { hasWHNPchildren = true; } begin = Math.min(begin, child.begin); end = Math.max(end, child.end); } if (value.equals("NP")) { if (hasNPchildren) { npCompound = true; } else if (hasQPchildren) { npQp = true; } else { npSimple = true; } } else if (value.equals("WHNP")) { //can a WHNP node have QP children? if (hasNPchildren || hasWHNPchildren) { whnpCompound = true; } else if (!hasQPchildren) { whnpSimple = true; } } } }
From source file:main.java.parsers.StanfordParser.java
/** * Parses a given input text document using the Stanford CoreNLP parser. * /*from ww w. ja v a2 s. c om*/ * @param document * @throws java.io.UnsupportedEncodingException * @throws java.lang.InterruptedException */ public static void parse(Doc document) throws UnsupportedEncodingException, IOException, InterruptedException { // Initialize an Annotation with some text to be annotated. The text is the argument to the constructor. Annotation annotation = new Annotation(new String(document.text.getBytes("UTF-8"), "UTF-8")); // run all the selected Annotators on this text pipeline.annotate(annotation); // An Annotation is a Map and you can get and use the various analyses individually. List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class); //returns if the annotation is empty. if (sentences == null || sentences.isEmpty()) return; //map linking token offsets with their tokens annotation from the Stanford tool. for (CoreMap sentence : sentences) { String sentenceStr = ""; int sentenceNum = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class); Map<Integer, Integer> tokenNumStartOffset = document.sentenceTokenNumStartOffset.get(sentenceNum); if (tokenNumStartOffset == null) document.sentenceTokenNumStartOffset.put(sentenceNum, tokenNumStartOffset = new HashMap<>()); Map<Integer, List<String>> startOffsetSRLRoles = new TreeMap<>(); //extracting tokenized information from the stanford parser output. 
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) { sentenceStr += token.value() + " "; document.startOffsetIndexedWord.put(token.beginPosition(), new IndexedWord(token)); tokenNumStartOffset.put(token.index(), token.beginPosition()); startOffsetSRLRoles.put(token.beginPosition(), null); } //write the tokenized sentence to an output file FileOutputStream output = new FileOutputStream(Main.RESOURCES_DIR + "\\senna\\log.txt"); output.write(sentenceStr.getBytes()); //the semantic roles labels for the sentence are obtained by applying SENNA startOffsetSRLRoles = SENNASrl.getSRLRoles(startOffsetSRLRoles); //set the srl tags document.startOffsetSRLRoles.putAll(startOffsetSRLRoles); //parse tree of the sentence String stanfordParseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).toString(); ParseTree parseTree = new ParseTree(stanfordParseTree); parseTree.convertParseTree(); document.setSentenceParseTree(sentenceNum, parseTree); //dependency graph of the sentence SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class); document.setSentenceDependencyGraph(sentenceNum, graph); } }
From source file:nlp.prototype.POSToken.java
/**
 * Wraps a token, deriving its part of speech from the token's value and starting with
 * no next token and an empty list of child tokens.
 *
 * @param token the token to wrap; its {@code value()} is passed to
 *              {@code PartOfSpeech.toPartOfSpeech}
 */
public POSToken(CoreLabel token) {
    this.nextToken = null;
    this.token = token;
    this.pos = PartOfSpeech.toPartOfSpeech(token.value());
    // Plain ArrayList: the former "new ArrayList<POSToken>() { }" declared a needless
    // anonymous subclass, which may also retain a reference to this POSToken.
    this.childTokens = new ArrayList<POSToken>();
}
From source file:opennlp.tools.parse_thicket.opinion_processor.DefaultSentimentProcessor.java
License:Apache License
public static <T> String wordToString(T o, final boolean justValue, final String separator) { if (justValue && o instanceof Label) { if (o instanceof CoreLabel) { CoreLabel l = (CoreLabel) o; String w = l.value(); if (w == null) w = l.word();/* ww w. java 2s. c o m*/ return w; } else { return (((Label) o).value()); } } else if (o instanceof CoreLabel) { CoreLabel l = ((CoreLabel) o); String w = l.value(); if (w == null) w = l.word(); if (l.tag() != null) { if (separator == null) { return w + CoreLabel.TAG_SEPARATOR + l.tag(); } else { return w + separator + l.tag(); } } return w; // an interface that covered these next four cases would be // nice, but we're moving away from these data types anyway } else if (separator != null && o instanceof TaggedWord) { return ((TaggedWord) o).toString(separator); } else if (separator != null && o instanceof LabeledWord) { return ((LabeledWord) o).toString(); } else if (separator != null && o instanceof WordLemmaTag) { return ((WordLemmaTag) o).toString(separator); } else if (separator != null && o instanceof WordTag) { return ((WordTag) o).toString(separator); } else { return (o.toString()); } }
From source file:semRewrite.substitutor.CoreLabelSequence.java
License:Open Source License
/** ***************************************************************
 * Generate a String where the CoreLabel values are separated by
 * spaces and do not have a token number suffix
 *
 * @return the values of all labels in order, joined by single spaces
 */
@Override
public String toString() {

    StringBuilder sb = new StringBuilder();
    for (CoreLabel cl : labels) {
        // Separate from whatever has been written so far.
        if (sb.length() > 0)
            sb.append(" ");
        sb.append(cl.value());
    }
    // Return the string built above; this used to return labels.toString(),
    // which discarded it.
    return sb.toString();
}
From source file:semRewrite.substitutor.CoreLabelSequence.java
License:Open Source License
/** ************************************************************* * Checks if label is part of current sequence * @param text the label to be checked for in the sequence * @param sentIndex can be CoreLabelSequence.IGNORE_SENTENCE *///from w w w.j ava2s. c o m public boolean containsLabel(int sentIndex, String text, int index) { //System.out.println("CoreLabelSequence.containsLabel(): sentIndex: " + sentIndex); //System.out.println("CoreLabelSequence.containsLabel(): text: " + text); //System.out.println("CoreLabelSequence.containsLabel(): index: " + index); //System.out.println("CoreLabelSequence.containsLabel(): labels: " + labels); for (CoreLabel label : labels) { //System.out.println("CoreLabelSequence.containsLabel(): value: " + label.value()); //System.out.println("CoreLabelSequence.containsLabel():index: " + label.index()); if ((sentIndex == label.sentIndex() || sentIndex == IGNORE_SENTENCE) && text.equals(label.value()) // && index == label.index() FIXME: total hack! ) { //System.out.println("CoreLabelSequence.containsLabel(): success "); return true; } } //return labels.stream().anyMatch(label -> // (sentIndex == label.sentIndex() || sentIndex == IGNORE_SENTENCE) // && text.equals(label.originalText()) // && index == label.index() //); //System.out.println("CoreLabelSequence.containsLabel(): failure - label not in sequence "); return false; }
From source file:semRewrite.substitutor.CoreLabelSequence.java
License:Open Source License
/** ************************************************************* * Change the value() of each CoreLabel to be all caps *///from ww w.java2 s. c o m public semRewrite.substitutor.CoreLabelSequence toUpperCase() { //System.out.println("CoreLabelSequence.toUpperCase(): labels: " + labels); List<CoreLabel> lcl = new ArrayList<>(); for (CoreLabel cl : labels) { CoreLabel newcl = new CoreLabel(); newcl.setValue(cl.value().toUpperCase()); newcl.setIndex(cl.index()); lcl.add(newcl); } semRewrite.substitutor.CoreLabelSequence cls = new semRewrite.substitutor.CoreLabelSequence(lcl); //System.out.println("CoreLabelSequence.toUpperCase(): cls: " + cls); return cls; }
From source file:semRewrite.substitutor.CoreLabelSequence.java
License:Open Source License
/** ************************************************************* *//*from ww w. java 2 s.c om*/ public semRewrite.substitutor.CoreLabelSequence removePunctuation() { //System.out.println("CoreLabelSequence.toUpperCase(): removePunctuation: " + labels); semRewrite.substitutor.CoreLabelSequence cls = new semRewrite.substitutor.CoreLabelSequence(labels); for (CoreLabel cl : labels) { String puncRE = "[\\.\\,\\;\\:\\[\\]\\{\\}\\!\\@\\#\\$\\%\\^\\&\\*\\(\\)\\-\\=\\_\\+\\`\\~\\<\\>\\/\\?]"; if (cl.value().matches(puncRE)) cl.setValue(cl.value().replace(puncRE, "")); } //System.out.println("CoreLabelSequence.toUpperCase(): cls: " + cls); return cls; }