List of usage examples for edu.stanford.nlp.trees.Tree.pennString()
public String pennString()
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License:Open Source License
public static void convertPennTree(JCas aJCas, Annotation aDocument) { for (CoreMap s : aDocument.get(SentencesAnnotation.class)) { Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class); int begin = s.get(CharacterOffsetBeginAnnotation.class); int end = s.get(CharacterOffsetEndAnnotation.class); // create tree with simple labels and get penn string from it tree = tree.deepCopy(tree.treeFactory(), StringLabel.factory()); // write Penn Treebank-style string to cas PennTree pTree = new PennTree(aJCas, begin, end); pTree.setPennTree(tree.pennString()); pTree.addToIndexes();/*w w w. j a v a 2 s .com*/ } }
From source file:de.tudarmstadt.ukp.dkpro.core.stanfordnlp.util.StanfordAnnotator.java
License:Open Source License
/** * Creates annotation with Penn Treebank style representations of the syntax tree * /*www. j a v a 2 s.co m*/ * @param aBegin * start offset. * @param aEnd * end offset. */ public void createPennTreeAnnotation(int aBegin, int aEnd) { Tree t = tokenTree.getTree(); // write Penn Treebank-style string to cas PennTree pTree = new PennTree(jCas, aBegin, aEnd); // create tree with simple labels and get penn string from it t = t.deepCopy(t.treeFactory(), StringLabel.factory()); pTree.setPennTree(t.pennString()); pTree.addToIndexes(); }
From source file:edu.albany.cubism.util.StanfordChineseParser.java
public void printTree(Tree t) { tp.printTree(t);//from w w w .j a v a2s. c om tp.printTree(t.headTerminal(new CollinsHeadFinder()));//SemanticHeadFinder())); //System.out.println("tree label: " + t.label()); List trees = t.subTreeList(); for (int i = 0; i < trees.size(); i++) { Tree sbt = (Tree) trees.get(i); /* * if (!sbt.isLeaf()) { trees.addAll(sbt.subTreeList()); } */ //System.out.println("sbt lable: " + sbt.label()); } //System.out.println("done"); List<Tree> leaves = t.getLeaves(); for (int i = 0; i < leaves.size(); i++) { Tree leaf = leaves.get(i); //if (leaf.parent() != null) { System.out.println(leaf.pennString() + " " + leaf.value()); //} } /* * Set dependencies = t.dependencies(); Iterator it = * dependencies.iterator(); while (it.hasNext()) { Dependency dependency * = (Dependency)it.next(); System.out.println(dependency.toString()); * System.out.println(dependency.name()); } */ }
From source file:elkfed.coref.discourse_entities.DiscourseEntity.java
License:Open Source License
private Tree massimoHeadFindHack(Tree npNode) { LanguagePlugin lang_plugin = ConfigProperties.getInstance().getLanguagePlugin(); /*/*from w ww. ja v a2 s . co m*/ * NOTE (yv): * We should really have a decent configurable head finder. * The "generic" head finder below probably works, but ... * this is ugly enough for English, but making it work for * English *and* Italian (and possibly other languages) * is only something for very enthusiastic people with * slight masochistic tendencies. */ //CollinsHeadFinder hf = new CollinsHeadFinder(); //ModCollinsHeadFinder hf = new ModCollinsHeadFinder(); /* -- trivial -- */ if (npNode.numChildren() == 0) return npNode; if (npNode.numChildren() == 1) { if (npNode.firstChild().numChildren() == 0) return npNode; return massimoHeadFindHack(npNode.firstChild()); } /* -- coordination -- */ if (npNode.numChildren() > 2) { for (Tree n : npNode.children()) { if (lang_plugin.labelCat(n.nodeString()) == NodeCategory.CC) return null; } } /* -- last child is a noun (common/proper) --*/ /* NB: will it work for italian though? 
*/ NodeCategory firstpos = lang_plugin.labelCat(npNode.firstChild().nodeString()); NodeCategory nextpos = lang_plugin.labelCat(npNode.getChild(1).nodeString()); NodeCategory lastpos = lang_plugin.labelCat(npNode.lastChild().nodeString()); if (lastpos == NodeCategory.CN) return npNode.lastChild(); if (lastpos == NodeCategory.PN) return npNode.lastChild(); /* -- (NP (NP (DT the) (NN man)) (PP (in from) (NP (NNP UNCLE)))) -- */ if (firstpos == NodeCategory.NP && nextpos != NodeCategory.CN) return massimoHeadFindHack(npNode.firstChild()); /* -- misc -- */ Tree found_head = null; int state = 0; for (Tree n : npNode.children()) { NodeCategory ncat = lang_plugin.labelCat(n.nodeString()); if (ncat == NodeCategory.CN || ncat == NodeCategory.PN || ncat == NodeCategory.PRO) { state = 4; found_head = n; } else if (ncat == NodeCategory.NP && state < 3) { state = 3; found_head = n; } else if (ncat == NodeCategory.ADJ && state < 3) { state = 2; found_head = n; } } if (found_head != null) { if (state == 3) { return massimoHeadFindHack(found_head); } return found_head; } // if (ConfigProperties.getInstance().getDbgPrint()) System.err.println("Couldn't find a head for NP:" + npNode.pennString()); return null; }
From source file:jnetention.nlp.demo.NLPAnalyzer.java
protected void update(String text) throws Exception { TextParse p = client.parse(text);/* w w w . j a v a 2 s . c om*/ StringBuilder s = new StringBuilder(); s.append(p.getDependencies(true).toString()); s.append('\n'); //s.append(p.getCorefCluster().toString()); s.append(p.getNamedEntities()); s.append('\n'); s.append(p.getWords()); s.append('\n'); s.append(p.getNamedEntities()); s.append('\n'); s.append(p.getTrees().stream().map((Tree t) -> (String) (t.pennString() + '\n')) .collect(Collectors.toList())); s.append(p.getVerbPhrases()); s.append(p.getNounPhrases()); Platform.runLater(new Runnable() { @Override public void run() { TextArea ta = new TextArea(s.toString()); ta.setMaxHeight(Double.MAX_VALUE); ta.setMaxWidth(Double.MAX_VALUE); ScrollPane sp = new ScrollPane(ta); sp.setMaxHeight(Double.MAX_VALUE); sp.setMaxWidth(Double.MAX_VALUE); output.setCenter(ta); } }); }
From source file:knu.univ.lingvo.coref.SieveCoreferenceSystem.java
License:Open Source License
/**
 * Formats a tree for printing by stripping annotation details from its Penn string.
 *
 * Three regex replacements are applied in sequence to the Penn string: every
 * {@code [TextAnnotation=} prefix is removed; each NamedEntityTag/Value/Index/PartOfSpeech
 * annotation fragment up to the next closing parenthesis is replaced by {@code )}; and any
 * remaining bracketed {@code [...]} section is removed.
 *
 * @param parseTree
 *            the tree to format.
 * @return the cleaned-up Penn string.
 */
public static String formatPennTree(Tree parseTree) {
    return parseTree.pennString()
            .replaceAll("\\[TextAnnotation=", "")
            .replaceAll("(NamedEntityTag|Value|Index|PartOfSpeech)Annotation.+?\\)", ")")
            .replaceAll("\\[.+?\\]", "");
}
From source file:org.aksw.simba.bengal.triple2nl.property.PropertyVerbalizer.java
License:Apache License
private PropertyVerbalization getTypeByLinguisticAnalysis(String propertyURI, String propertyText) { logger.debug("...using linguistical analysis..."); Annotation document = new Annotation(propertyText); pipeline.annotate(document);/* w ww . j a v a 2 s .co m*/ List<CoreMap> sentences = document.get(SentencesAnnotation.class); String pattern = ""; PropertyVerbalizationType verbalizationType = PropertyVerbalizationType.UNSPECIFIED; boolean firstTokenAuxiliary = false; for (CoreMap sentence : sentences) { List<CoreLabel> tokens = sentence.get(TokensAnnotation.class); // get the first word and check if it's 'is' or 'has' CoreLabel token = tokens.get(0); String word = token.get(TextAnnotation.class); String pos = token.get(PartOfSpeechAnnotation.class); String lemma = token.getString(LemmaAnnotation.class); firstTokenAuxiliary = auxiliaryVerbs.contains(lemma); if (lemma.equals("be") || word.equals("have")) { pattern += lemma.toUpperCase(); } else { if (pos.startsWith("N")) { pattern += "NP"; } else if (pos.startsWith("V")) { pattern += "VP"; } else { pattern += pos; } } if (tokens.size() > 1) { pattern += " "; for (int i = 1; i < tokens.size(); i++) { token = tokens.get(i); pos = token.get(PartOfSpeechAnnotation.class); if (pos.startsWith("N")) { pattern += "NP"; } else if (pos.startsWith("V")) { pattern += "VP"; } else { pattern += pos; } pattern += " "; } } // get the parse tree Tree tree = sentence.get(TreeAnnotation.class); // skip ROOT tag tree = tree.skipRoot(); logger.debug("Parse tree:" + tree.pennString()); // tree.pennPrint(); // check if VP is directly followed by NP // sometimes parent node is S,SINV,etc. 
if (tree.value().matches(Joiner.on('|').join(Lists.newArrayList(S, SBAR, SBARQ, SINV, FRAGMENT)))) { tree = tree.getChild(0); } boolean useDeterminer = false; if (tree.value().equals(VERB_PHRASE.getTag())) { for (Tree child : tree.getChildrenAsList()) { // check if first non terminal is NP and not contains a // determiner if (!child.isPreTerminal()) { if (child.value().equals(NOUN_PHRASE.getTag()) && !child.getChild(0).value().equals(DETERMINER.getTag())) { useDeterminer = true; } break; } } } // add determiner tag if (useDeterminer) { String[] split = pattern.split(" "); pattern = split[0] + " DET " + Joiner.on(" ").join(Arrays.copyOfRange(split, 1, split.length)); } } pattern = pattern.trim(); // if first token is an auxiliary can return verb if (firstTokenAuxiliary) { verbalizationType = PropertyVerbalizationType.VERB; } // check if pattern matches if (pattern.matches(VERB_PATTERN)) { logger.debug("...successfully determined type."); verbalizationType = PropertyVerbalizationType.VERB; } return new PropertyVerbalization(propertyURI, propertyText, pattern, verbalizationType); }
From source file:org.lambda3.text.simplification.discourse.model.serializer.TreeSerializer.java
License:Open Source License
/**
 * Writes a tree as a single JSON string value.
 *
 * The Penn string of the tree is trimmed and every run of whitespace is collapsed into one
 * space before it is written with {@code gen.writeString}.
 *
 * @param value
 *            the tree to serialize.
 * @param gen
 *            the generator the string is written to.
 * @param provider
 *            not used by this method.
 * @throws IOException
 *             declared by the signature; presumably raised by the generator on write
 *             failure (TODO confirm).
 */
@Override
public void serialize(Tree value, JsonGenerator gen, SerializerProvider provider) throws IOException {
    // "\\s+" already covers newlines and tabs, so after this pass none remain; the former
    // second pass replaceAll("[\\n\\t]", "") could never match and has been dropped.
    gen.writeString(value.pennString().trim().replaceAll("\\s+", " "));
}
From source file:pltag.parser.json.JsonResult.java
License:Open Source License
/**
 * Converts a tree string to its Penn string form.
 *
 * The input is turned into a tree with {@code Tree.valueOf} and the {@code pennString()} of
 * that tree is returned.
 *
 * @param input
 *            the string representation of the tree.
 * @return the Penn string of the parsed tree.
 */
private String toPtbFormat(String input) {
    // NOTE(review): the result of Tree.valueOf is used without a null check; confirm it
    // cannot be null for the inputs passed here.
    return Tree.valueOf(input).pennString();
}
From source file:qmul.align.SentenceSyntacticSimilarityMeasure.java
License:Open Source License
public static void main(String[] args) { SentenceSyntacticSimilarityMeasure sm = new SentenceSyntacticSimilarityMeasure(); PennTreebankTokenizer tok = new PennTreebankTokenizer(true); StanfordParser p = new StanfordParser(); // TreeParser p = new RASPParser(); // String s1 = "Hello , how are you ?"; // String s2 = "UNCLEAR what can I do for this lady today ?"; String s1 = "You're full of catarrh."; String s2 = "Lot of wax in it, right enough."; p.parse(tok.getWordsFromString(s1)); Tree t1 = p.getBestParse(); p.parse(tok.getWordsFromString(s2)); Tree t2 = p.getBestParse();/*from w w w.j a va 2 s . c om*/ System.out.println(s1); System.out.println(t1.pennString()); System.out.println(s2); System.out.println(t2.pennString()); System.out.println(TreeKernel.resetAndCompute(t1, t2, TreeKernel.SYN_TREES)); System.out.println(TreeKernel.resetAndCompute(t1, t2, TreeKernel.SUB_TREES)); System.out.println(TreeKernel.resetAndCompute(t1, t2, TreeKernel.SUBSET_TREES)); // System.exit(0); DialogueTurn t = new DialogueTurn("t", 1, null, null); DialogueSentence a = new DialogueSentence("a", 1, t, "ok"); DialogueSentence b = new DialogueSentence("b", 1, t, "ok"); a.setSyntax(p.parse(a.getTranscription())); b.setSyntax(p.parse(b.getTranscription())); System.out.println("" + a + " " + a.getSyntax() + "\n" + b + " " + b.getSyntax() + "\n" + "sim = " + sm.similarity(a, b)); a.setTranscription("ok ok ok ok"); a.setSyntax(p.parse(a.getTranscription())); System.out.println("" + a + " " + a.getSyntax() + "\n" + b + " " + b.getSyntax() + "\n" + "sim = " + sm.similarity(a, b)); a.setTranscription("that's really not ok"); a.setSyntax(p.parse(a.getTranscription())); System.out.println("" + a + " " + a.getSyntax() + "\n" + b + " " + b.getSyntax() + "\n" + "sim = " + sm.similarity(a, b)); b.setTranscription("that's really not ok"); b.setSyntax(p.parse(b.getTranscription())); System.out.println("" + a + " " + a.getSyntax() + "\n" + b + " " + b.getSyntax() + "\n" + "sim = " + sm.similarity(a, b)); 
a.setTranscription("john likes the small bear"); a.setSyntax(p.parse(a.getTranscription())); b.setTranscription("jim hates the big rabbit"); b.setSyntax(p.parse(b.getTranscription())); // TreeKernel.setIncludeWords(true); System.out.println("" + a + " " + a.getSyntax() + "\n" + b + " " + b.getSyntax() + "\n" + "sim = " + sm.similarity(a, b) + "\n" + sm.rawCountsA() + "\n" + sm.rawCountsB() + "\n" + sm.rawCountsAB()); a.setTranscription("the man likes the small bear"); a.setSyntax(p.parse(a.getTranscription())); // TreeKernel.setIncludeWords(true); System.out.println("" + a + " " + a.getSyntax() + "\n" + b + " " + b.getSyntax() + "\n" + "sim = " + sm.similarity(a, b) + "\n" + sm.rawCountsA() + "\n" + sm.rawCountsB() + "\n" + sm.rawCountsAB()); }