List of usage examples for edu.stanford.nlp.semgraph SemanticGraph typedDependencies
public Collection<TypedDependency> typedDependencies()
From source file:com.project.NLP.Requirement.ParserTreeGenerator.java
/** * method to identify the passive sentence using the dependencies generated by stanford coreNLP * for a particular sentence /*from w w w . j ava 2 s . c o m*/ * * @param sentence * @param tree */ public void passiveSentenceIdentification(CoreMap sentence, Tree tree) { boolean passive = false; /*sentence dependency annotation*/ SemanticGraph dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); Collection collection = dependencies.typedDependencies(); Iterator iterator = collection.iterator(); while (iterator.hasNext()) { String dependency = iterator.next().toString(); String dependencyArray[] = dependency.split("\\("); if (dependencyArray[0].equalsIgnoreCase("nsubjpass")) { passiveSentenceMap.put(tree, true); } } }
From source file:coreferenceresolver.util.StanfordUtil.java
public void init(boolean simpleInit) throws FileNotFoundException, IOException { String outPosFilePath = "./input.txt.pos"; FileWriter fw = new FileWriter(new File(outPosFilePath)); BufferedWriter bw = new BufferedWriter(fw); props = new Properties(); if (simpleInit) { props.put("annotators", "tokenize, ssplit, pos, parse"); } else {/* w w w . ja v a2s . co m*/ props.put("annotators", "tokenize, ssplit, pos, parse, sentiment"); } pipeline = new StanfordCoreNLP(props); reviews = new ArrayList<>(); FileReader fileReader = new FileReader(documentFile); BufferedReader bufferedReader = new BufferedReader(fileReader); String reviewLine; int reviewId = 0; int sentenceId; //read input file line by line and count the number sentences of each lines while ((reviewLine = bufferedReader.readLine()) != null) { sentenceId = 0; Review newReview = new Review(); //Add to reviews list newReview.setRawContent(reviewLine); // create an empty Annotation just with the given text document = new Annotation(reviewLine); // run all Annotators on this text pipeline.annotate(document); List<CoreMap> sentences = document.get(SentencesAnnotation.class); //Begin extracting from paragraphs for (CoreMap sentence : sentences) { int sentenceOffsetBegin = sentence.get(CharacterOffsetBeginAnnotation.class); int sentenceOffsetEnd = sentence.get(CharacterOffsetEndAnnotation.class); Sentence newSentence = new Sentence(); newSentence.setReviewId(reviewId); newSentence.setRawContent(sentence.toString()); newSentence.setOffsetBegin(sentenceOffsetBegin); newSentence.setOffsetEnd(sentenceOffsetEnd); if (!simpleInit) { int sentimentLevel = RNNCoreAnnotations .getPredictedClass(sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class)); newSentence.setSentimentLevel(sentimentLevel); //Dependency Parsing SemanticGraph collCCDeps = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class); Collection<TypedDependency> typedDeps = collCCDeps.typedDependencies(); newSentence.setDependencies(typedDeps); 
} List<Tree> sentenceTreeLeaves = sentence.get(TreeCoreAnnotations.TreeAnnotation.class).getLeaves(); int i = 0; for (CoreLabel token : sentence.get(TokensAnnotation.class)) { Token newToken = new Token(); Tree tokenTree = sentenceTreeLeaves.get(i); newToken.setTokenTree(tokenTree); String word = token.get(TextAnnotation.class); newToken.setWord(word); String pos = token.get(PartOfSpeechAnnotation.class); newToken.setPOS(pos); int offsetBegin = token.get(CharacterOffsetBeginAnnotation.class); newToken.setOffsetBegin(offsetBegin); int offsetEnd = token.get(CharacterOffsetEndAnnotation.class); newToken.setOffsetEnd(offsetEnd); if (!simpleInit) { //Check NP relative clause Tree twoLevelsAncestor = tokenTree.ancestor(2, sentence.get(TreeCoreAnnotations.TreeAnnotation.class)); if (twoLevelsAncestor.value().equals("WHNP") && !word.toLowerCase().equals("who") && !word.toLowerCase().equals("what")) { newToken.setRelativePronoun(true); } //Calculate sentiment for this token int newTokenSentiment = Util.retrieveOriginalSentiment(newToken.getWord()); newToken.setSentimentOrientation(newTokenSentiment, newSentence.getDependencies()); } newSentence.addToken(newToken); bw.write(token.word() + "/" + token.tag() + " "); ++i; } bw.newLine(); if (!simpleInit) { //Check if this sentence contains a comparative indicator. //If yes, it is a comparative sentence. Identify which NP is superior or inferior in this sentence List<Token> comparativeTokens = FeatureExtractor.findComparativeIndicator(newSentence, null, null); //TODO //Check special comparative samples if (!comparativeTokens.isEmpty()) { newSentence.initComparatives(comparativeTokens); } } newReview.addSentence(newSentence); ++sentenceId; } bw.write("./."); bw.newLine(); reviews.add(newReview); ++reviewId; } bw.close(); }
From source file:ims.cs.corenlp.TokenAligner.java
License:Open Source License
/**
 * Aligns the PARC tokens of a sentence with the CoreNLP tokens of the same
 * sentence by walking both token streams in byte-offset order, combining
 * overlapping tokens, and attaching parse information (dependency node, tree
 * leaf, TreeGraphNode) to each combined token. Populates pcTokenList,
 * indexedWord2CoreLabel and tree2CoreLabel as side effects.
 *
 * @param pTokens   the PARC tokens of the sentence (must be non-empty)
 * @param cSentence the CoreNLP sentence annotation to align against
 */
private void alignTokensStrict(List<Token> pTokens, CoreMap cSentence) {
    Tree tree = cSentence.get(TreeAnnotation.class);
    SemanticGraph dependencies = cSentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    List<CoreLabel> cTokens = cSentence.get(CoreAnnotations.TokensAnnotation.class);
    // dependency vertices in sorted order, wrapped so they iterate in step with the token stream
    Iterator<IndexedWord> depIterator = new IndexedWordIterator(dependencies.vertexListSorted().iterator());
    pcTokenList = new ArrayList<Token>(cTokens.size());
    List<Tree> leaves = tree.getLeaves();
    Iterator<Tree> leafIterator = leaves.iterator();
    indexedWord2CoreLabel = new HashMap<IndexedWord, Token>();
    tree2CoreLabel = new HashMap<Tree, Token>();
    // state variables
    Token prevCombinedToken = null;
    Iterator<CoreLabel> cTokenIter = cTokens.iterator();
    Iterator<Token> pTokenIter = pTokens.iterator();
    int currentCoreNlpSentenceIndex = 0;
    CoreLabel cToken = cTokenIter.next();
    Token pToken = pTokenIter.next();
    Token prevPToken = null;
    // byte offsets of the final token on each side; the loop runs until both are reached
    int pFinal = pTokens.get(pTokens.size() - 1).goldByteCount.getEnd();
    int cFinal = cTokens.get(cTokens.size() - 1).endPosition();
    int pBegin = pToken.goldByteCount.getBegin();
    int pEnd = pToken.goldByteCount.getEnd();
    int cBegin = cToken.beginPosition();
    int cEnd = cToken.endPosition();
    // for compatibility: TreeGraphNode bookkeeping -- build a 1-indexed list of
    // governor/dependent nodes so they can be consumed in token order below
    Collection<TypedDependency> dependencyEdges = dependencies.typedDependencies();
    List<TreeGraphNode> tgnList = new ArrayList<TreeGraphNode>(cTokens.size());
    for (int i = 0; i < cTokens.size() + 1; i++)
        tgnList.add(null);
    for (TypedDependency edge : dependencyEdges) {
        tgnList.set(edge.gov().index(), edge.gov());
        tgnList.set(edge.dep().index(), edge.dep());
    }
    Iterator<TreeGraphNode> tgnIterator = tgnList.iterator();
    IndexedWord dep = null;
    Tree leaf = null;
    TreeGraphNode tgn = null;
    // move dep and tree iterators forward by 1 so they point at the first token's parse info
    if (depIterator.hasNext())
        dep = depIterator.next();
    if (leafIterator.hasNext())
        leaf = leafIterator.next();
    if (tgnIterator.hasNext())
        tgn = tgnIterator.next();
    // guess a pSentence for debug messages -- may be null if there is no sentence annotation
    Sentence pSentence = pTokens.get(pTokens.size() - 1).sentence;
    String pSentenceId;
    if (pSentence != null) {
        SentenceId id = pSentence.sentenceId;
        pSentenceId = id == null ? "null" : id.toString();
    } else {
        pSentenceId = null;
    }
    // true while the current pToken has already been combined with at least one cToken
    boolean usedPToken = false;
    // loop until we reach the end of either sentence
    while ((pFinal != pEnd) || (cFinal != cEnd)) {
        // Check for unwanted conditions:
        // 1. No PARC tokens left?
        //    this happens when the raw text contained tokens that are missing in the
        //    PARC data. these are mostly sentence-final punctuation marks.
        if (pToken == null) {
            // try to recover here for final quotes that the parser predicted. This may be good or bad.
            if (useCoreNlpQuoteCompletion && Helper.isQuote(cToken)) {
                Token combinedToken = combineTokens(prevPToken, cToken, currentCoreNlpSentenceIndex);
                // NOTE(review): backpointers are set on prevCombinedToken here, not on
                // combinedToken as in the main loop -- confirm this asymmetry is intended
                prevCombinedToken.dependencyBackpointer = dep;
                prevCombinedToken.treeBackpointer = leaf;
                // bookkeeping with new token
                if (usedPToken) {
                    // avoid making subsequent tokens start tokens!
                    combinedToken.paragraphBegins = false;
                }
                addNewWord(combinedToken, prevCombinedToken);
            } else {
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " " + "(PARC tokens: "
                            + pTokens + " )");
            }
            // stop processing this sentence, drop remaining CoreNLP data -- in practice,
            // these will never be needed
            break;
        }
        // 2. No CoreNLP tokens left
        if (cToken == null) {
            if (StaticConfig.verbose)
                System.out.println("Unaligned Token(s) in " + pSentenceId + " " + pToken);
            break;
        }
        // check whether tokens at least overlap before continuing processing ...
        pBegin = pToken.goldByteCount.getBegin();
        pEnd = pToken.goldByteCount.getEnd();
        cBegin = cToken.beginPosition();
        cEnd = cToken.endPosition();
        // ... if they don't, try to recover by syncing up
        if (cBegin > pEnd) {
            if (usedPToken) {
                // the PARC token was already consumed once; advance it to catch up
                if (StaticConfig.verbose)
                    System.out.println(
                            pSentenceId + " out of sync " + pToken + " " + cToken + " -- trying to fix");
                if (pTokenIter.hasNext()) {
                    prevPToken = pToken;
                    pToken = pTokenIter.next();
                    continue; // restart the iteration
                } else {
                    if (StaticConfig.verbose)
                        System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                                + "(PARC tokens: " + pTokens + " )");
                    break;
                }
            } else {
                /* this may happen when tokens from previous iterations have a wrong byte count -- skip */
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " " + "(PARC tokens: "
                            + pTokens + " )");
                break;
            }
        }
        // Now the main part. There are three conditions which could occur.
        if (pEnd == cEnd) {
            // 1. Tokens have identical end points
            //    In this case, just combine the tokens and move on
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;
            // bookkeeping with new token
            if (usedPToken) {
                // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;
            // move iterators: both token streams advance in lock step
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            usedPToken = false;
            // add parse information for the next CoreNLP token
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        } else if (cEnd > pEnd) {
            // 2. The CoreNLP token is longer than the PARC token
            //    split the CoreNLP token into two parts
            Token combinedToken;
            CoreLabel[] splitCToken = null;
            if (splitType == SplitType.SPLIT) {
                splitCToken = splitToken(cToken, pEnd);
                combinedToken = combineTokens(pToken, splitCToken[0], currentCoreNlpSentenceIndex);
            } else if (splitType == SplitType.NONE_CORENLP) {
                throw new Error();
            } else {
                combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            }
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;
            // bookkeeping with new token
            if (usedPToken) {
                // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;
            // get new pToken to match the remaining bit; the CoreNLP side keeps
            // the unmatched remainder (splitCToken[1]) as the current cToken
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            if (splitType == SplitType.SPLIT)
                cToken = splitCToken[1];
            usedPToken = false;
        } else { // cEnd < pEnd
            // 3. The PARC token is longer than the CoreNLP token
            //    Attach the PARC token to multiple CoreNLP tokens
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;
            // bookkeeping with new token
            if (usedPToken) {
                // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;
            // get new cToken and other CoreNLP data; pToken is kept and marked used
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            usedPToken = true;
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        }
    }
}
From source file:jnetention.nlp.TextParse.java
/**
 * Builds a single {@link SemanticGraph} covering the whole text by collecting
 * the typed dependencies of every sentence into one graph.
 *
 * @param b passed through to the per-sentence {@code getDependencies(CoreMap, boolean)}
 * @return a new SemanticGraph over all sentences' typed dependencies
 */
public SemanticGraph getDependencies(boolean b) {
    // typed list instead of the original raw ArrayList (avoids unchecked warnings)
    List<TypedDependency> allDeps = new ArrayList<>();
    for (CoreMap s : getSentences()) {
        SemanticGraph g = getDependencies(s, b);
        allDeps.addAll(g.typedDependencies());
    }
    return new SemanticGraph(allDeps);
}
From source file:qa.StanfordDepParser.java
public synchronized DependencyTree parse(String documentText) throws IOException { // Create an empty Annotation just with the given text Annotation document = new Annotation(documentText); // run all Annotators on this text this.pipeline.annotate(document); // Iterate over all of the sentences found SemanticGraph ccProcessed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0) .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); Collection<TypedDependency> dependencies = ccProcessed.typedDependencies(); CoNLLOutputter.conllPrint(document, new FileOutputStream(new File("temp.dep"))); String conllString = FileUtil.readCoNLLFormat("temp.dep"); //System.out.println(documentText); DependencyTree tree = DependencyTree.fromCoNLLFormatString(conllString); return tree;//from www .j a v a 2 s . c o m }
From source file:qa.StanfordDepParser.java
public String parseCoNLL(String documentText) throws IOException { // Create an empty Annotation just with the given text Annotation document = new Annotation(documentText); // run all Annotators on this text this.pipeline.annotate(document); // Iterate over all of the sentences found SemanticGraph ccProcessed = document.get(CoreAnnotations.SentencesAnnotation.class).get(0) .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class); Collection<TypedDependency> dependencies = ccProcessed.typedDependencies(); CoNLLOutputter.conllPrint(document, new FileOutputStream(new File("temp.dep"))); String conllString = FileUtil.readCoNLLFormat("temp.dep"); return conllString; }