List of usage examples for edu.stanford.nlp.semgraph.SemanticGraph.vertexListSorted()
public List<IndexedWord> vertexListSorted()
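Returns the vertices of the graph as a list sorted by the words' sentence positions (token index). Before the indexed examples, here is a minimal, hypothetical sketch of the method in use; the class name VertexListSortedDemo and the pipeline configuration are illustrative assumptions (CoreNLP 3.5+ with the depparse annotator), not taken from the projects below.

import java.util.Properties;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class VertexListSortedDemo {
    public static void main(String[] args) {
        // hypothetical minimal pipeline: tokenize, tag, and dependency-parse
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize,ssplit,pos,lemma,depparse");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

        Annotation doc = new Annotation("The quick brown fox jumps over the lazy dog.");
        pipeline.annotate(doc);

        for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
            SemanticGraph sg = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
            // vertexListSorted() returns the vertices ordered by token index
            for (IndexedWord w : sg.vertexListSorted()) {
                System.out.println(w.index() + "\t" + w.word() + "\t" + w.tag());
            }
        }
    }
}

Because the list is sorted by token index, iteration follows surface order, whereas vertexSet() gives no ordering guarantee.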
From source file: ims.cs.corenlp.TokenAligner.java
License: Open Source License
/**
 * Aligns the tokens of a sentence
 * @param pTokens
 * @param cSentence
 */
private void alignTokensStrict(List<Token> pTokens, CoreMap cSentence) {
    Tree tree = cSentence.get(TreeAnnotation.class);
    SemanticGraph dependencies = cSentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
    List<CoreLabel> cTokens = cSentence.get(CoreAnnotations.TokensAnnotation.class);
    Iterator<IndexedWord> depIterator = new IndexedWordIterator(dependencies.vertexListSorted().iterator());
    pcTokenList = new ArrayList<Token>(cTokens.size());
    List<Tree> leaves = tree.getLeaves();
    Iterator<Tree> leafIterator = leaves.iterator();
    indexedWord2CoreLabel = new HashMap<IndexedWord, Token>();
    tree2CoreLabel = new HashMap<Tree, Token>();

    // state variables
    Token prevCombinedToken = null;
    Iterator<CoreLabel> cTokenIter = cTokens.iterator();
    Iterator<Token> pTokenIter = pTokens.iterator();
    int currentCoreNlpSentenceIndex = 0;
    CoreLabel cToken = cTokenIter.next();
    Token pToken = pTokenIter.next();
    Token prevPToken = null;
    int pFinal = pTokens.get(pTokens.size() - 1).goldByteCount.getEnd();
    int cFinal = cTokens.get(cTokens.size() - 1).endPosition();
    int pBegin = pToken.goldByteCount.getBegin();
    int pEnd = pToken.goldByteCount.getEnd();
    int cBegin = cToken.beginPosition();
    int cEnd = cToken.endPosition();

    // for compatibility: TreeGraphNode bookkeeping
    Collection<TypedDependency> dependencyEdges = dependencies.typedDependencies();
    List<TreeGraphNode> tgnList = new ArrayList<TreeGraphNode>(cTokens.size());
    for (int i = 0; i < cTokens.size() + 1; i++)
        tgnList.add(null);
    for (TypedDependency edge : dependencyEdges) {
        tgnList.set(edge.gov().index(), edge.gov());
        tgnList.set(edge.dep().index(), edge.dep());
    }
    Iterator<TreeGraphNode> tgnIterator = tgnList.iterator();

    IndexedWord dep = null;
    Tree leaf = null;
    TreeGraphNode tgn = null;

    // move dep and tree iterators forward by 1
    if (depIterator.hasNext())
        dep = depIterator.next();
    if (leafIterator.hasNext())
        leaf = leafIterator.next();
    if (tgnIterator.hasNext())
        tgn = tgnIterator.next();

    // guess a pSentence for debug messages -- may be null if there is no sentence annotation
    Sentence pSentence = pTokens.get(pTokens.size() - 1).sentence;
    String pSentenceId;
    if (pSentence != null) {
        SentenceId id = pSentence.sentenceId;
        pSentenceId = id == null ? "null" : id.toString();
    } else {
        pSentenceId = null;
    }

    boolean usedPToken = false;

    // loop until we reach the end of either sentence
    while ((pFinal != pEnd) || (cFinal != cEnd)) {
        // Check for unwanted conditions:
        // 1. No PARC tokens left?
        //    this happens when the raw text contained tokens that are missing in the PARC data.
        //    these are mostly sentence-final punctuation marks.
        if (pToken == null) {
            // try to recover here for final quotes that the parser predicted. This may be good or bad.
            if (useCoreNlpQuoteCompletion && Helper.isQuote(cToken)) {
                Token combinedToken = combineTokens(prevPToken, cToken, currentCoreNlpSentenceIndex);
                prevCombinedToken.dependencyBackpointer = dep;
                prevCombinedToken.treeBackpointer = leaf;

                // bookkeeping with new token
                if (usedPToken) { // avoid making subsequent tokens start tokens!
                    combinedToken.paragraphBegins = false;
                }
                addNewWord(combinedToken, prevCombinedToken);
            } else {
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                            + "(PARC tokens: " + pTokens + " )");
            }
            // stop processing this sentence, drop remaining CoreNLP data -- in practice, these will never be needed
            break;
        }

        // 2. No CoreNLP tokens left
        if (cToken == null) {
            if (StaticConfig.verbose)
                System.out.println("Unaligned Token(s) in " + pSentenceId + " " + pToken);
            break;
        }

        // check whether tokens at least overlap before continuing processing ...
        pBegin = pToken.goldByteCount.getBegin();
        pEnd = pToken.goldByteCount.getEnd();
        cBegin = cToken.beginPosition();
        cEnd = cToken.endPosition();

        // ... if they don't, try to recover by syncing up
        if (cBegin > pEnd) {
            if (usedPToken) {
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " out of sync " + pToken + " " + cToken + " -- trying to fix");
                if (pTokenIter.hasNext()) {
                    prevPToken = pToken;
                    pToken = pTokenIter.next();
                    continue; // restart the iteration
                } else {
                    if (StaticConfig.verbose)
                        System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                                + "(PARC tokens: " + pTokens + " )");
                    break;
                }
            } else {
                /* this may happen when tokens from previous iterations have a wrong byte count -- skip */
                if (StaticConfig.verbose)
                    System.out.println(pSentenceId + " Dropping unmatched " + cToken + " "
                            + "(PARC tokens: " + pTokens + " )");
                break;
            }
        }

        // Now the main part. There are three conditions which could occur.
        if (pEnd == cEnd) {
            // 1. Tokens have identical end points
            //    In this case, just combine the tokens and move on
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // move iterators
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            usedPToken = false;

            // add parse information
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        } else if (cEnd > pEnd) {
            // 2. The CoreNLP token is longer than the PARC token
            //    split the CoreNLP token into two parts
            Token combinedToken;
            CoreLabel[] splitCToken = null;
            if (splitType == SplitType.SPLIT) {
                splitCToken = splitToken(cToken, pEnd);
                combinedToken = combineTokens(pToken, splitCToken[0], currentCoreNlpSentenceIndex);
            } else if (splitType == SplitType.NONE_CORENLP) {
                throw new Error();
            } else {
                combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            }
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new pToken to match the remaining bit
            if (pTokenIter.hasNext()) {
                prevPToken = pToken;
                pToken = pTokenIter.next();
            } else {
                pToken = null;
            }
            if (splitType == SplitType.SPLIT)
                cToken = splitCToken[1];
            usedPToken = false;
        } else { // cEnd < pEnd
            // 3. The PARC token is longer than the CoreNLP token
            //    Attach the PARC token to multiple CoreNLP tokens
            Token combinedToken = combineTokens(pToken, cToken, currentCoreNlpSentenceIndex);
            combinedToken.dependencyBackpointer = dep;
            combinedToken.treeBackpointer = leaf;
            combinedToken.tgn = tgn;

            // bookkeeping with new token
            if (usedPToken) { // avoid making subsequent tokens start tokens!
                combinedToken.paragraphBegins = false;
            }
            addNewWord(combinedToken, prevCombinedToken);
            prevCombinedToken = combinedToken;

            // get new cToken and other CoreNLP data
            if (cTokenIter.hasNext()) {
                cToken = cTokenIter.next();
                currentCoreNlpSentenceIndex++;
            } else {
                cToken = null;
            }
            usedPToken = true;
            if (depIterator.hasNext())
                dep = depIterator.next();
            if (leafIterator.hasNext())
                leaf = leafIterator.next();
            if (tgnIterator.hasNext())
                tgn = tgnIterator.next();
        }
    }
}
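The aligner above consumes vertexListSorted() through an iterator, advancing it in lockstep with the parse-tree leaves so each combined token can carry both a dependency and a tree backpointer. Below is a condensed sketch of just that pattern; walkInLockstep is a hypothetical helper, not part of TokenAligner. Note that collapsed, CC-processed graphs may omit some tokens as vertices, so the two sequences can drift apart, which is one reason the full aligner needs its recovery logic.

import java.util.Iterator;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class LockstepSketch {
    /** Walk dependency vertices and parse-tree leaves side by side. */
    static void walkInLockstep(CoreMap sentence) {
        Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
        SemanticGraph deps = sentence.get(
                SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);

        Iterator<IndexedWord> depIt = deps.vertexListSorted().iterator();
        Iterator<Tree> leafIt = tree.getLeaves().iterator();

        // Caution: the graph may contain fewer vertices than the tree has
        // leaves, so the pairing below is only approximate.
        while (depIt.hasNext() && leafIt.hasNext()) {
            IndexedWord dep = depIt.next();
            Tree leaf = leafIt.next();
            System.out.println(dep.index() + "\t" + dep.word() + "\t" + leaf.value());
        }
    }
}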
From source file: it.uniroma2.sag.kelp.input.parser.impl.StanfordParserWrapper.java
License: Apache License
@Override
public DependencyGraph parse(String sentenceString) {
    Annotation document = new Annotation(sentenceString);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    CoreMap sentence = sentences.get(0);
    DependencyGraph graph = new DependencyGraph();
    graph.setSentence(sentenceString);
    graph.setParserName("StanfordParser");
    graph.setParserVersion("3.6.0");
    graph.setNodes(new ArrayList<DGNode>());
    int nId = 1;
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        DGNode node = new DGNode();
        Map<String, Object> nodeProps = new HashMap<String, Object>();
        nodeProps.put("surface", token.originalText());
        nodeProps.put("lemma", token.lemma());
        nodeProps.put("pos", token.tag());
        nodeProps.put("start", token.beginPosition());
        nodeProps.put("end", token.endPosition());
        nodeProps.put("id", nId);
        nId++;
        graph.getNodes().add(node);
        node.setProperties(nodeProps);
    }
    SemanticGraph dependencies = null;
    switch (dependencyType) {
    case BASIC:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    case COLLAPSED:
        dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
        break;
    case COLLAPSED_CCPROCESSED:
        dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
        break;
    default:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    }
    dependencies.edgeListSorted();
    List<DGRelation> relations = new ArrayList<DGRelation>();
    for (IndexedWord node : dependencies.vertexListSorted()) {
        DGRelation relation = new DGRelation();
        relation.setProperties(new HashMap<String, Object>());
        DGNode child = graph.getDGNodeById(node.index());
        relation.setTarget(child);
        Collection<IndexedWord> parentsTmp = dependencies.getParents(node);
        ArrayList<IndexedWord> parents = new ArrayList<IndexedWord>();
        for (IndexedWord par : parentsTmp) {
            SemanticGraphEdge edge = dependencies.getEdge(par, node);
            DGNode parent = graph.getDGNodeById(edge.getGovernor().index());
            if (parent.getProperties().get("id") != child.getProperties().get("id"))
                parents.add(par);
        }
        if (parents.isEmpty()) {
            relation.getProperties().put("type", "root");
            relation.getProperties().put("fromId", new Integer(0));
            relation.setSource(null);
            graph.setRoot(relation);
        } else {
            Iterator<IndexedWord> it = parents.iterator();
            while (it.hasNext()) {
                IndexedWord par = it.next();
                SemanticGraphEdge edge = dependencies.getEdge(par, node);
                DGNode parent = graph.getDGNodeById(edge.getGovernor().index());
                relation.setSource(parent);
                relation.getProperties().put("fromId", parent.getProperties().get("id"));
                relation.getProperties().put("type", edge.getRelation().toString());
            }
        }
        relations.add(relation);
    }
    graph.setRelations(relations);
    return graph;
}
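The heart of this wrapper is the per-vertex parent lookup: vertexListSorted() supplies the nodes in sentence order, getParents(node) yields the governors, and getEdge(gov, node) recovers the labeled relation; a vertex with no parents is treated as the root. Here is a self-contained sketch of that pattern; printIncomingEdges is a hypothetical helper, not part of StanfordParserWrapper.

import java.util.Collection;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

public class IncomingEdgeSketch {
    /** Print each vertex in sentence order with its incoming relations. */
    static void printIncomingEdges(SemanticGraph deps) {
        for (IndexedWord node : deps.vertexListSorted()) {
            Collection<IndexedWord> parents = deps.getParents(node);
            if (parents.isEmpty()) {
                // no incoming edge: treat as root, as the wrapper above does
                System.out.println(node.word() + "\troot");
                continue;
            }
            for (IndexedWord gov : parents) {
                SemanticGraphEdge edge = deps.getEdge(gov, node);
                System.out.println(node.word() + "\t" + edge.getRelation() + "\t<- " + gov.word());
            }
        }
    }
}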
From source file: opendial.bn.values.RelationalVal.java
License: Open Source License
public void addGraph(SemanticGraph newGraph) {
    int oldGraphSize = graph.size();
    for (IndexedWord iw : newGraph.vertexListSorted()) {
        IndexedWord copy = new IndexedWord(iw);
        copy.setIndex(graph.size());
        graph.addVertex(copy);
    }
    for (SemanticGraphEdge edge : newGraph.edgeListSorted()) {
        int dep = edge.getDependent().index() + oldGraphSize;
        int gov = edge.getGovernor().index() + oldGraphSize;
        GrammaticalRelation rel = edge.getRelation();
        addEdge(gov, dep, rel.getLongName());
    }
    cachedHashCode = 0;
}
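This method merges another SemanticGraph into the value's internal graph field by copying each vertex with a shifted index and then re-adding the edges. Below is a standalone sketch of the same reindex-and-copy pattern between two SemanticGraph instances; appendGraph is a hypothetical helper, and the new IndexedWord(iw) copy follows the example above (its exact copy semantics vary across CoreNLP versions).

import java.util.HashMap;
import java.util.Map;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphEdge;

public class GraphMergeSketch {
    /** Append every vertex and edge of extra to base, shifting indices. */
    static void appendGraph(SemanticGraph base, SemanticGraph extra) {
        int offset = base.size(); // number of vertices already in base
        Map<Integer, IndexedWord> copies = new HashMap<Integer, IndexedWord>();
        for (IndexedWord iw : extra.vertexListSorted()) {
            IndexedWord copy = new IndexedWord(iw); // copy vertex, as in the example above
            copy.setIndex(iw.index() + offset);     // keep indices unique in base
            base.addVertex(copy);
            copies.put(iw.index(), copy);
        }
        for (SemanticGraphEdge e : extra.edgeListSorted()) {
            IndexedWord gov = copies.get(e.getGovernor().index());
            IndexedWord dep = copies.get(e.getDependent().index());
            base.addEdge(gov, dep, e.getRelation(), e.getWeight(), e.isExtra());
        }
    }
}

Mapping old indices to the copied vertices, rather than recomputing offsets when re-adding edges, avoids assuming the source graph's indices are contiguous.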