List of usage examples for the `edu.stanford.nlp.semgraph.SemanticGraphEdge#getGovernor()` method.
public IndexedWord getGovernor()
From source file:de.tudarmstadt.ukp.dkpro.core.corenlp.internal.CoreNlp2DKPro.java
License: Open Source License
/**
 * Converts the Stanford CoreNLP dependency graph of every sentence in {@code document}
 * into DKPro Core {@code Dependency} annotations on the given JCas.
 *
 * @param aJCas           the CAS that receives the new {@code Dependency} annotations
 * @param document        the CoreNLP annotation holding per-sentence dependency graphs
 * @param mappingProvider maps a dependency label to the UIMA type to instantiate
 * @param internStrings   if true, dependency-type strings are interned before being set
 */
public static void convertDependencies(JCas aJCas, Annotation document, MappingProvider mappingProvider,
        boolean internStrings) {
    for (CoreMap s : document.get(SentencesAnnotation.class)) {
        SemanticGraph graph = s.get(CollapsedDependenciesAnnotation.class);
        //SemanticGraph graph = s.get(EnhancedDependenciesAnnotation.class);

        // If there are no dependencies for this sentence, skip it. Might well mean we
        // skip all sentences because normally either there are dependencies for all or for
        // none.
        if (graph == null) {
            continue;
        }

        // Each graph root becomes a self-referential ROOT dependency (governor == dependent).
        for (IndexedWord root : graph.getRoots()) {
            Dependency dep = new ROOT(aJCas);
            dep.setDependencyType("root");
            dep.setDependent(root.get(TokenKey.class));
            dep.setGovernor(root.get(TokenKey.class));
            // The dependency annotation spans exactly the dependent token.
            dep.setBegin(dep.getDependent().getBegin());
            dep.setEnd(dep.getDependent().getEnd());
            dep.setFlavor(DependencyFlavor.BASIC);
            dep.addToIndexes();
        }

        for (SemanticGraphEdge edge : graph.edgeListSorted()) {
            Token dependent = edge.getDependent().get(TokenKey.class);
            Token governor = edge.getGovernor().get(TokenKey.class);

            // For the type mapping, we use getShortName() instead, because the <specific>
            // actually doesn't change the relation type
            String labelUsedForMapping = edge.getRelation().getShortName();

            // The nndepparser may produce labels in which the shortName contains a colon.
            // These represent language-specific labels of the UD, cf:
            // http://universaldependencies.github.io/docs/ext-dep-index.html
            labelUsedForMapping = StringUtils.substringBefore(labelUsedForMapping, ":");

            // Need to use toString() here to get "<shortname>_<specific>"
            String actualLabel = edge.getRelation().toString();

            // Instantiate the UIMA type chosen by the mapping, then populate it.
            Type depRel = mappingProvider.getTagType(labelUsedForMapping);
            Dependency dep = (Dependency) aJCas.getCas().createFS(depRel);
            dep.setDependencyType(internStrings ? actualLabel.intern() : actualLabel);
            dep.setDependent(dependent);
            dep.setGovernor(governor);
            dep.setBegin(dep.getDependent().getBegin());
            dep.setEnd(dep.getDependent().getEnd());
            // Edges marked "extra" by CoreNLP are recorded with the ENHANCED flavor.
            dep.setFlavor(edge.isExtra() ? DependencyFlavor.ENHANCED : DependencyFlavor.BASIC);
            dep.addToIndexes();
        }
    }
}
From source file:edu.cmu.deiis.annotator.StanfordCoreNLPAnnotator.java
License: Open Source License
/**
 * Runs the Stanford CoreNLP pipeline over the document text and converts the results
 * into UIMA annotations: tokens (with POS/lemma), named-entity mentions, sentences,
 * treebank parse trees, dependency nodes/relations, and coreference-based entities.
 *
 * @param jCas the CAS whose document text is processed and which receives all annotations
 * @throws AnalysisEngineProcessException declared by the UIMA interface
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag:
        // a run of identical non-"O" tags is merged into a single mention that is
        // emitted when the run ends (tag changes or becomes "O").
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // run just ended - emit the accumulated mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // a new run starts at this token
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // tag changed mid-run: emit the previous mention and start a new run
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention that extends to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations; character offsets are derived
        // from the sentence's covered Token annotations via the node's token indices
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency.
        // headRelations maps a node to the relations in which it is the CHILD (edges to its
        // heads); childRelations maps a node to the relations in which it is the HEAD.
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        // NOTE(review): ArrayListMultimap.get() never returns null (it returns an empty
        // list), so the null checks below are redundant but harmless.
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {
            // figure out the character span of the token
            // NOTE(review): sentNum/startIndex appear to be 1-based and endIndex
            // exclusive (hence the -1 / -2 adjustments) - confirm against the
            // CorefMention documentation for the CoreNLP version in use.
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // sort the mentions of this chain by begin offset
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            // NOTE(review): the call below discards its result and looks like a no-op
            // left over from debugging.
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order (earliest mention first)
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // smallest begin offset over all of the entity's mentions
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }
}
From source file:edu.cmu.deiis.annotators.StanfordAnnotator.java
License: Open Source License
/**
 * Runs the Stanford CoreNLP pipeline over the document text and converts the results
 * into UIMA annotations: tokens (with POS/lemma), named-entity mentions, sentences,
 * treebank parse trees, dependency nodes/relations, and coreference-based entities.
 *
 * NOTE(review): this method is an almost line-for-line duplicate of
 * StanfordCoreNLPAnnotator.process elsewhere in this collection; consider sharing one
 * implementation.
 *
 * @param jCas the CAS whose document text is processed and which receives all annotations
 * @throws AnalysisEngineProcessException declared by the UIMA interface
 */
@Override
public void process(JCas jCas) throws AnalysisEngineProcessException {
    Annotation document = this.processor.process(jCas.getDocumentText());

    String lastNETag = "O";
    int lastNEBegin = -1;
    int lastNEEnd = -1;
    for (CoreMap tokenAnn : document.get(TokensAnnotation.class)) {

        // create the token annotation
        int begin = tokenAnn.get(CharacterOffsetBeginAnnotation.class);
        int end = tokenAnn.get(CharacterOffsetEndAnnotation.class);
        String pos = tokenAnn.get(PartOfSpeechAnnotation.class);
        String lemma = tokenAnn.get(LemmaAnnotation.class);
        Token token = new Token(jCas, begin, end);
        token.setPos(pos);
        token.setLemma(lemma);
        token.addToIndexes();

        // hackery to convert token-level named entity tag into phrase-level tag:
        // a run of identical non-"O" tags is merged into a single mention that is
        // emitted when the run ends (tag changes or becomes "O").
        String neTag = tokenAnn.get(NamedEntityTagAnnotation.class);
        if (neTag.equals("O") && !lastNETag.equals("O")) {
            // run just ended - emit the accumulated mention
            NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
            ne.setMentionType(lastNETag);
            ne.addToIndexes();
        } else {
            if (lastNETag.equals("O")) {
                // a new run starts at this token
                lastNEBegin = begin;
            } else if (lastNETag.equals(neTag)) {
                // do nothing - begin was already set
            } else {
                // tag changed mid-run: emit the previous mention and start a new run
                NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
                ne.setMentionType(lastNETag);
                ne.addToIndexes();
                lastNEBegin = begin;
            }
            lastNEEnd = end;
        }
        lastNETag = neTag;
    }
    // flush a mention that extends to the end of the document
    if (!lastNETag.equals("O")) {
        NamedEntityMention ne = new NamedEntityMention(jCas, lastNEBegin, lastNEEnd);
        ne.setMentionType(lastNETag);
        ne.addToIndexes();
    }

    // add sentences and trees
    for (CoreMap sentenceAnn : document.get(SentencesAnnotation.class)) {

        // add the sentence annotation
        int sentBegin = sentenceAnn.get(CharacterOffsetBeginAnnotation.class);
        int sentEnd = sentenceAnn.get(CharacterOffsetEndAnnotation.class);
        Sentence sentence = new Sentence(jCas, sentBegin, sentEnd);
        sentence.addToIndexes();

        // add the syntactic tree annotation
        List<CoreLabel> tokenAnns = sentenceAnn.get(TokensAnnotation.class);
        Tree tree = sentenceAnn.get(TreeAnnotation.class);
        if (tree.children().length != 1) {
            throw new RuntimeException("Expected single root node, found " + tree);
        }
        tree = tree.firstChild();
        tree.indexSpans(0);
        TopTreebankNode root = new TopTreebankNode(jCas);
        root.setTreebankParse(tree.toString());
        // TODO: root.setTerminals(v)
        this.addTreebankNodeToIndexes(root, jCas, tree, tokenAnns);

        // get the dependencies
        SemanticGraph dependencies = sentenceAnn.get(CollapsedCCProcessedDependenciesAnnotation.class);

        // convert Stanford nodes to UIMA annotations; character offsets are derived
        // from the sentence's covered Token annotations via the node's token indices
        List<Token> tokens = JCasUtil.selectCovered(jCas, Token.class, sentence);
        Map<IndexedWord, DependencyNode> stanfordToUima = new HashMap<IndexedWord, DependencyNode>();
        for (IndexedWord stanfordNode : dependencies.vertexSet()) {
            int indexBegin = stanfordNode.get(BeginIndexAnnotation.class);
            int indexEnd = stanfordNode.get(EndIndexAnnotation.class);
            int tokenBegin = tokens.get(indexBegin).getBegin();
            int tokenEnd = tokens.get(indexEnd - 1).getEnd();
            DependencyNode node;
            if (dependencies.getRoots().contains(stanfordNode)) {
                node = new TopDependencyNode(jCas, tokenBegin, tokenEnd);
            } else {
                node = new DependencyNode(jCas, tokenBegin, tokenEnd);
            }
            stanfordToUima.put(stanfordNode, node);
        }

        // create relation annotations for each Stanford dependency.
        // headRelations maps a node to the relations in which it is the CHILD (edges to its
        // heads); childRelations maps a node to the relations in which it is the HEAD.
        ArrayListMultimap<DependencyNode, DependencyRelation> headRelations = ArrayListMultimap.create();
        ArrayListMultimap<DependencyNode, DependencyRelation> childRelations = ArrayListMultimap.create();
        for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
            DependencyRelation relation = new DependencyRelation(jCas);
            DependencyNode head = stanfordToUima.get(stanfordEdge.getGovernor());
            DependencyNode child = stanfordToUima.get(stanfordEdge.getDependent());
            String relationType = stanfordEdge.getRelation().toString();
            if (head == null || child == null || relationType == null) {
                throw new RuntimeException(String.format(
                        "null elements not allowed in relation:\nrelation=%s\nchild=%s\nhead=%s\n", relation,
                        child, head));
            }
            relation.setHead(head);
            relation.setChild(child);
            relation.setRelation(relationType);
            relation.addToIndexes();
            headRelations.put(child, relation);
            childRelations.put(head, relation);
        }

        // set the relations for each node annotation
        // NOTE(review): ArrayListMultimap.get() never returns null (it returns an empty
        // list), so the null checks below are redundant but harmless.
        for (DependencyNode node : stanfordToUima.values()) {
            List<DependencyRelation> heads = headRelations.get(node);
            node.setHeadRelations(new FSArray(jCas, heads == null ? 0 : heads.size()));
            if (heads != null) {
                FSCollectionFactory.fillArrayFS(node.getHeadRelations(), heads);
            }
            List<DependencyRelation> children = childRelations.get(node);
            node.setChildRelations(new FSArray(jCas, children == null ? 0 : children.size()));
            if (children != null) {
                FSCollectionFactory.fillArrayFS(node.getChildRelations(), children);
            }
            node.addToIndexes();
        }
    }

    // map from spans to named entity mentions
    Map<Span, NamedEntityMention> spanMentionMap = new HashMap<Span, NamedEntityMention>();
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        spanMentionMap.put(new Span(mention.getBegin(), mention.getEnd()), mention);
    }

    // add mentions for all entities identified by the coreference system
    List<NamedEntity> entities = new ArrayList<NamedEntity>();
    List<List<Token>> sentenceTokens = new ArrayList<List<Token>>();
    for (Sentence sentence : JCasUtil.select(jCas, Sentence.class)) {
        sentenceTokens.add(JCasUtil.selectCovered(jCas, Token.class, sentence));
    }
    Map<Integer, CorefChain> corefChains = document.get(CorefChainAnnotation.class);
    for (CorefChain chain : corefChains.values()) {
        List<NamedEntityMention> mentions = new ArrayList<NamedEntityMention>();
        for (CorefMention corefMention : chain.getMentionsInTextualOrder()) {
            // figure out the character span of the token
            // NOTE(review): sentNum/startIndex appear to be 1-based and endIndex
            // exclusive (hence the -1 / -2 adjustments) - confirm against the
            // CorefMention documentation for the CoreNLP version in use.
            List<Token> tokens = sentenceTokens.get(corefMention.sentNum - 1);
            int begin = tokens.get(corefMention.startIndex - 1).getBegin();
            int end = tokens.get(corefMention.endIndex - 2).getEnd();

            // use an existing named entity mention when possible; otherwise create a new one
            NamedEntityMention mention = spanMentionMap.get(new Span(begin, end));
            if (mention == null) {
                mention = new NamedEntityMention(jCas, begin, end);
                mention.addToIndexes();
            }
            mentions.add(mention);
        }

        // sort the mentions of this chain by begin offset
        Collections.sort(mentions, new Comparator<NamedEntityMention>() {
            @Override
            public int compare(NamedEntityMention m1, NamedEntityMention m2) {
                return m1.getBegin() - m2.getBegin();
            }
        });

        // create mentions and add them to entity
        NamedEntity entity = new NamedEntity(jCas);
        entity.setMentions(new FSArray(jCas, mentions.size()));
        int index = 0;
        for (NamedEntityMention mention : mentions) {
            mention.setMentionedEntity(entity);
            entity.setMentions(index, mention);
            index += 1;
        }
        entities.add(entity);
    }

    // add singleton entities for any named entities not picked up by coreference system
    for (NamedEntityMention mention : JCasUtil.select(jCas, NamedEntityMention.class)) {
        if (mention.getMentionedEntity() == null) {
            NamedEntity entity = new NamedEntity(jCas);
            entity.setMentions(new FSArray(jCas, 1));
            entity.setMentions(0, mention);
            mention.setMentionedEntity(entity);
            // NOTE(review): the call below discards its result and looks like a no-op
            // left over from debugging.
            entity.getMentions();
            entities.add(entity);
        }
    }

    // sort entities by document order (earliest mention first)
    Collections.sort(entities, new Comparator<NamedEntity>() {
        @Override
        public int compare(NamedEntity o1, NamedEntity o2) {
            return getFirstBegin(o1) - getFirstBegin(o2);
        }

        // smallest begin offset over all of the entity's mentions
        private int getFirstBegin(NamedEntity entity) {
            int min = Integer.MAX_VALUE;
            for (NamedEntityMention mention : JCasUtil.select(entity.getMentions(), NamedEntityMention.class)) {
                if (mention.getBegin() < min) {
                    min = mention.getBegin();
                }
            }
            return min;
        }
    });

    // add entities to document
    for (NamedEntity entity : entities) {
        entity.addToIndexes();
    }
}
From source file:edu.csupomona.nlp.util.Sentence2Clause.java
public void process(String text) { // create an empty Annotation just with the given text Annotation document = new Annotation(text); // run all Annotators on this text pipeline.annotate(document);//w ww . j a va 2 s.c om List<CoreMap> sentences = document.get(SentencesAnnotation.class); for (CoreMap sentence : sentences) { // this is the Stanford dependency graph of the current sentence SemanticGraph dependencies = sentence.get(BasicDependenciesAnnotation.class); System.out.println(dependencies.toString("plain")); for (SemanticGraphEdge edge : dependencies.getEdgeSet()) { System.out.println(edge.getRelation().getShortName() + ": " + edge.getGovernor().value() + "(" + edge.getGovernor().index() + ") => " + edge.getDependent().value() + "(" + edge.getDependent().index() + ")"); } } }
From source file:ims.cs.parc.ParcUtils.java
License: Open Source License
/**
 * Computes cached all-pairs shortest paths over a dependency graph using
 * Floyd-Warshall.
 *
 * @param dependencies the dependency edges of a sentence
 * @return a {@link FloydWarshallShortestPaths} instance over a directed graph whose
 *         vertices are the words and whose edges mirror {@code dependencies}
 */
public static FloydWarshallShortestPaths computeFloydWarshallSGE(List<SemanticGraphEdge> dependencies) {
    SimpleDirectedGraph<IndexedWord, IndexedEdge> graph = new SimpleDirectedGraph<IndexedWord, IndexedEdge>(
            IndexedEdge.class);

    int edgeId = 0;
    for (SemanticGraphEdge dep : dependencies) {
        // addVertex is a no-op when the vertex is already present.
        graph.addVertex(dep.getGovernor());
        graph.addVertex(dep.getDependent());
        // BUG FIX: edgeId was declared but never incremented, so every IndexedEdge
        // received id 0; post-increment now assigns each edge a unique id.
        graph.addEdge(dep.getGovernor(), dep.getDependent(), new IndexedEdge(dep.getRelation(), edgeId++));
    }
    return new FloydWarshallShortestPaths(graph);
}
From source file:ims.cs.qsample.features.components.SentenceDependencyFeatures.java
License: Open Source License
/** * Add features about the parent of the token * @param token/*from w w w . ja va 2 s . co m*/ */ private static void addParentFeature(Token token) { SemanticGraphEdge parentEdge = Helper.getDependencyParentRel(token); FeatureSet fs = token.boundaryFeatureSet; if (parentEdge != null) { // plain parent if (StaticConfig.dependencyParentRel) fs.add(PARENT_REL_PREFIX + "=" + parentEdge.getRelation()); // parent and relation label if (StaticConfig.dependencyParentRelHead) fs.add(PARENT_RELHEAD_PREFIX + "=" + parentEdge.getRelation() + "," + parentEdge.getGovernor().lemma()); } }
From source file:it.uniroma2.sag.kelp.input.parser.impl.StanfordParserWrapper.java
License: Apache License
/**
 * Parses a single sentence with the Stanford pipeline and converts the result into a
 * KeLP {@code DependencyGraph}: one {@code DGNode} per token (1-based ids) and one
 * {@code DGRelation} per vertex of the selected dependency representation.
 *
 * @param sentenceString the sentence to parse (only the first CoreNLP sentence is used)
 * @return the populated dependency graph
 */
@Override
public DependencyGraph parse(String sentenceString) {
    Annotation document = new Annotation(sentenceString);
    pipeline.annotate(document);
    List<CoreMap> sentences = document.get(SentencesAnnotation.class);
    CoreMap sentence = sentences.get(0);

    DependencyGraph graph = new DependencyGraph();
    graph.setSentence(sentenceString);
    graph.setParserName("StanfordParser");
    graph.setParserVersion("3.6.0");
    graph.setNodes(new ArrayList<DGNode>());

    // One DGNode per token; ids are 1-based to match IndexedWord.index().
    int nId = 1;
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
        DGNode node = new DGNode();
        Map<String, Object> nodeProps = new HashMap<String, Object>();
        nodeProps.put("surface", token.originalText());
        nodeProps.put("lemma", token.lemma());
        nodeProps.put("pos", token.tag());
        nodeProps.put("start", token.beginPosition());
        nodeProps.put("end", token.endPosition());
        nodeProps.put("id", nId);
        nId++;
        graph.getNodes().add(node);
        node.setProperties(nodeProps);
    }

    // Select the dependency representation configured for this wrapper.
    SemanticGraph dependencies = null;
    switch (dependencyType) {
    case BASIC:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    case COLLAPSED:
        dependencies = sentence.get(CollapsedDependenciesAnnotation.class);
        break;
    case COLLAPSED_CCPROCESSED:
        dependencies = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
        break;
    default:
        dependencies = sentence.get(BasicDependenciesAnnotation.class);
        break;
    }
    // (A previous no-op call to dependencies.edgeListSorted() whose result was
    // discarded has been removed.)

    List<DGRelation> relations = new ArrayList<DGRelation>();
    for (IndexedWord node : dependencies.vertexListSorted()) {
        DGRelation relation = new DGRelation();
        relation.setProperties(new HashMap<String, Object>());
        DGNode child = graph.getDGNodeById(node.index());
        relation.setTarget(child);

        // Collect parents, skipping self-loops (parent id equal to child id).
        Collection<IndexedWord> parentsTmp = dependencies.getParents(node);
        ArrayList<IndexedWord> parents = new ArrayList<IndexedWord>();
        for (IndexedWord par : parentsTmp) {
            SemanticGraphEdge edge = dependencies.getEdge(par, node);
            DGNode parent = graph.getDGNodeById(edge.getGovernor().index());
            // BUG FIX: ids are boxed Integers, so '!=' compared object references and
            // only worked for ids inside the Integer cache range [-128, 127];
            // equals() compares the numeric values.
            if (!parent.getProperties().get("id").equals(child.getProperties().get("id"))) {
                parents.add(par);
            }
        }

        if (parents.isEmpty()) {
            // No parent: this vertex is the root of the dependency graph.
            relation.getProperties().put("type", "root");
            relation.getProperties().put("fromId", Integer.valueOf(0));
            relation.setSource(null);
            graph.setRoot(relation);
        } else {
            // NOTE(review): with multiple parents each iteration overwrites the
            // previous source/fromId/type, so only the last parent is kept -
            // presumably a single-head assumption; confirm for collapsed graphs.
            Iterator<IndexedWord> it = parents.iterator();
            while (it.hasNext()) {
                IndexedWord par = it.next();
                SemanticGraphEdge edge = dependencies.getEdge(par, node);
                DGNode parent = graph.getDGNodeById(edge.getGovernor().index());
                relation.setSource(parent);
                relation.getProperties().put("fromId", parent.getProperties().get("id"));
                relation.getProperties().put("type", edge.getRelation().toString());
            }
        }
        relations.add(relation);
    }
    graph.setRelations(relations);
    return graph;
}
From source file:opendial.bn.values.RelationalVal.java
License: Open Source License
/**
 * Appends all vertices and edges of another semantic graph to this value's graph,
 * shifting the copied vertices' indices past the existing ones, and invalidates the
 * cached hash code.
 *
 * @param newGraph the graph whose vertices and edges are merged into this one
 */
public void addGraph(SemanticGraph newGraph) {
    int offset = graph.size();

    // Copy each vertex, re-indexing it to sit after the vertices already present.
    for (IndexedWord word : newGraph.vertexListSorted()) {
        IndexedWord shifted = new IndexedWord(word);
        shifted.setIndex(graph.size());
        graph.addVertex(shifted);
    }

    // Re-create every edge between the shifted copies.
    // NOTE(review): copies receive indices graph.size() (i.e. starting at offset),
    // while edge endpoints use index() + offset; if the source graph's indices are
    // 1-based these differ by one - confirm IndexedWord indexing semantics.
    for (SemanticGraphEdge edge : newGraph.edgeListSorted()) {
        int dependentIndex = edge.getDependent().index() + offset;
        int governorIndex = edge.getGovernor().index() + offset;
        addEdge(governorIndex, dependentIndex, edge.getRelation().getLongName());
    }

    // The graph changed, so any previously computed hash code is stale.
    cachedHashCode = 0;
}
From source file:org.nlp2rdf.implementation.stanfordcorenlp.StanfordWrapper.java
License: Apache License
/**
 * Converts the Stanford CoreNLP analysis of a NIF context string into RDF: creates the
 * NIF sentence/word structure, attaches lemma and POS (OLiA-linked) information to each
 * word individual, and materialises dependency edges as object properties between word
 * individuals.
 *
 * @param context       the NIF context individual whose isString value is processed
 * @param inputModel    model the context string is read from
 * @param outputModel   model receiving all generated triples and log statements
 * @param nifParameters configuration (prefix, URI scheme, log prefix, annotator setup)
 */
public void process(Individual context, OntModel inputModel, OntModel outputModel, NIFParameters nifParameters) {
    String contextString = context
            .getPropertyValue(NIFDatatypeProperties.isString.getDatatypeProperty(inputModel)).asLiteral()
            .getString();
    String prefix = nifParameters.getPrefix();
    URIScheme urischeme = nifParameters.getUriScheme();
    Annotator pipeline = buildAnnotator(nifParameters);

    // create an empty Annotation just with the given text
    Annotation document = new Annotation(contextString);
    // run all Annotators on this text
    pipeline.annotate(document);

    // these are all the sentences in this document
    // a CoreMap is essentially a Map that uses class objects as keys and has values with custom types
    List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);

    //get all the sentences and words and read it in an intermediate structure
    //NOTE: this can be greatly optimized of course
    // for now it is just simple and cheap to implement it like this
    int wordCount = 0;
    TreeMap<Span, List<Span>> tokenizedText = new TreeMap<Span, List<Span>>();
    for (CoreMap sentence : sentences) {
        Span sentenceSpan = new Span(sentence.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                sentence.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
        List<Span> wordSpans = new ArrayList<Span>();
        for (CoreLabel coreLabel : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            wordSpans.add(new Span(coreLabel.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    coreLabel.get(CoreAnnotations.CharacterOffsetEndAnnotation.class)));
            wordCount++;
        }
        tokenizedText.put(sentenceSpan, wordSpans);
    }

    /**
     * Basic Model Setup
     **/
    //get parameters for the URIGenerator
    Text2RDF text2RDF = new Text2RDF();
    text2RDF.generateNIFModel(prefix, context, urischeme, outputModel, tokenizedText);
    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), "Finished creating "
            + tokenizedText.size() + " sentence(s) with " + wordCount + " word(s) ", RLOGIndividuals.DEBUG,
            this.getClass().getCanonicalName(), null, null));
    // text2RDF.addNextAndPreviousProperties(prefix,urischeme,model);

    // traversing the words in the current sentence
    // a CoreLabel is a CoreMap with additional token-specific methods
    for (CoreMap sentence : sentences) {
        for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
            Span wordSpan = new Span(token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                    token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
            //the word should exist already (generated by generateNIFModel above)
            Individual wordIndividual = outputModel
                    .getIndividual(urischeme.generate(prefix, contextString, wordSpan));
            if (wordIndividual == null) {
                log.error("SKIPPING: word was not found in the model: "
                        + urischeme.generate(prefix, contextString, wordSpan));
                continue;
            }
            /********************************
             * Lemma ******/
            if (token.get(CoreAnnotations.LemmaAnnotation.class) != null) {
                wordIndividual.addProperty(NIFDatatypeProperties.lemma.getDatatypeProperty(outputModel),
                        token.get(CoreAnnotations.LemmaAnnotation.class), XSDDatatype.XSDstring);
            }
            /********************************
             * POS tag ******/
            outputModel.setNsPrefix("olia", "http://purl.org/olia/olia.owl#");
            // this is the POS tag of the token; OLiA individuals/classes are looked up
            // from the Penn mapping tables and linked to the word individual
            String posTag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
            List<String> oliaIndividual = (List<String>) Penn.hasTag.get(posTag);
            if (oliaIndividual != null) {
                for (String s : oliaIndividual) {
                    wordIndividual.addProperty(NIFObjectProperties.oliaLink.getObjectProperty(outputModel),
                            outputModel.createIndividual(s, OWL.Thing));
                    List<String> pennlinks = (List<String>) Penn.links.get(s);
                    if (pennlinks != null) {
                        for (String oc : pennlinks) {
                            wordIndividual.addProperty(
                                    NIFAnnotationProperties.oliaCategory.getAnnotationProperty(outputModel),
                                    outputModel.createClass(oc));
                        }
                    } else {
                        // missing Penn->OLiA class links are logged into the output model
                        outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(),
                                "missing oliaLinks for " + s, RLOGIndividuals.ERROR,
                                this.getClass().getCanonicalName(), null, null));
                    }
                }
            } else {
                outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(),
                        "missing oliaLinks for " + posTag, RLOGIndividuals.ERROR,
                        this.getClass().getCanonicalName(), null, null));
            }
        }

        SemanticGraph dependencies = sentence
                .get(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class);
        if (dependencies != null) {
            //time to add the prefix
            StanfordSimple.addStanfordSimplePrefix(outputModel);

            // create relation annotations for each Stanford dependency
            for (SemanticGraphEdge stanfordEdge : dependencies.edgeIterable()) {
                Span govSpan = new Span(
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getGovernor().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                Span depSpan = new Span(
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetBeginAnnotation.class),
                        stanfordEdge.getDependent().get(CoreAnnotations.CharacterOffsetEndAnnotation.class));
                //String relationType = stanfordEdge.getRelation().toString();
                // edgeURIs holds one URI (plain label) or two (label + sub-property)
                String[] edgeURIs = StanfordSimple.getURIforEdgeLabel(stanfordEdge.getRelation().toString());
                ObjectProperty relation = null;
                switch (edgeURIs.length) {
                case 1:
                    relation = outputModel.createObjectProperty(edgeURIs[0]);
                    break;
                case 2:
                    relation = outputModel.createObjectProperty(edgeURIs[0]);
                    relation.addSubProperty(outputModel.createObjectProperty(edgeURIs[1]));
                    break;
                default:
                    // NOTE(review): concatenating the array prints its identity hash,
                    // not its contents - Arrays.toString(edgeURIs) was probably meant.
                    String message = "Empty edge label, no URI written: " + edgeURIs;
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;
                }
                Individual gov = text2RDF.createCStringIndividual(prefix, context, govSpan, urischeme,
                        outputModel);
                Individual dep = text2RDF.createCStringIndividual(prefix, context, depSpan, urischeme,
                        outputModel);
                gov.addProperty(relation, dep);
                relation.addSuperProperty(NIFObjectProperties.inter.getObjectProperty(outputModel));
                relation.addSuperProperty(NIFObjectProperties.dependency.getObjectProperty(outputModel));
                // NOTE(review): this null check is dead - gov.addProperty(relation, dep)
                // above would already have thrown a NullPointerException; the check
                // should precede the first use of gov/dep.
                if (gov == null || dep == null) {
                    String message = "SKIPPING Either gov or dep was null for the dependencies\n" + "gov: "
                            + gov + "\ndep: " + dep;
                    outputModel.add(RLOGSLF4JBinding.log(nifParameters.getLogPrefix(), message,
                            RLOGIndividuals.ERROR, this.getClass().getCanonicalName(), null, null));
                    continue;
                }
                // (Large blocks of commented-out legacy OLiA-linking code for dependency
                // relations were removed here; see version control history if needed.)
            }
        }
        //end sentences
        // (Commented-out syntax-tree processing removed for the 2.0 release.)
    }
}
From source file:org.sam_agent.csparser.ContinuousParser.java
License: Open Source License
/**
 * Serialises the edges of a dependency graph as a JSON-style fragment of the form
 * {@code "dependencies":[{"rel":...,"arg0":...,"arg1":...},...]}, where arg0/arg1 are
 * "word-index" labels of the governor and dependent.
 *
 * @param dependencies the dependency graph to serialise
 * @return the serialised fragment (note: values are not JSON-escaped)
 */
public String stringify(SemanticGraph dependencies) {
    List<String> edgeStrings = new ArrayList<String>();
    for (SemanticGraphEdge edge : dependencies.edgeIterable()) {
        IndexedWord governor = edge.getGovernor();
        IndexedWord dependent = edge.getDependent();
        // Each endpoint is rendered as "word-index".
        String governorLabel = governor.word().toString() + "-" + governor.index();
        String dependentLabel = dependent.word().toString() + "-" + dependent.index();
        edgeStrings.add(String.format("{\"rel\":\"%s\",\"arg0\":\"%s\",\"arg1\":\"%s\"}",
                edge.getRelation().toString(), governorLabel, dependentLabel));
    }
    return String.format("\"dependencies\":[%s]", String.join(",", edgeStrings));
}