Usage examples for the no-argument constructor of edu.stanford.nlp.util.ArrayCoreMap
public ArrayCoreMap()
From source file:ca.mcgill.cs.crown.AnnotatedLexicalEntryImpl.java
License:Creative Commons License
/**
 * Stores the given lexical entry and initializes {@code operations} with a
 * newly constructed {@link ArrayCoreMap}.
 *
 * @param entry the lexical entry to keep a reference to; stored as-is
 */
public AnnotatedLexicalEntryImpl(LexicalEntry entry) {
    this.operations = new ArrayCoreMap();
    this.entry = entry;
}
From source file:ca.mcgill.cs.crown.LexicalEntryImpl.java
License:Creative Commons License
/**
 * Stores the lemma, identifier and part of speech of this entry and
 * initializes {@code annotations} with a newly constructed
 * {@link ArrayCoreMap}.
 *
 * @param lemma the lemma of the entry
 * @param id    the identifier of the entry
 * @param pos   the part of speech of the entry
 */
public LexicalEntryImpl(String lemma, String id, POS pos) {
    this.annotations = new ArrayCoreMap();
    this.pos = pos;
    this.id = id;
    this.lemma = lemma;
}
From source file:edu.cmu.ml.rtw.users.ssrivastava.RegexExtractor.java
public static CoreMap getStanfordSentence(DocumentNLP document, int sentIdx) { List<String> words = document.getSentenceTokenStrs(sentIdx); List<PoSTag> posTags = document.getSentencePoSTags(sentIdx); List<CoreLabel> tokenList = new ArrayList<CoreLabel>(); for (int i = 0; i < words.size(); i++) { /*Re-create Stanford tokens*/ CoreLabel token = new CoreLabel(); token.setWord(words.get(i));// w ww . j ava 2 s .c o m token.setTag(posTags.get(i).toString()); token.setNER("O"); token.setDocID(document.getName()); token.setSentIndex(sentIdx); token.setBeginPosition(document.getToken(sentIdx, i).getCharSpanStart()); token.setEndPosition(document.getToken(sentIdx, i).getCharSpanEnd()); //System.out.println(token.word()+" "+token.beginPosition()+" "+token.endPosition()); tokenList.add(token); } //Add NER labels for sentence List<Pair<TokenSpan, String>> ners = document.getNer(sentIdx); for (Pair<TokenSpan, String> p : ners) { for (int k = p.getFirst().getStartTokenIndex(); k < p.getFirst().getEndTokenIndex(); k++) { tokenList.get(k).setNER(p.getSecond()); } } //Convert to Stanford Sentence CoreMap sentence = new ArrayCoreMap(); sentence.set(TokensAnnotation.class, tokenList); sentence.set(CharacterOffsetBeginAnnotation.class, tokenList.get(0).beginPosition()); sentence.set(CharacterOffsetEndAnnotation.class, tokenList.get(words.size() - 1).endPosition()); return sentence; }
From source file:edu.illinois.cs.cogcomp.pipeline.handlers.StanfordParseHandler.java
License:Open Source License
private static CoreMap buildStanfordSentence(Constituent sentence, String rawText, int sentIndex, List<CoreLabel> stanfordTokens) { CoreMap stanfordSentence = new ArrayCoreMap(); CoreLabel firstTok = stanfordTokens.get(0); CoreLabel lastTok = stanfordTokens.get(stanfordTokens.size() - 1); stanfordSentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, sentence.getStartSpan()); stanfordSentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, sentence.getEndSpan()); stanfordSentence.set(CoreAnnotations.TokenBeginAnnotation.class, firstTok.index()); stanfordSentence.set(CoreAnnotations.TokenEndAnnotation.class, lastTok.index() + 1); // at-the-end // indexing? stanfordSentence.set(CoreAnnotations.TextAnnotation.class, rawText); stanfordSentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentIndex); stanfordSentence.set(CoreAnnotations.TokensAnnotation.class, stanfordTokens); return stanfordSentence; }
From source file:edu.jhu.hlt.concrete.stanford.ConcreteToStanfordMapper.java
License:Open Source License
public static List<CoreMap> concreteSectionToCoreMapList(final Section sect, final String commText) { List<CoreMap> toRet = new ArrayList<>(); List<Sentence> sentList = sect.getSentenceList(); int tokOffset = 0; for (int i = 0; i < sentList.size(); i++) { Sentence st = sentList.get(i);//from www . j a va 2 s .c o m CoreMap cm = new ArrayCoreMap(); cm.set(SentenceIndexAnnotation.class, i); final TextSpan sts = st.getTextSpan(); final int sentCharStart = sts.getStart(); final int sentCharEnd = sts.getEnding(); LOGGER.debug("Setting stanford sentence BeginChar = {}", sentCharStart); cm.set(CharacterOffsetBeginAnnotation.class, sentCharStart); LOGGER.debug("Setting stanford sentence EndChar = {}", sentCharEnd); cm.set(CharacterOffsetEndAnnotation.class, sentCharEnd); String sectText = commText.substring(sentCharStart, sentCharEnd); LOGGER.debug("Setting text: {}", sectText); cm.set(TextAnnotation.class, sectText); Tokenization tkz = st.getTokenization(); List<CoreLabel> clList = tokenizationToCoreLabelList(tkz, i, sentCharStart); final int maxIdx = clList.size(); LOGGER.debug("Setting stanford sentence token begin: {}", tokOffset); cm.set(TokenBeginAnnotation.class, tokOffset); final int tokEnd = tokOffset + maxIdx; LOGGER.debug("Setting stanford sentence token end: {}", tokEnd); cm.set(TokenEndAnnotation.class, tokEnd); cm.set(TokensAnnotation.class, clList); tokOffset = tokEnd; toRet.add(cm); } return toRet; }