Example usage for edu.stanford.nlp.ling CoreLabel word

List of usage examples for edu.stanford.nlp.ling CoreLabel word

Introduction

On this page you can find example usages for edu.stanford.nlp.ling CoreLabel word.

Prototype

@Override
public String word() 

Source Link

Usage

From source file:NERServer.java

License:Open Source License

public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.err.println("usage: java NERServer modelpath");
        System.exit(1);
    }

    // Load the CRF model once at startup; it is reused for every input line.
    CRFClassifier crf = CRFClassifier.getClassifier(args[0]);
    // Tiny (size-1) buffer so the reader does not hold lines back from the server loop.
    BufferedReader input = new BufferedReader(new InputStreamReader(System.in), 1);

    // Tag each stdin line until EOF, printing "word/LABEL" pairs per sentence.
    String ln;
    while ((ln = input.readLine()) != null) {
        List<List<CoreLabel>> out = crf.classify(ln);
        for (List<CoreLabel> sentence : out) {
            for (CoreLabel word : sentence) {
                String label = word.get(CoreAnnotations.AnswerAnnotation.class);
                System.out.print(word.word() + '/' + label + ' ');
            }
        }
        System.out.print('\n');
    }
}

From source file:ca.ualberta.exemplar.core.ParserMalt.java

License:Open Source License

/**
 * Converts a tokenized sentence into CoNLL-format input lines
 * (id, form, lemma, coarse POS, fine POS, "_"), one line per token.
 * Missing or empty lemma/POS values are written as "_".
 */
private String[] sentenceToCoNLLInput(List<CoreLabel> tokens) {
    List<String> conllLines = new ArrayList<String>(100);

    int id = 1;
    for (CoreLabel token : tokens) {
        String form = token.word();
        String rawLemma = token.lemma();
        String rawPos = token.get(PartOfSpeechAnnotation.class);

        // CoNLL uses "_" as the placeholder for absent columns.
        String lemma = (rawLemma == null || rawLemma.length() == 0) ? "_" : rawLemma;
        String pos = (rawPos == null || rawPos.length() == 0) ? "_" : rawPos;

        conllLines.add(id + "\t" + form + "\t" + lemma + "\t" + pos + "\t" + pos + "\t" + "_");
        id++;
    }

    return conllLines.toArray(new String[0]);
}

From source file:com.asimihsan.handytrowel.nlp.StopwordAnnotator.java

License:Open Source License

@Override
public void annotate(Annotation annotation) {
    // Tag every token with a (isWordStopword, isLemmaStopword) Pair under this
    // annotator's key. No-op when there is no stopword list or no tokens.
    if (stopwords != null && stopwords.size() > 0 && annotation.containsKey(TokensAnnotation.class)) {
        List<CoreLabel> tokens = annotation.get(TokensAnnotation.class);
        for (CoreLabel token : tokens) {
            boolean isWordStopword = stopwords.contains(token.word().toLowerCase());
            // FIX: token.lemma() is null when the pipeline did not run a lemma
            // annotator; the old code called toLowerCase() on it and threw an NPE.
            String lemma = checkLemma ? token.lemma() : null;
            boolean isLemmaStopword = lemma != null && stopwords.contains(lemma.toLowerCase());
            Pair<Boolean, Boolean> pair = Pair.makePair(isWordStopword, isLemmaStopword);
            token.set(StopwordAnnotator.class, pair);
        }
    }
}

From source file:com.asimihsan.handytrowel.nlp.TextAnalyzer.java

License:Open Source License

/**
 * Tokenizes {@code body} with a CoreNLP pipeline (tokenize, ssplit, custom stopword
 * annotator), then for each non-stopword token: lowercases it, drops pure-punctuation
 * tokens, replaces number-like text with the placeholder "NUMBER", stems it with the
 * Snowball (Porter2) English stemmer, and appends the result to {@code tokens}.
 *
 * @return this analyzer for chaining; also returned early (with {@code tokens}
 *         unchanged) if the stopword resource cannot be read
 */
public TextAnalyzer analyze() {
    // Stanford CoreNLP: lemmatization is avoided because it is very slow; Porter2
    // stemming is used instead. (Porter -> Snowball (Porter2) -> Lancaster is the
    // order of stemming aggressiveness.)
    //
    // other ideas
    // - remove top 10k most common english words
    Properties props = new Properties();
    props.put("annotators", "tokenize, ssplit, stopword");
    // Register the custom annotator class behind the "stopword" pipeline step.
    props.setProperty("customAnnotatorClass.stopword", "com.asimihsan.handytrowel.nlp.StopwordAnnotator");
    List<String> stopWords = null;
    try {
        // Stopword list shipped as a classpath resource (top 1000 English words).
        stopWords = WordReader.wordReaderWithResourcePath("/nlp/top1000words.txt").getWords();
    } catch (IOException e) {
        e.printStackTrace();
        return this;
    }
    String customStopWordList = Joiner.on(",").join(stopWords);
    props.setProperty(StopwordAnnotator.STOPWORDS_LIST, customStopWordList);
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
    Annotation document = new Annotation(body);
    pipeline.annotate(document);
    List<CoreLabel> inputTokens = document.get(CoreAnnotations.TokensAnnotation.class);
    SnowballStemmer stemmer = new englishStemmer();
    for (CoreLabel token : inputTokens) {
        // Pair is (isWordStopword, isLemmaStopword) as set by StopwordAnnotator.
        Pair<Boolean, Boolean> stopword = token.get(StopwordAnnotator.class);
        if (stopword.first())
            continue;
        String word = token.word().toLowerCase();

        //!!AI TODO this sucks, should make another annotator and make it optional etc.
        //also we're matching full stops! so we lose sentence information.
        if (punctuation.matcher(word).matches())
            continue;

        //!AI TODO again this would be its own annotator and optional
        // Replace anything matching the `number` pattern (presumably digit runs —
        // the pattern is defined elsewhere; confirm) with the literal "NUMBER".
        word = number.matcher(word).replaceAll("NUMBER");

        // Porter2 stemming via Snowball.
        stemmer.setCurrent(word);
        stemmer.stem();
        word = stemmer.getCurrent();
        tokens.add(word);
    }
    return this;
}

From source file:com.epictodo.controller.nlp.SentenceAnalysis.java

License:Open Source License

/**
 * Identifies and extracts NER entities such as Person, Date, Time, Organization and
 * Location from a sentence, grouping the matched words by entity category.
 *
 * @param _sentence the raw sentence to classify
 * @return a map from NER category to the distinct words tagged with that category,
 *         both preserving first-seen order
 */
public LinkedHashMap<String, LinkedHashSet<String>> nerEntitiesExtractor(String _sentence) {
    // FIX: was `new <String, LinkedHashSet<String>>LinkedHashMap()` — a raw-typed
    // construction with misplaced constructor type arguments; diamond is type-safe.
    LinkedHashMap<String, LinkedHashSet<String>> _results = new LinkedHashMap<>();
    CRFClassifier<CoreLabel> _classifier = load_engine.CLASSIFIER; //CRFClassifier.getClassifierNoExceptions(CLASSIFIER_MODEL);
    List<List<CoreLabel>> _classify = _classifier.classify(_sentence);

    for (List<CoreLabel> _tokens : _classify) {
        for (CoreLabel _token : _tokens) {
            String _word = _token.word();
            String _category = _token.get(CoreAnnotations.AnswerAnnotation.class);

            // "O" marks tokens outside any named entity; skip them.
            if (!"O".equals(_category)) {
                // Create the category's set on first sight, then collect the word.
                _results.computeIfAbsent(_category, k -> new LinkedHashSet<>()).add(_word);
            }
        }
    }

    return _results;
}

From source file:com.github.kutschkem.Qgen.annotators.OvergeneratorPermutation.java

License:Open Source License

/**
 * Joins the labels' words with single spaces, removes whitespace before
 * punctuation, and uppercases the first character of the result.
 * Returns the empty string for an empty label list.
 */
private String joinWithWhiteSpaces(List<CoreLabel> labels) {
    StringBuilder buf = new StringBuilder();

    for (CoreLabel l : labels) {
        buf.append(' ');
        buf.append(l.word());
    }

    String result = buf.toString().trim().replaceAll("\\s+", " ");
    result = result.replaceAll("\\s(?=\\p{Punct})", ""); // remove leading whitespaces of punctuation

    // FIX: capitalize without a regex replacement. The old
    // `replaceFirst("^.", result.substring(0, 1).toUpperCase())` threw
    // StringIndexOutOfBoundsException on an empty list, and mis-handled a result
    // starting with '$' or '\' ($ / backslash are metacharacters in the
    // Matcher replacement string).
    if (!result.isEmpty()) {
        result = Character.toUpperCase(result.charAt(0)) + result.substring(1);
    }

    return result;
}

From source file:com.graphbrain.eco.StanfordLemmatizer.java

License:Open Source License

/**
 * Annotates the given text with this instance's pipeline and collects, token by
 * token, both the surface words and their lemmas.
 *
 * @param returnType 0 to return the lemmas, any other value to return the words
 */
public List<String> lemmatize(String documentText, int returnType) {
    List<String> words = new LinkedList<>();
    List<String> lemmas = new LinkedList<>();

    // Wrap the raw text in an Annotation and run every configured annotator on it.
    Annotation document = new Annotation(documentText);
    this.pipeline.annotate(document);

    // Walk the sentences and record each token's word and lemma in parallel lists.
    for (CoreMap sentence : document.get(SentencesAnnotation.class)) {
        for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
            words.add(token.word());
            lemmas.add(token.lemma());
        }
    }

    // returnType == 0 selects lemmas; anything else selects the surface forms.
    return returnType == 0 ? lemmas : words;
}

From source file:com.panot.JavaCoref.MyMUCMentionExtractor.java

License:Open Source License

/**
 * Reads the next {@code <DOC>...</DOC>} block from {@code fileContents} (resuming at
 * {@code currentOffset}), tokenizes its sentences, extracts the gold coreference
 * mentions encoded as {@code <COREF ...>} SGML tags (and, when enabled, gold NE tags),
 * runs the Stanford pipeline over the rebuilt document, optionally layers in
 * CRF-tagged "term" mentions, selects the predicted-mention set according to the
 * configured experiment type, and returns the arranged Document for coref.
 *
 * @return the next Document, or {@code null} when no further document exists
 * @throws Exception on malformed NE markup or downstream pipeline failures
 */
@Override
public Document nextDoc() throws Exception {
    // Per-document accumulators: tokens, parse trees, gold/predicted mentions,
    // and the per-sentence CoreMaps handed to the Stanford pipeline.
    List<List<CoreLabel>> allWords = new ArrayList<List<CoreLabel>>();
    List<Tree> allTrees = new ArrayList<Tree>();
    List<List<Mention>> allGoldMentions = new ArrayList<List<Mention>>();
    List<List<Mention>> allPredictedMentions;
    List<CoreMap> allSentences = new ArrayList<CoreMap>();
    Annotation docAnno = new Annotation("");

    // MUC SGML layout: one <DOC> per document; sentence text lives inside
    // <s>, <hl>, <dd> or <DATELINE> elements.
    Pattern docPattern = Pattern.compile("<DOC>(.*?)</DOC>", Pattern.DOTALL + Pattern.CASE_INSENSITIVE);
    Pattern sentencePattern = Pattern.compile("(<s>|<hl>|<dd>|<DATELINE>)(.*?)(</s>|</hl>|</dd>|</DATELINE>)",
            Pattern.DOTALL + Pattern.CASE_INSENSITIVE);
    Matcher docMatcher = docPattern.matcher(fileContents);
    // No document after the current offset: end of corpus.
    if (!docMatcher.find(currentOffset))
        return null;

    // Advance the cursor past this document for the next call.
    currentOffset = docMatcher.end();
    String doc = docMatcher.group(1);
    Matcher sentenceMatcher = sentencePattern.matcher(doc);
    String ner = null; // NE label currently "open" while scanning tokens, or null

    //Maintain current document ID.
    Pattern docIDPattern = Pattern.compile("<DOCNO>(.*?)</DOCNO>", Pattern.DOTALL + Pattern.CASE_INSENSITIVE);
    Matcher docIDMatcher = docIDPattern.matcher(doc);
    if (docIDMatcher.find())
        currentDocumentID = docIDMatcher.group(1);
    else
        currentDocumentID = "documentAfter " + currentDocumentID;

    while (sentenceMatcher.find()) {
        String sentenceString = sentenceMatcher.group(2);
        List<CoreLabel> words = tokenizerFactory.getTokenizer(new StringReader(sentenceString), "invertible")
                .tokenize();

        // FIXING TOKENIZATION PROBLEMS: re-join tokens the tokenizer split apart.
        for (int i = 0; i < words.size(); i++) {
            CoreLabel w = words.get(i);
            // A "$" following PRP/WP was split off ("PRP$"/"WP$"); glue it back on.
            if (i > 0 && w.word().equals("$")) {
                if (!words.get(i - 1).word().endsWith("PRP") && !words.get(i - 1).word().endsWith("WP"))
                    continue;
                words.get(i - 1).set(CoreAnnotations.TextAnnotation.class, words.get(i - 1).word() + "$");
                words.remove(i);
                i--; // re-examine this index after the removal
            } else if (w.word().equals("\\/")) {
                // An escaped slash was split out; re-join "left\/right" into one token
                // (unless the left neighbor is a closing COREF tag).
                if (words.get(i - 1).word().equals("</COREF>"))
                    continue;
                w.set(CoreAnnotations.TextAnnotation.class,
                        words.get(i - 1).word() + "\\/" + words.get(i + 1).word());
                words.remove(i + 1);
                words.remove(i - 1);
            }
        }
        // END FIXING TOKENIZATION PROBLEMS

        List<CoreLabel> sentence = new ArrayList<CoreLabel>();
        // MUC accepts embedded coref mentions, so we need to keep a stack for the mentions currently open
        Stack<Mention> stack = new Stack<Mention>();
        List<Mention> mentions = new ArrayList<Mention>();

        allWords.add(sentence);
        allGoldMentions.add(mentions);

        for (CoreLabel word : words) {
            String w = word.get(CoreAnnotations.TextAnnotation.class);
            // found regular token: WORD/POS
            if (!w.startsWith("<") && w.contains("\\/") && w.lastIndexOf("\\/") != w.length() - 2) {
                int i = w.lastIndexOf("\\/");
                String w1 = w.substring(0, i);
                // we do NOT set POS info here. We take the POS tags from the parser!
                word.set(CoreAnnotations.TextAnnotation.class, w1);
                word.remove(CoreAnnotations.OriginalTextAnnotation.class);
                if (Constants.USE_GOLD_NE) {
                    // Propagate the currently open gold NE label (or "O") to the token.
                    if (ner != null) {
                        word.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
                    } else {
                        word.set(CoreAnnotations.NamedEntityTagAnnotation.class, "O");
                    }
                }
                sentence.add(word);
            }
            // found the start SGML tag for a NE, e.g., "<ORGANIZATION>"
            else if (w.startsWith("<") && !w.startsWith("<COREF") && !w.startsWith("</")) {
                Pattern nerPattern = Pattern.compile("<(.*?)>");
                Matcher m = nerPattern.matcher(w);
                m.find();
                ner = m.group(1);
            }
            // found the end SGML tag for a NE, e.g., "</ORGANIZATION>"
            else if (w.startsWith("</") && !w.startsWith("</COREF")) {
                Pattern nerPattern = Pattern.compile("</(.*?)>");
                Matcher m = nerPattern.matcher(w);
                m.find();
                String ner1 = m.group(1);
                // A close tag must match the currently open NE label.
                if (ner != null && !ner.equals(ner1))
                    throw new RuntimeException("Unmatched NE labels in MUC file: " + ner + " v. " + ner1);
                ner = null;
            }
            // found the start SGML tag for a coref mention
            else if (w.startsWith("<COREF")) {
                Mention mention = new Mention();
                // position of this mention in the sentence
                mention.startIndex = sentence.size();

                // extract GOLD info about this coref chain. needed for eval
                Pattern idPattern = Pattern.compile("ID=\"(.*?)\"");
                Pattern refPattern = Pattern.compile("REF=\"(.*?)\"");

                Matcher m = idPattern.matcher(w);
                m.find();
                mention.mentionID = Integer.valueOf(m.group(1));

                m = refPattern.matcher(w);
                if (m.find()) {
                    mention.originalRef = Integer.valueOf(m.group(1));
                }

                // open mention. keep track of all open mentions using the stack
                stack.push(mention);
            }
            // found the end SGML tag for a coref mention
            else if (w.equals("</COREF>")) {
                Mention mention = stack.pop();
                mention.endIndex = sentence.size();

                // this is a closed mention. add it to the final list of mentions
                // System.err.printf("Found MENTION: ID=%d, REF=%d\n", mention.mentionID, mention.originalRef);
                mentions.add(mention);
            } else {
                // Plain token without a "\/" POS separator (e.g. punctuation).
                word.remove(CoreAnnotations.OriginalTextAnnotation.class);
                if (Constants.USE_GOLD_NE) {
                    if (ner != null) {
                        word.set(CoreAnnotations.NamedEntityTagAnnotation.class, ner);
                    } else {
                        word.set(CoreAnnotations.NamedEntityTagAnnotation.class, "O");
                    }
                }
                sentence.add(word);
            }
        }
        // Rebuild the plain-text sentence and assign 1-based token indices.
        StringBuilder textContent = new StringBuilder();
        for (int i = 0; i < sentence.size(); i++) {
            CoreLabel w = sentence.get(i);
            w.set(CoreAnnotations.IndexAnnotation.class, i + 1);
            w.set(CoreAnnotations.UtteranceAnnotation.class, 0);
            if (i > 0)
                textContent.append(" ");
            textContent.append(w.getString(CoreAnnotations.TextAnnotation.class));
        }
        CoreMap sentCoreMap = new Annotation(textContent.toString());
        allSentences.add(sentCoreMap);
        sentCoreMap.set(CoreAnnotations.TokensAnnotation.class, sentence);
    }

    // assign goldCorefClusterID
    Map<Integer, Mention> idMention = Generics.newHashMap(); // temporary use
    for (List<Mention> goldMentions : allGoldMentions) {
        for (Mention m : goldMentions) {
            idMention.put(m.mentionID, m);
        }
    }
    // Follow each mention's REF chain to its head (a mention with no REF or an
    // already-assigned cluster) and adopt that cluster id.
    for (List<Mention> goldMentions : allGoldMentions) {
        for (Mention m : goldMentions) {
            if (m.goldCorefClusterID == -1) {
                if (m.originalRef == -1)
                    m.goldCorefClusterID = m.mentionID;
                else {
                    int ref = m.originalRef;
                    while (true) {
                        Mention m2 = idMention.get(ref);
                        if (m2.goldCorefClusterID != -1) {
                            m.goldCorefClusterID = m2.goldCorefClusterID;
                            break;
                        } else if (m2.originalRef == -1) {
                            m2.goldCorefClusterID = m2.mentionID;
                            m.goldCorefClusterID = m2.goldCorefClusterID;
                            break;
                        } else {
                            ref = m2.originalRef;
                        }
                    }
                }
            }
        }
    }

    // Run the full Stanford pipeline over the reconstructed document.
    docAnno.set(CoreAnnotations.SentencesAnnotation.class, allSentences);
    stanfordProcessor.annotate(docAnno);

    // Sanity-check that the annotated tokens align 1:1 with the gold tokens,
    // then adopt the annotated tokens and collect dependency graphs and trees.
    if (allSentences.size() != allWords.size())
        throw new IllegalStateException("allSentences != allWords");
    for (int i = 0; i < allSentences.size(); i++) {
        List<CoreLabel> annotatedSent = allSentences.get(i).get(CoreAnnotations.TokensAnnotation.class);
        List<CoreLabel> unannotatedSent = allWords.get(i);
        List<Mention> mentionInSent = allGoldMentions.get(i);
        for (Mention m : mentionInSent) {
            m.dependency = allSentences.get(i)
                    .get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
        }
        if (annotatedSent.size() != unannotatedSent.size()) {
            throw new IllegalStateException("annotatedSent != unannotatedSent");
        }
        for (int j = 0, sz = annotatedSent.size(); j < sz; j++) {
            CoreLabel annotatedWord = annotatedSent.get(j);
            CoreLabel unannotatedWord = unannotatedSent.get(j);
            if (!annotatedWord.get(CoreAnnotations.TextAnnotation.class)
                    .equals(unannotatedWord.get(CoreAnnotations.TextAnnotation.class))) {
                throw new IllegalStateException("annotatedWord != unannotatedWord");
            }
        }
        allWords.set(i, annotatedSent);
        allTrees.add(allSentences.get(i).get(TreeCoreAnnotations.TreeAnnotation.class));
    }

    // term things: optionally run an external CRF tagger (crfsuite) over the
    // document to extract additional "term" mentions.

    List<List<Mention>> termMentions = new ArrayList<List<Mention>>();

    if (use_term) {
        String dataCrf = "";
        System.err.print("FEAT TYPE: ");
        System.err
                .println(props.getProperty(MyConstants.TTE_FEATURE_GENERATOR, MyConstants.TTE_FEATURE_CORENLP));
        // Choose the feature formatter (NLTK-style vs CoreNLP-style features).
        if (props.getProperty(MyConstants.TTE_FEATURE_GENERATOR, MyConstants.TTE_FEATURE_CORENLP)
                .equals(MyConstants.TTE_FEATURE_NLTK)) {
            dataCrf = NltkCrfFormatter.annotationToCrfString(docAnno);
        } else {
            dataCrf = CrfFormatter.annotationToCrfString(docAnno);
        }
        List<List<String>> tagResult = new ArrayList<List<String>>();

        try {
            tagResult = CrfsuiteCaller.tag(dataCrf, props.getProperty(MyConstants.TTE_MODEL));

            // Optionally dump the CRF input for debugging/inspection.
            if (props.containsKey(MyConstants.TTE_SAVE_CRF_DATA)) {
                String crfDataFilename = props.getProperty(MyConstants.TTE_SAVE_CRF_DATA);

                File crfDataFile = new File(crfDataFilename);
                BufferedWriter bw = new BufferedWriter(new FileWriter(crfDataFile));
                bw.write(dataCrf);
                bw.close();
            }

        } catch (Exception e) {
            // NOTE(review): tag failures are swallowed; tagResult stays empty.
            System.err.println("Crfsuite tag failed");
        }

        termAsMentionFinder.setTags(tagResult);
        termMentions = termAsMentionFinder.extractPredictedMentions(docAnno, maxID, dictionaries);

        maxID = termAsMentionFinder.getMaxID();
    }

    // extract predicted mentions
    allPredictedMentions = mentionFinder.extractPredictedMentions(docAnno, maxID, dictionaries);

    if (use_term && props.containsKey(MyConstants.TTE_KEEP_PRON)) {
        termMentions = injectPronoun(termMentions, allPredictedMentions);
    }

    // Select the final predicted-mention set per the configured experiment type
    // (union/intersection of detector, term, and gold mention sets).
    if (experimentType != null) {
        if (experimentType.equals(MyConstants.EXP_TYPE_03_UNION)) {
            List<List<Mention>> usingMentions = unionMentions(allPredictedMentions, allGoldMentions);
            allPredictedMentions = usingMentions;
        } else if (experimentType.equals(MyConstants.EXP_TYPE_03_INTERSECT)) {
            List<List<Mention>> usingMentions = intersectMentions(allPredictedMentions, allGoldMentions);
            allPredictedMentions = usingMentions;
        } else if (use_term && experimentType.equals(MyConstants.EXP_TYPE_04_CHECK)) {
            allPredictedMentions = termMentions;
        } else if (use_term && experimentType.equals(MyConstants.EXP_TYPE_04_SUPER)) {
            List<List<Mention>> usingMentions = superstringMentions(termMentions, allPredictedMentions);
            allPredictedMentions = usingMentions;
        } else if (use_term && experimentType.equals(MyConstants.EXP_TYPE_04_OVERLAP)) {
            List<List<Mention>> usingMentions = overlapMentions(termMentions, allPredictedMentions);
            allPredictedMentions = usingMentions;
        } else if (use_term && experimentType.equals(MyConstants.EXP_TYPE_04_UNION)) {
            List<List<Mention>> usingMentions = unionMentions(termMentions, allPredictedMentions);
            allPredictedMentions = usingMentions;
        } else if (use_term && experimentType.equals(MyConstants.EXP_TYPE_05_SUPER)) {
            List<List<Mention>> usingMentions = superstringMentions(termMentions, allGoldMentions);
            allPredictedMentions = usingMentions;
        } else if (use_term && experimentType.equals(MyConstants.EXP_TYPE_05_OVERLAP)) {
            List<List<Mention>> usingMentions = overlapMentions(termMentions, allGoldMentions);
            allPredictedMentions = usingMentions;
        } else {
            System.err.println(experimentType);
            System.err.println("Unknown experiment type. Using mention detector.");
        }
    } else if (useGoldMention) {
        allPredictedMentions = allGoldMentions;
    }

    // add the relevant fields to mentions and order them for coref
    return arrange(docAnno, allWords, allTrees, allPredictedMentions, allGoldMentions, true);
}

From source file:conditionalCFG.ConditionalCFGParser.java

License:Open Source License

/**
 * Returns the CoreLabel for the given terminal index: the cached original label
 * when present (backfilling its value from the word if unset), otherwise a fresh
 * label built from the word index, character offsets, and original tag.
 */
private CoreLabel getCoreLabel(int labelIndex) {
    // Prefer the cached original label when one exists.
    CoreLabel cached = originalCoreLabels[labelIndex];
    if (cached != null) {
        if (cached.value() == null && cached.word() != null) {
            cached.setValue(cached.word());
        }
        return cached;
    }

    // Otherwise synthesize a new label from the indexed word and stored offsets.
    String wordStr = wordIndex.get(words[labelIndex]);
    CoreLabel fresh = new CoreLabel();
    fresh.setValue(wordStr);
    fresh.setWord(wordStr);
    fresh.setBeginPosition(beginOffsets[labelIndex]);
    fresh.setEndPosition(endOffsets[labelIndex]);
    if (originalTags[labelIndex] != null) {
        fresh.setTag(originalTags[labelIndex].tag());
    }
    return fresh;
}

From source file:coreferenceresolver.util.StanfordUtil.java

/**
 * Reads {@code documentFile} line by line (one review per line), annotates each
 * review with a Stanford pipeline (plus sentiment/dependency/comparative analysis
 * unless {@code simpleInit}), builds Review/Sentence/Token structures into
 * {@code reviews}, and writes each token as "word/POS" to ./input.txt.pos.
 *
 * @param simpleInit when true, run only tokenize/ssplit/pos/parse and skip all
 *        sentiment, dependency, and comparative processing
 * @throws FileNotFoundException if {@code documentFile} does not exist
 * @throws IOException on any read/write failure
 */
public void init(boolean simpleInit) throws FileNotFoundException, IOException {
    String outPosFilePath = "./input.txt.pos";
    props = new Properties();
    if (simpleInit) {
        props.put("annotators", "tokenize, ssplit, pos, parse");
    } else {
        props.put("annotators", "tokenize, ssplit, pos, parse, sentiment");
    }
    pipeline = new StanfordCoreNLP(props);

    reviews = new ArrayList<>();

    // FIX: try-with-resources. Previously the writer leaked whenever an exception
    // was thrown mid-processing, and the reader was never closed at all.
    try (BufferedWriter bw = new BufferedWriter(new FileWriter(new File(outPosFilePath)));
            BufferedReader bufferedReader = new BufferedReader(new FileReader(documentFile))) {

        String reviewLine;
        int reviewId = 0;
        int sentenceId;
        // Read the input file line by line; each line is one review.
        while ((reviewLine = bufferedReader.readLine()) != null) {
            sentenceId = 0;
            Review newReview = new Review();

            // Add to reviews list
            newReview.setRawContent(reviewLine);

            // Create an empty Annotation for the review text and run all annotators.
            document = new Annotation(reviewLine);
            pipeline.annotate(document);
            List<CoreMap> sentences = document.get(SentencesAnnotation.class);

            // Begin extracting from paragraphs
            for (CoreMap sentence : sentences) {
                int sentenceOffsetBegin = sentence.get(CharacterOffsetBeginAnnotation.class);
                int sentenceOffsetEnd = sentence.get(CharacterOffsetEndAnnotation.class);
                Sentence newSentence = new Sentence();
                newSentence.setReviewId(reviewId);
                newSentence.setRawContent(sentence.toString());
                newSentence.setOffsetBegin(sentenceOffsetBegin);
                newSentence.setOffsetEnd(sentenceOffsetEnd);

                if (!simpleInit) {
                    // Sentence-level sentiment class from the RNN sentiment tree.
                    int sentimentLevel = RNNCoreAnnotations.getPredictedClass(
                            sentence.get(SentimentCoreAnnotations.SentimentAnnotatedTree.class));
                    newSentence.setSentimentLevel(sentimentLevel);

                    // Dependency Parsing
                    SemanticGraph collCCDeps = sentence.get(CollapsedCCProcessedDependenciesAnnotation.class);
                    Collection<TypedDependency> typedDeps = collCCDeps.typedDependencies();
                    newSentence.setDependencies(typedDeps);
                }

                List<Tree> sentenceTreeLeaves = sentence.get(TreeCoreAnnotations.TreeAnnotation.class)
                        .getLeaves();

                int i = 0;
                for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
                    Token newToken = new Token();

                    Tree tokenTree = sentenceTreeLeaves.get(i);
                    newToken.setTokenTree(tokenTree);

                    String word = token.get(TextAnnotation.class);
                    newToken.setWord(word);

                    String pos = token.get(PartOfSpeechAnnotation.class);
                    newToken.setPOS(pos);

                    int offsetBegin = token.get(CharacterOffsetBeginAnnotation.class);
                    newToken.setOffsetBegin(offsetBegin);

                    int offsetEnd = token.get(CharacterOffsetEndAnnotation.class);
                    newToken.setOffsetEnd(offsetEnd);

                    if (!simpleInit) {
                        // Check NP relative clause: a WHNP two levels up marks a
                        // relative pronoun (except "who"/"what").
                        Tree twoLevelsAncestor = tokenTree.ancestor(2,
                                sentence.get(TreeCoreAnnotations.TreeAnnotation.class));
                        if (twoLevelsAncestor.value().equals("WHNP") && !word.toLowerCase().equals("who")
                                && !word.toLowerCase().equals("what")) {
                            newToken.setRelativePronoun(true);
                        }

                        // Calculate sentiment for this token
                        int newTokenSentiment = Util.retrieveOriginalSentiment(newToken.getWord());
                        newToken.setSentimentOrientation(newTokenSentiment, newSentence.getDependencies());
                    }

                    newSentence.addToken(newToken);
                    bw.write(token.word() + "/" + token.tag() + " ");
                    ++i;
                }
                bw.newLine();

                if (!simpleInit) {
                    // Check if this sentence contains a comparative indicator.
                    // If yes, it is a comparative sentence. Identify which NP is
                    // superior or inferior in this sentence.
                    List<Token> comparativeTokens = FeatureExtractor.findComparativeIndicator(newSentence, null,
                            null);
                    // TODO: check special comparative samples
                    if (!comparativeTokens.isEmpty()) {
                        newSentence.initComparatives(comparativeTokens);
                    }
                }

                newReview.addSentence(newSentence);

                ++sentenceId;
            }

            // End-of-review sentinel line in the POS output.
            bw.write("./.");
            bw.newLine();

            reviews.add(newReview);
            ++reviewId;
        }
    }
}