Example usage for edu.stanford.nlp.ling.tokensregex MatchedExpression getTokenOffsets

List of usage examples for edu.stanford.nlp.ling.tokensregex MatchedExpression getTokenOffsets

Introduction

On this page you can find example usage of getTokenOffsets from edu.stanford.nlp.ling.tokensregex.MatchedExpression.

Prototype

public Interval<Integer> getTokenOffsets() 

Source Link

Usage

From source file: edu.cmu.ml.rtw.users.ssrivastava.RegexExtractor.java

/**
 * Applies the expression extractor to every sentence of {@code document} and turns each
 * match into a (token span, extracted text, score) triple.
 *
 * <p>Sentences rejected by {@code isGoodToProcess} are skipped. When {@code verbose} is
 * set, each sentence is passed to {@code printSentence} and match diagnostics are written
 * to {@code System.out}.
 *
 * @param document the document whose sentences are scanned
 * @return the collected triples, in sentence order and then match order; empty when
 *         nothing matched
 */
@Override
public List<Triple<TokenSpan, String, Double>> annotate(DocumentNLP document) {
    List<Triple<TokenSpan, String, Double>> annotations = new ArrayList<Triple<TokenSpan, String, Double>>();

    for (int sentenceIndex = 0; sentenceIndex < document.getSentenceCount(); sentenceIndex++) {
        CoreMap stanfordSentence = getStanfordSentence(document, sentenceIndex);
        if (verbose) {
            printSentence(stanfordSentence);
        }

        if (!isGoodToProcess(stanfordSentence)) {
            continue;
        }

        List<MatchedExpression> matches = extractor.extractExpressions(stanfordSentence);
        if (matches.isEmpty()) {
            continue;
        }
        if (verbose) {
            System.out.println(matches.size());
        }

        for (MatchedExpression match : matches) {
            Interval<Integer> tokenOffsets = match.getTokenOffsets();
            // The span end is passed as second() - 1.
            // NOTE(review): presumably the interval end is exclusive and TokenSpan's is inclusive - confirm.
            TokenSpan span = new TokenSpan(document, sentenceIndex, tokenOffsets.first(),
                    tokenOffsets.second() - 1);

            // Drops the first seven characters and the final character of the value's string form.
            // NOTE(review): presumably strips a wrapper such as "STRING(" ... ")" - confirm against
            // the value's toString() format.
            String renderedValue = match.getValue().toString();
            String result = renderedValue.substring(7, renderedValue.length() - 1);

            if (verbose) {
                String diagnostic = "[ docId:" + span.getDocument().getName() + " first:" + tokenOffsets.first()
                        + " end:" + tokenOffsets.second() + " result:" + result + " ]";
                System.out.println(diagnostic);
            }

            // Every match gets the same fixed third component, 0.8.
            annotations.add(new Triple<TokenSpan, String, Double>(span, result, 0.8));
        }
    }

    return annotations;
}