Example usage for org.apache.lucene.search.highlight Scorer startFragment

List of usage examples for org.apache.lucene.search.highlight Scorer startFragment

Introduction

On this page you can find an example usage for org.apache.lucene.search.highlight Scorer startFragment.

Prototype

public void startFragment(TextFragment newFragment);

Source Link

Document

Called when a new fragment is started for consideration.

Usage

From source file:it.cnr.ilc.lc.clavius.search.ClaviusHighlighter.java

/**
 * Scores every token of {@code tokenStream} and builds one {@link Annotation}
 * per positively-scored token, capturing up to {@code ctxLenght} tokens of
 * left and right context around each match.
 *
 * @param tokenStream              analyzed token stream of the document text
 * @param idDoc                    document identifier stored on each annotation
 * @param mergeContiguousFragments unused; kept for signature compatibility
 * @param maxNumFragments          unused; kept for signature compatibility
 * @return annotations for all positively-scored tokens (possibly empty, never null)
 * @throws IOException                  if the token stream cannot be read
 * @throws InvalidTokenOffsetsException declared for signature compatibility
 */
public final List<Annotation> getBestTextClaviusFragments(TokenStream tokenStream, String idDoc,
        boolean mergeContiguousFragments, int maxNumFragments)
        throws IOException, InvalidTokenOffsetsException {

    List<Annotation> ret = new ArrayList<>();

    ArrayList<ClaviusTextFragment> docFrags = new ArrayList<>();
    StringBuilder newText = new StringBuilder();

    Scorer fragmentScorer = getFragmentScorer();
    int maxDocCharsToAnalyze = getMaxDocCharsToAnalyze();

    // Register attributes before iterating; only the offsets are read below.
    tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
    ClaviusTextFragment currentFrag = new ClaviusTextFragment(newText, newText.length(), docFrags.size());

    if (fragmentScorer instanceof QueryScorer) {
        // NOTE(review): this lifts the scorer's char limit to MAX_VALUE while the
        // token loop below still stops at maxDocCharsToAnalyze — confirm intended.
        ((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
    }

    // The scorer may wrap the stream (e.g. for caching); prefer the wrapped one.
    TokenStream newStream = fragmentScorer.init(tokenStream);
    if (newStream != null) {
        tokenStream = newStream;
    }
    fragmentScorer.startFragment(currentFrag);
    docFrags.add(currentFrag);

    try {
        ClaviusTokenGroup tokenGroup = new ClaviusTokenGroup(tokenStream);

        tokenStream.reset();

        // First pass: collect every token with its score, up to the char limit.
        for (boolean next = tokenStream.incrementToken(); next
                && (offsetAtt.startOffset() < maxDocCharsToAnalyze); next = tokenStream.incrementToken()) {
            tokenGroup.addToken(fragmentScorer.getTokenScore());
        }

        // Second pass: emit an annotation for each positively-scored token.
        for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
            if (tokenGroup.getScore(i) > 0) {
                Annotation a = new Annotation();
                a.setMatched(tokenGroup.getToken(i).toString());
                a.setIdDoc(idDoc);

                // Left context: up to ctxLenght tokens preceding the match.
                Token[] left = Arrays.copyOfRange(tokenGroup.getTokens(),
                        (i > ctxLenght) ? i - ctxLenght : 0, i);
                StringBuilder sb = new StringBuilder();
                for (int j = 0; j < left.length; j++) {
                    if (j > 0) {
                        sb.append(" ");
                    }
                    sb.append(left[j].toString());
                }
                a.setLeftContext(sb.toString());

                // Right context: up to ctxLenght tokens following the match.
                Token[] right = Arrays.copyOfRange(tokenGroup.getTokens(), i + 1,
                        Math.min(i + ctxLenght + 1, tokenGroup.getNumTokens()));
                sb.setLength(0); // reuse the builder instead of reallocating
                for (int j = 0; j < right.length; j++) {
                    if (j > 0) {
                        sb.append(" ");
                    }
                    sb.append(right[j].toString());
                }
                a.setRightContext(sb.toString());

                // Placeholder values: a search-time hit is not yet linked to a
                // concept or graph node.
                a.setConcept("");
                a.setType("");
                a.setIdNeo4j(-1L);
                a.setPageNum(-1L);
                a.setResourceObject("");
                a.setId(-1L);

                ret.add(a);
            }
        }

        return ret;

    } finally {
        if (tokenStream != null) {
            try {
                tokenStream.end();
                tokenStream.close();
            } catch (Exception ignored) {
                // Best-effort cleanup: a close failure must not mask the result
                // or an in-flight exception.
            }
        }
    }
}

From source file:it.cnr.ilc.lc.claviusweb.fulltextsearch.ClaviusHighlighter.java

/**
 * Scores every token of {@code tokenStream} and builds one {@link Annotation}
 * per positively-scored token, capturing up to {@code ctxLenght} tokens of
 * left and right context around each match. Annotation identifiers are taken
 * from the Lucene {@code document}'s {@code "idDoc"} field.
 *
 * @param tokenStream              analyzed token stream of the document text
 * @param document                 Lucene document providing the {@code "idDoc"} field
 * @param mergeContiguousFragments unused; kept for signature compatibility
 * @param maxNumFragments          unused; kept for signature compatibility
 * @return annotations for all positively-scored tokens (possibly empty, never null)
 * @throws IOException                  if the token stream cannot be read
 * @throws InvalidTokenOffsetsException declared for signature compatibility
 */
public final List<Annotation> getBestTextClaviusFragments(TokenStream tokenStream, Document document,
        boolean mergeContiguousFragments, int maxNumFragments)
        throws IOException, InvalidTokenOffsetsException {

    List<Annotation> ret = new ArrayList<>();

    ArrayList<ClaviusTextFragment> docFrags = new ArrayList<>();
    StringBuilder newText = new StringBuilder();

    Scorer fragmentScorer = getFragmentScorer();
    int maxDocCharsToAnalyze = getMaxDocCharsToAnalyze();

    // Register attributes before iterating; only the offsets are read below.
    tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
    ClaviusTextFragment currentFrag = new ClaviusTextFragment(newText, newText.length(), docFrags.size());

    if (fragmentScorer instanceof QueryScorer) {
        // NOTE(review): this lifts the scorer's char limit to MAX_VALUE while the
        // token loop below still stops at maxDocCharsToAnalyze — confirm intended.
        ((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
    }

    // The scorer may wrap the stream (e.g. for caching); prefer the wrapped one.
    TokenStream newStream = fragmentScorer.init(tokenStream);
    if (newStream != null) {
        tokenStream = newStream;
    }
    fragmentScorer.startFragment(currentFrag);
    docFrags.add(currentFrag);

    try {
        ClaviusTokenGroup tokenGroup = new ClaviusTokenGroup(tokenStream);

        tokenStream.reset();

        // First pass: collect every token with its score, up to the char limit.
        for (boolean next = tokenStream.incrementToken(); next
                && (offsetAtt.startOffset() < maxDocCharsToAnalyze); next = tokenStream.incrementToken()) {
            tokenGroup.addToken(fragmentScorer.getTokenScore());
        }

        // Second pass: emit an annotation for each positively-scored token.
        for (int i = 0; i < tokenGroup.getNumTokens(); i++) {
            if (tokenGroup.getScore(i) > 0) {
                Annotation a = new Annotation();
                a.setMatched(tokenGroup.getToken(i).toString());
                a.setIdDoc(document.get("idDoc"));

                // Left context: up to ctxLenght tokens preceding the match.
                Token[] left = Arrays.copyOfRange(tokenGroup.getTokens(),
                        (i > ctxLenght) ? i - ctxLenght : 0, i);
                StringBuilder sb = new StringBuilder();
                for (int j = 0; j < left.length; j++) {
                    if (j > 0) {
                        sb.append(" ");
                    }
                    sb.append(left[j].toString());
                }
                a.setLeftContext(sb.toString());

                // Right context: up to ctxLenght tokens following the match.
                Token[] right = Arrays.copyOfRange(tokenGroup.getTokens(), i + 1,
                        Math.min(i + ctxLenght + 1, tokenGroup.getNumTokens()));
                sb.setLength(0); // reuse the builder instead of reallocating
                for (int j = 0; j < right.length; j++) {
                    if (j > 0) {
                        sb.append(" ");
                    }
                    sb.append(right[j].toString());
                }
                a.setRightContext(sb.toString());

                // Placeholder values: a search-time hit is not yet linked to a
                // concept or resource.
                a.setConcept("");
                a.setType("");
                a.setPageNum(-1L);
                // NOTE(review): "idDoc" is parsed as the Neo4j id (the "idNeo4j"
                // field is intentionally not used here). Document.get returns null
                // for a missing field, which would make parseLong throw — confirm
                // the field is always present and numeric.
                a.setIdNeo4j(Long.parseLong(document.get("idDoc")));
                a.setResourceObject("");
                a.setId(-1L);

                ret.add(a);
            }
        }

        return ret;

    } finally {
        if (tokenStream != null) {
            try {
                tokenStream.end();
                tokenStream.close();
            } catch (Exception ignored) {
                // Best-effort cleanup: a close failure must not mask the result
                // or an in-flight exception.
            }
        }
    }
}