Usage examples for org.apache.lucene.search.highlight.Scorer#getTokenScore
/**
 * Returns the score of the token the scorer is currently positioned on.
 *
 * <p>In the usage examples in this listing it is called exactly once after each successful
 * {@code TokenStream.incrementToken()}, and the returned value is stored as that token's score;
 * only tokens whose stored score is greater than zero are reported as matches.
 *
 * <p>NOTE(review): the exact scoring scale is defined by the implementing scorer and is not
 * visible here — confirm against the Lucene highlighter documentation.
 *
 * @return the score assigned to the current token
 */
public float getTokenScore();
From source file: it.cnr.ilc.lc.clavius.search.ClaviusHighlighter.java
/**
 * Builds one {@link Annotation} for every token of the stream that the fragment scorer rates
 * above zero, together with the tokens that surround it.
 *
 * <p>The stream is consumed token by token; each token's score is recorded in a
 * {@code ClaviusTokenGroup}. For every recorded token with a positive score an annotation is
 * created that carries the token text, the given document id, and up to {@code ctxLenght}
 * tokens of left and right context (token texts joined by single spaces). All remaining
 * annotation fields are filled with placeholder values ({@code ""} or {@code -1}).
 *
 * <p>Scanning stops at the first token whose start offset is not below
 * {@code getMaxDocCharsToAnalyze()}, even though a {@code QueryScorer} is itself configured
 * with {@code Integer.MAX_VALUE}.
 *
 * <p>The token stream (or the replacement stream returned by the scorer's {@code init}) is
 * always ended and closed before this method returns; failures while doing so are ignored.
 *
 * @param tokenStream the stream to analyse; it is reset, consumed and closed by this method
 * @param idDoc document identifier copied onto every produced annotation
 * @param mergeContiguousFragments not used by this implementation
 * @param maxNumFragments not used by this implementation
 * @return the annotations in token order; empty (never {@code null}) when no token scores
 *         above zero
 * @throws IOException if the scorer or the token stream fails while being read
 * @throws InvalidTokenOffsetsException declared for signature compatibility; no statement in
 *         this method throws it explicitly
 */
public final List<Annotation> getBestTextClaviusFragments(TokenStream tokenStream, String idDoc,
        boolean mergeContiguousFragments, int maxNumFragments)
        throws IOException, InvalidTokenOffsetsException {
    List<Annotation> ret = new ArrayList<>();
    Scorer fragmentScorer = getFragmentScorer();
    int maxDocCharsToAnalyze = getMaxDocCharsToAnalyze();

    // Make sure the term attribute is registered on the stream; the handle itself is not needed.
    tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);

    if (fragmentScorer instanceof QueryScorer) {
        ((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
    }
    // The scorer may wrap the stream; if it does, the wrapper is what must be consumed.
    TokenStream newStream = fragmentScorer.init(tokenStream);
    if (newStream != null) {
        tokenStream = newStream;
    }
    // The whole text is scored as one single, initially empty fragment.
    fragmentScorer.startFragment(new ClaviusTextFragment(new StringBuilder(), 0, 0));

    try {
        ClaviusTokenGroup tokenGroup = new ClaviusTokenGroup(tokenStream);
        tokenStream.reset();

        // Pass 1: record the score of every token up to the analysis limit.
        while (tokenStream.incrementToken() && offsetAtt.startOffset() < maxDocCharsToAnalyze) {
            tokenGroup.addToken(fragmentScorer.getTokenScore());
        }

        // Pass 2: turn every positively scored token into an annotation with its context.
        Token[] tokens = tokenGroup.getTokens();
        int numTokens = tokenGroup.getNumTokens();
        StringBuilder context = new StringBuilder();
        for (int i = 0; i < numTokens; i++) {
            if (tokenGroup.getScore(i) > 0) {
                Annotation a = new Annotation();
                a.setMatched(tokenGroup.getToken(i).toString());
                a.setIdDoc(idDoc);

                // Left context: at most ctxLenght tokens immediately before the match.
                int leftStart = Math.max(0, i - ctxLenght);
                context.setLength(0);
                for (int j = leftStart; j < i; j++) {
                    if (j > leftStart) {
                        context.append(" ");
                    }
                    context.append(tokens[j].toString());
                }
                a.setLeftContext(context.toString());

                // Right context: at most ctxLenght tokens immediately after the match.
                int rightEnd = Math.min(i + ctxLenght + 1, numTokens);
                context.setLength(0);
                for (int j = i + 1; j < rightEnd; j++) {
                    if (j > i + 1) {
                        context.append(" ");
                    }
                    context.append(tokens[j].toString());
                }
                a.setRightContext(context.toString());

                // Remaining fields are not known at this point; fill in placeholders.
                a.setConcept("");
                a.setType("");
                a.setIdNeo4j(-1L);
                a.setPageNum(-1L);
                a.setResourceObject("");
                a.setId(-1L);
                ret.add(a);
            }
        }
        return ret;
    } finally {
        if (tokenStream != null) {
            try {
                tokenStream.end();
            } catch (Exception ignored) {
                // Best-effort shutdown: a failure here must not mask the method's outcome.
            } finally {
                // Close even when end() failed, so the stream's resources are not leaked.
                try {
                    tokenStream.close();
                } catch (Exception ignored) {
                    // Best-effort shutdown, see above.
                }
            }
        }
    }
}
From source file: it.cnr.ilc.lc.claviusweb.fulltextsearch.ClaviusHighlighter.java
/**
 * Builds one {@link Annotation} for every token of the stream that the fragment scorer rates
 * above zero, together with the tokens that surround it.
 *
 * <p>The stream is consumed token by token; each token's score is recorded in a
 * {@code ClaviusTokenGroup}. For every recorded token with a positive score an annotation is
 * created that carries the token text, the document's {@code "idDoc"} value, and up to
 * {@code ctxLenght} tokens of left and right context (token texts joined by single spaces).
 * The same {@code "idDoc"} value, parsed as a {@code long}, is stored as the Neo4j id; all
 * remaining annotation fields are filled with placeholder values ({@code ""} or {@code -1}).
 *
 * <p>Scanning stops at the first token whose start offset is not below
 * {@code getMaxDocCharsToAnalyze()}, even though a {@code QueryScorer} is itself configured
 * with {@code Integer.MAX_VALUE}.
 *
 * <p>The token stream (or the replacement stream returned by the scorer's {@code init}) is
 * always ended and closed before this method returns; failures while doing so are ignored.
 *
 * @param tokenStream the stream to analyse; it is reset, consumed and closed by this method
 * @param document the document whose {@code "idDoc"} field identifies the annotations; it is
 *        only read when at least one token scores above zero
 * @param mergeContiguousFragments not used by this implementation
 * @param maxNumFragments not used by this implementation
 * @return the annotations in token order; empty (never {@code null}) when no token scores
 *         above zero
 * @throws IOException if the scorer or the token stream fails while being read
 * @throws InvalidTokenOffsetsException declared for signature compatibility; no statement in
 *         this method throws it explicitly
 * @throws NumberFormatException if a token matches and the document's {@code "idDoc"} value
 *         is missing or is not a decimal {@code long}
 */
public final List<Annotation> getBestTextClaviusFragments(TokenStream tokenStream, Document document,
        boolean mergeContiguousFragments, int maxNumFragments)
        throws IOException, InvalidTokenOffsetsException {
    List<Annotation> ret = new ArrayList<>();
    Scorer fragmentScorer = getFragmentScorer();
    int maxDocCharsToAnalyze = getMaxDocCharsToAnalyze();

    // Make sure the term attribute is registered on the stream; the handle itself is not needed.
    tokenStream.addAttribute(CharTermAttribute.class);
    OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);

    if (fragmentScorer instanceof QueryScorer) {
        ((QueryScorer) fragmentScorer).setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
    }
    // The scorer may wrap the stream; if it does, the wrapper is what must be consumed.
    TokenStream newStream = fragmentScorer.init(tokenStream);
    if (newStream != null) {
        tokenStream = newStream;
    }
    // The whole text is scored as one single, initially empty fragment.
    fragmentScorer.startFragment(new ClaviusTextFragment(new StringBuilder(), 0, 0));

    try {
        ClaviusTokenGroup tokenGroup = new ClaviusTokenGroup(tokenStream);
        tokenStream.reset();

        // Pass 1: record the score of every token up to the analysis limit.
        while (tokenStream.incrementToken() && offsetAtt.startOffset() < maxDocCharsToAnalyze) {
            tokenGroup.addToken(fragmentScorer.getTokenScore());
        }

        // Pass 2: turn every positively scored token into an annotation with its context.
        Token[] tokens = tokenGroup.getTokens();
        int numTokens = tokenGroup.getNumTokens();
        StringBuilder context = new StringBuilder();
        for (int i = 0; i < numTokens; i++) {
            if (tokenGroup.getScore(i) > 0) {
                // Looked up once per match and reused for both the document id and the Neo4j id.
                String idDoc = document.get("idDoc");

                Annotation a = new Annotation();
                a.setMatched(tokenGroup.getToken(i).toString());
                a.setIdDoc(idDoc);

                // Left context: at most ctxLenght tokens immediately before the match.
                int leftStart = Math.max(0, i - ctxLenght);
                context.setLength(0);
                for (int j = leftStart; j < i; j++) {
                    if (j > leftStart) {
                        context.append(" ");
                    }
                    context.append(tokens[j].toString());
                }
                a.setLeftContext(context.toString());

                // Right context: at most ctxLenght tokens immediately after the match.
                int rightEnd = Math.min(i + ctxLenght + 1, numTokens);
                context.setLength(0);
                for (int j = i + 1; j < rightEnd; j++) {
                    if (j > i + 1) {
                        context.append(" ");
                    }
                    context.append(tokens[j].toString());
                }
                a.setRightContext(context.toString());

                // Remaining fields are not known at this point; fill in placeholders.
                a.setConcept("");
                a.setType("");
                a.setPageNum(-1L);
                // NOTE(review): an earlier, commented-out version read the "idNeo4j" field here;
                // confirm that reusing "idDoc" as the Neo4j id is intended.
                a.setIdNeo4j(Long.parseLong(idDoc));
                a.setResourceObject("");
                a.setId(-1L);
                ret.add(a);
            }
        }
        return ret;
    } finally {
        if (tokenStream != null) {
            try {
                tokenStream.end();
            } catch (Exception ignored) {
                // Best-effort shutdown: a failure here must not mask the method's outcome.
            } finally {
                // Close even when end() failed, so the stream's resources are not leaked.
                try {
                    tokenStream.close();
                } catch (Exception ignored) {
                    // Best-effort shutdown, see above.
                }
            }
        }
    }
}