List of usage examples for org.apache.lucene.index.TermsEnum#attributes()
public abstract AttributeSource attributes();
From source file:com.sindicetech.siren.search.node.TopNodeTermsRewrite.java
License:Open Source License
/**
 * Rewrites {@code query} into a top-level query of type {@code Q} that contains at most
 * {@code Math.min(size, getMaxSize())} term clauses.
 *
 * <p>Terms are gathered through {@code collectTerms} into a bounded priority queue of
 * {@code ScoreTerm}s. Once the queue is full, a term whose boost is lower than the boost of the
 * queue head (or equal, with a term that compares greater) is skipped. The surviving terms are
 * sorted with {@code scoreTermSortByTermComp} and added to the result one by one through
 * {@code addClause}, each with boost {@code query.getBoost() * st.boost}.
 *
 * <p>NOTE(review): if {@code maxSize} evaluates to 0, the first {@code collect} call sees
 * {@code stQueue.size() == maxSize} on an empty queue; assuming this is
 * {@code java.util.PriorityQueue}, {@code peek()} then returns {@code null} and {@code t.boost}
 * would throw a NullPointerException. Confirm that {@code size} is validated elsewhere.
 *
 * @param reader index reader handed to {@code collectTerms}; also used in the final docFreq assertion
 * @param query the multi-term query being rewritten; supplies the field and the base boost
 * @return the query obtained from {@code getTopLevelQuery(query)} after all clauses were added
 * @throws IOException propagated from term collection or from the reader
 */
@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    // No comparator is passed, so the queue orders by ScoreTerm's natural ordering; the code below
    // treats the head as the weakest retained term (presumably lowest boost first; verify in ScoreTerm).
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {
        // `attributes`, `topReaderContext` and `readerContext` are not declared in this method;
        // presumably they are members of TermCollector (confirm).
        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);
        // Terms currently held in stQueue, keyed by their own (mutable) bytes.
        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();
        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        // Scratch ScoreTerm filled in by collect(); replaced or recycled after every insertion.
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();
            // Resets lastTerm for the new enum; only executed when assertions are enabled.
            assert this.compareToLastTerm(null);
            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRef lastTerm;

        // Assertion helper: checks that terms arrive in strictly increasing comparator order and
        // remembers the latest one. Passing null clears the remembered term. Always returns true
        // so that it can be invoked from inside an assert statement.
        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        // Offers the current term to the bounded queue. Always returns true.
        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();
            // make sure within a single seg we always collect
            // terms in order
            assert this.compareToLastTerm(bytes);
            //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    // The evicted head becomes the next scratch instance; it is removed from the
                    // map before its bytes can be overwritten by a later collect() call.
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }
            return true;
        }
    });
    final Q q = this.getTopLevelQuery(query);
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    // Clauses are added in scoreTermSortByTermComp order rather than queue order.
    ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        // Sanity check: the doc frequency accumulated across enums must match the reader's value.
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}
From source file:org.elasticsearch.action.termvectors.TermVectorsResponse.java
License:Apache License
private void buildField(XContentBuilder builder, final CharsRefBuilder spare, Fields theFields, Iterator<String> fieldIter) throws IOException { String fieldName = fieldIter.next(); builder.startObject(fieldName);//w w w . j a v a 2 s . c o m Terms curTerms = theFields.terms(fieldName); // write field statistics buildFieldStatistics(builder, curTerms); builder.startObject(FieldStrings.TERMS); TermsEnum termIter = curTerms.iterator(); BoostAttribute boostAtt = termIter.attributes().addAttribute(BoostAttribute.class); for (int i = 0; i < curTerms.size(); i++) { buildTerm(builder, spare, curTerms, termIter, boostAtt); } builder.endObject(); builder.endObject(); }
From source file:org.sindice.siren.search.node.TopNodeTermsRewrite.java
License:Apache License
/**
 * Rewrites {@code query} into a top-level query of type {@code Q} that contains at most
 * {@code Math.min(size, getMaxSize())} term clauses.
 *
 * <p>Terms are gathered through {@code collectTerms} into a bounded priority queue of
 * {@code ScoreTerm}s. Once the queue is full, a term whose boost is lower than the boost of the
 * queue head (or equal, with a term that compares greater) is skipped. The surviving terms are
 * sorted with {@code scoreTermSortByTermComp} and added to the result one by one through
 * {@code addClause}, each with boost {@code query.getBoost() * st.boost}.
 *
 * <p>NOTE(review): if {@code maxSize} evaluates to 0, the first {@code collect} call sees
 * {@code stQueue.size() == maxSize} on an empty queue; assuming this is
 * {@code java.util.PriorityQueue}, {@code peek()} then returns {@code null} and {@code t.boost}
 * would throw a NullPointerException. Confirm that {@code size} is validated elsewhere.
 *
 * @param reader index reader handed to {@code collectTerms}; also used in the final docFreq assertion
 * @param query the multi-term query being rewritten; supplies the field and the base boost
 * @return the query obtained from {@code getTopLevelQuery()} after all clauses were added
 * @throws IOException propagated from term collection or from the reader
 */
@Override
public Q rewrite(final IndexReader reader, final MultiNodeTermQuery query) throws IOException {
    final int maxSize = Math.min(size, this.getMaxSize());
    // No comparator is passed, so the queue orders by ScoreTerm's natural ordering; the code below
    // treats the head as the weakest retained term (presumably lowest boost first; verify in ScoreTerm).
    final PriorityQueue<ScoreTerm> stQueue = new PriorityQueue<ScoreTerm>();
    this.collectTerms(reader, query, new TermCollector() {
        // `attributes`, `topReaderContext` and `readerContext` are not declared in this method;
        // presumably they are members of TermCollector (confirm).
        private final MaxNonCompetitiveBoostAttribute maxBoostAtt = attributes
                .addAttribute(MaxNonCompetitiveBoostAttribute.class);
        // Terms currently held in stQueue, keyed by their own (mutable) bytes.
        private final Map<BytesRef, ScoreTerm> visitedTerms = new HashMap<BytesRef, ScoreTerm>();
        private TermsEnum termsEnum;
        private Comparator<BytesRef> termComp;
        private BoostAttribute boostAtt;
        // Scratch ScoreTerm filled in by collect(); replaced or recycled after every insertion.
        private ScoreTerm st;

        @Override
        public void setNextEnum(final TermsEnum termsEnum) throws IOException {
            this.termsEnum = termsEnum;
            this.termComp = termsEnum.getComparator();
            // Resets lastTerm for the new enum; only executed when assertions are enabled.
            assert this.compareToLastTerm(null);
            // lazy init the initial ScoreTerm because comparator is not known on ctor:
            if (st == null)
                st = new ScoreTerm(this.termComp, new TermContext(topReaderContext));
            boostAtt = termsEnum.attributes().addAttribute(BoostAttribute.class);
        }

        // for assert:
        private BytesRef lastTerm;

        // Assertion helper: checks that terms arrive in strictly increasing comparator order and
        // remembers the latest one. Passing null clears the remembered term. Always returns true
        // so that it can be invoked from inside an assert statement.
        private boolean compareToLastTerm(final BytesRef t) throws IOException {
            if (lastTerm == null && t != null) {
                lastTerm = BytesRef.deepCopyOf(t);
            } else if (t == null) {
                lastTerm = null;
            } else {
                assert termsEnum.getComparator().compare(lastTerm, t) < 0 : "lastTerm=" + lastTerm + " t=" + t;
                lastTerm.copyBytes(t);
            }
            return true;
        }

        // Offers the current term to the bounded queue. Always returns true.
        @Override
        public boolean collect(final BytesRef bytes) throws IOException {
            final float boost = boostAtt.getBoost();
            // make sure within a single seg we always collect
            // terms in order
            assert this.compareToLastTerm(bytes);
            //System.out.println("TTR.collect term=" + bytes.utf8ToString() + " boost=" + boost + " ord=" + readerContext.ord);
            // ignore uncompetitive hits
            if (stQueue.size() == maxSize) {
                final ScoreTerm t = stQueue.peek();
                if (boost < t.boost)
                    return true;
                if (boost == t.boost && termComp.compare(bytes, t.bytes) > 0)
                    return true;
            }
            ScoreTerm t = visitedTerms.get(bytes);
            final TermState state = termsEnum.termState();
            assert state != null;
            if (t != null) {
                // if the term is already in the PQ, only update docFreq of term in PQ
                assert t.boost == boost : "boost should be equal in all segment TermsEnums";
                t.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
            } else {
                // add new entry in PQ, we must clone the term, else it may get overwritten!
                st.bytes.copyBytes(bytes);
                st.boost = boost;
                visitedTerms.put(st.bytes, st);
                assert st.termState.docFreq() == 0;
                st.termState.register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
                stQueue.offer(st);
                // possibly drop entries from queue
                if (stQueue.size() > maxSize) {
                    // The evicted head becomes the next scratch instance; it is removed from the
                    // map before its bytes can be overwritten by a later collect() call.
                    st = stQueue.poll();
                    visitedTerms.remove(st.bytes);
                    st.termState.clear(); // reset the termstate!
                } else {
                    st = new ScoreTerm(termComp, new TermContext(topReaderContext));
                }
                assert stQueue.size() <= maxSize : "the PQ size must be limited to maxSize";
                // set maxBoostAtt with values to help FuzzyTermsEnum to optimize
                if (stQueue.size() == maxSize) {
                    t = stQueue.peek();
                    maxBoostAtt.setMaxNonCompetitiveBoost(t.boost);
                    maxBoostAtt.setCompetitiveTerm(t.bytes);
                }
            }
            return true;
        }
    });
    final Q q = this.getTopLevelQuery();
    final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
    // Clauses are added in scoreTermSortByTermComp order rather than queue order.
    ArrayUtil.mergeSort(scoreTerms, scoreTermSortByTermComp);
    for (final ScoreTerm st : scoreTerms) {
        final Term term = new Term(query.field, st.bytes);
        // Sanity check: the doc frequency accumulated across enums must match the reader's value.
        assert reader.docFreq(term) == st.termState.docFreq() : "reader DF is " + reader.docFreq(term) + " vs "
                + st.termState.docFreq() + " term=" + term;
        this.addClause(q, term, st.termState.docFreq(), query.getBoost() * st.boost, st.termState); // add to query
    }
    return q;
}