List of usage examples for org.apache.lucene.index.IndexReader.maxDoc()
public abstract int maxDoc();
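maxDoc() returns one greater than the largest document number that may be assigned, so doc IDs always fall in [0, maxDoc()); documents that are deleted but not yet merged away still count, whereas numDocs() excludes them. A minimal sketch of the distinction, assuming a Lucene 5.x+ style API (the index path is hypothetical):

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;
import java.nio.file.Paths;

public class MaxDocDemo {
    public static void main(String[] args) throws Exception {
        try (IndexReader reader = DirectoryReader.open(
                FSDirectory.open(Paths.get("/path/to/index")))) {  // hypothetical path
            // maxDoc(): one greater than the largest doc ID ever assigned,
            // including deleted documents not yet merged away.
            int maxDoc = reader.maxDoc();
            // numDocs(): live (non-deleted) documents only.
            int numDocs = reader.numDocs();
            System.out.println("maxDoc=" + maxDoc + " numDocs=" + numDocs
                + " deleted=" + (maxDoc - numDocs));
        }
    }
}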
From source file:org.apache.solr.search.function.ScaleFloatFunction.java
License:Apache License
@Override
public DocValues getValues(Map context, IndexReader reader) throws IOException {
    final DocValues vals = source.getValues(context, reader);
    int maxDoc = reader.maxDoc(); // this doesn't take into account deleted docs!

    float minVal = 0.0f;
    float maxVal = 0.0f;

    if (maxDoc > 0) {
        minVal = maxVal = vals.floatVal(0);
    }

    // Traverse the complete set of values to get the min and the max.
    // Future alternatives include being able to ask a DocValues for min/max
    // Another memory-intensive option is to cache the values in
    // a float[] on this first pass.
    for (int i = 0; i < maxDoc; i++) {
        float val = vals.floatVal(i);
        if ((Float.floatToRawIntBits(val) & (0xff << 23)) == 0xff << 23) {
            // if the exponent in the float is all ones, then this is +Inf, -Inf or NaN
            // which don't make sense to factor into the scale function
            continue;
        }
        if (val < minVal) {
            minVal = val;
        } else if (val > maxVal) {
            maxVal = val;
        }
    }

    final float scale = (maxVal - minVal == 0) ? 0 : (max - min) / (maxVal - minVal);
    final float minSource = minVal;
    final float maxSource = maxVal;

    return new DocValues() {
        @Override
        public float floatVal(int doc) {
            return (vals.floatVal(doc) - minSource) * scale + min;
        }

        @Override
        public int intVal(int doc) {
            return (int) floatVal(doc);
        }

        @Override
        public long longVal(int doc) {
            return (long) floatVal(doc);
        }

        @Override
        public double doubleVal(int doc) {
            return (double) floatVal(doc);
        }

        @Override
        public String strVal(int doc) {
            return Float.toString(floatVal(doc));
        }

        @Override
        public String toString(int doc) {
            return "scale(" + vals.toString(doc) + ",toMin=" + min + ",toMax=" + max
                + ",fromMin=" + minSource + ",fromMax=" + maxSource + ")";
        }
    };
}
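As the inline comment warns, the min/max scan above visits every slot up to maxDoc(), deleted or not. A minimal sketch of the same scan restricted to live documents, assuming the pre-4.0 IndexReader API used here, where isDeleted(int) is available (an illustration, not ScaleFloatFunction's actual behavior):

// Hedged sketch: the same min/max pass, but skipping deleted documents.
// Assumes a pre-4.0 IndexReader; "vals" is the DocValues from above.
static float[] minMaxLiveDocs(IndexReader reader, DocValues vals) {
    int maxDoc = reader.maxDoc();
    float minVal = Float.POSITIVE_INFINITY;
    float maxVal = Float.NEGATIVE_INFINITY;
    for (int i = 0; i < maxDoc; i++) {
        if (reader.isDeleted(i)) {
            continue; // ignore slots belonging to deleted documents
        }
        float val = vals.floatVal(i);
        if (Float.isNaN(val) || Float.isInfinite(val)) {
            continue; // same +/-Inf and NaN exclusion as above
        }
        minVal = Math.min(minVal, val);
        maxVal = Math.max(maxVal, val);
    }
    return new float[] { minVal, maxVal };
}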
From source file:org.apache.solr.search.function.ValueSourceScorer.java
License:Apache License
protected ValueSourceScorer(IndexReader reader, DocValues values) {
    super(null, null);
    this.reader = reader;
    this.maxDoc = reader.maxDoc();
    this.values = values;
    setCheckDeletes(true);
}
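Caching maxDoc() in the constructor lets the scorer treat it as the exclusive upper bound of the doc-id space. A minimal sketch of how such a scorer typically advances, in the style of ValueSourceScorer's iteration (matches(doc) is a hypothetical stand-in for the value test; this is not the class's verbatim nextDoc()):

// Hedged sketch of a maxDoc-bounded advance loop.
public int nextDoc() throws IOException {
    for (;;) {
        doc++;
        if (doc >= maxDoc) {
            return doc = DocIdSetIterator.NO_MORE_DOCS;
        }
        if (matches(doc)) { // hypothetical predicate over the DocValues
            return doc;
        }
    }
}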
From source file:org.apache.solr.search.PrefixFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    new PrefixGenerator(prefix) {
        public void handleDoc(int doc) {
            bitSet.set(doc);
        }
    }.generate(reader);
    return bitSet;
}
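The OpenBitSet is sized to reader.maxDoc() because every document ID the generator can hand back is strictly less than maxDoc(). A minimal sketch of consuming the returned DocIdSet, assuming the same Lucene 3.x era API (the filter variable is hypothetical):

// A minimal sketch: iterate the matches produced by a filter like the one above.
DocIdSet docs = filter.getDocIdSet(reader);   // e.g. a PrefixFilter instance
DocIdSetIterator it = docs.iterator();
int doc;
while ((doc = it.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    // every doc returned here is < reader.maxDoc()
    System.out.println("matched doc: " + doc);
}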
From source file:org.apache.solr.search.TestDocSet.java
License:Apache License
public void doFilterTest(IndexReader reader) throws IOException {
    IndexReaderContext topLevelContext = reader.getContext();
    OpenBitSet bs = getRandomSet(reader.maxDoc(), rand.nextInt(reader.maxDoc() + 1));
    DocSet a = new BitDocSet(bs);
    DocSet b = getIntDocSet(bs);

    Filter fa = a.getTopFilter();
    Filter fb = b.getTopFilter();

    /*** top level filters are no longer supported
    // test top-level
    DocIdSet da = fa.getDocIdSet(topLevelContext);
    DocIdSet db = fb.getDocIdSet(topLevelContext);
    doTestIteratorEqual(da, db);
    ***/

    DocIdSet da;
    DocIdSet db;
    List<AtomicReaderContext> leaves = topLevelContext.leaves();

    // first test in-sequence sub readers
    for (AtomicReaderContext readerContext : leaves) {
        da = fa.getDocIdSet(readerContext, null);
        db = fb.getDocIdSet(readerContext, null);
        doTestIteratorEqual(da, db);
    }

    int nReaders = leaves.size();
    // now test out-of-sequence sub readers
    for (int i = 0; i < nReaders; i++) {
        AtomicReaderContext readerContext = leaves.get(rand.nextInt(nReaders));
        da = fa.getDocIdSet(readerContext, null);
        db = fb.getDocIdSet(readerContext, null);
        doTestIteratorEqual(da, db);
    }
}
From source file:org.apache.solr.search.WildcardFilter.java
License:Apache License
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    new WildcardGenerator(term) {
        public void handleDoc(int doc) {
            bitSet.set(doc);
        }
    }.generate(reader);
    return bitSet;
}
From source file:org.apache.solr.spelling.SpellCheckCollator.java
License:Apache License
public List<SpellCheckCollation> collate(SpellingResult result, String originalQuery,
        ResponseBuilder ultimateResponse) {
    List<SpellCheckCollation> collations = new ArrayList<SpellCheckCollation>();

    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
        for (SearchComponent sc : ultimateResponse.components) {
            if (sc instanceof QueryComponent) {
                queryComponent = (QueryComponent) sc;
                break;
            }
        }
    }

    boolean verifyCandidateWithQuery = true;
    int maxTries = maxCollationTries;
    int maxNumberToIterate = maxTries;
    if (maxTries < 1) {
        maxTries = 1;
        maxNumberToIterate = maxCollations;
        verifyCandidateWithQuery = false;
    }
    if (queryComponent == null && verifyCandidateWithQuery) {
        LOG.info("Could not find an instance of QueryComponent. Disabling collation verification against the index.");
        maxTries = 1;
        verifyCandidateWithQuery = false;
    }

    docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
    int maxDocId = -1;
    if (verifyCandidateWithQuery && docCollectionLimit > 0) {
        IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
        maxDocId = reader.maxDoc();
    }

    int tryNo = 0;
    int collNo = 0;
    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(),
            maxNumberToIterate, maxCollationEvaluations, suggestionsMayOverlap);
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {

        PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
        String collationQueryStr = getCollation(originalQuery, possibility.corrections);
        int hits = 0;

        if (verifyCandidateWithQuery) {
            tryNo++;
            SolrParams origParams = ultimateResponse.req.getParams();
            ModifiableSolrParams params = new ModifiableSolrParams(origParams);
            Iterator<String> origParamIterator = origParams.getParameterNamesIterator();
            int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length();
            while (origParamIterator.hasNext()) {
                String origParamName = origParamIterator.next();
                if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE)
                        && origParamName.length() > pl) {
                    String[] val = origParams.getParams(origParamName);
                    if (val.length == 1 && val[0].length() == 0) {
                        params.set(origParamName.substring(pl), (String[]) null);
                    } else {
                        params.set(origParamName.substring(pl), val);
                    }
                }
            }
            params.set(CommonParams.Q, collationQueryStr);
            params.remove(CommonParams.START);
            params.set(CommonParams.ROWS, "" + docCollectionLimit);
            // we don't want any stored fields
            params.set(CommonParams.FL, "id");
            // we'll sort by doc id to ensure no scoring is done.
            params.set(CommonParams.SORT, "_docid_ asc");
            // If a dismax query, don't add unnecessary clauses for scoring
            params.remove(DisMaxParams.TIE);
            params.remove(DisMaxParams.PF);
            params.remove(DisMaxParams.PF2);
            params.remove(DisMaxParams.PF3);
            params.remove(DisMaxParams.BQ);
            params.remove(DisMaxParams.BF);
            // Collate testing does not support Grouping (see SOLR-2577)
            params.remove(GroupParams.GROUP);

            // creating a request here... make sure to close it!
            ResponseBuilder checkResponse = new ResponseBuilder(
                    new LocalSolrQueryRequest(ultimateResponse.req.getCore(), params),
                    new SolrQueryResponse(), Arrays.<SearchComponent>asList(queryComponent));
            checkResponse.setQparser(ultimateResponse.getQparser());
            checkResponse.setFilters(ultimateResponse.getFilters());
            checkResponse.setQueryString(collationQueryStr);
            checkResponse.components = Arrays.<SearchComponent>asList(queryComponent);

            try {
                queryComponent.prepare(checkResponse);
                if (docCollectionLimit > 0) {
                    int f = checkResponse.getFieldFlags();
                    checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
                }
                queryComponent.process(checkResponse);
                hits = (Integer) checkResponse.rsp.getToLog().get("hits");
            } catch (EarlyTerminatingCollectorException etce) {
                assert (docCollectionLimit > 0);
                assert 0 < etce.getNumberScanned();
                assert 0 < etce.getNumberCollected();

                if (etce.getNumberScanned() == maxDocId) {
                    hits = etce.getNumberCollected();
                } else {
                    hits = (int) (((float) (maxDocId * etce.getNumberCollected()))
                            / (float) etce.getNumberScanned());
                }
            } catch (Exception e) {
                LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits.", e);
            } finally {
                checkResponse.req.close();
            }
        }
        if (hits > 0 || !verifyCandidateWithQuery) {
            collNo++;
            SpellCheckCollation collation = new SpellCheckCollation();
            collation.setCollationQuery(collationQueryStr);
            collation.setHits(hits);
            collation.setInternalRank(suggestionsMayOverlap
                    ? ((possibility.rank * 1000) + possibility.index)
                    : possibility.rank);

            NamedList<String> misspellingsAndCorrections = new NamedList<String>();
            for (SpellCheckCorrection corr : possibility.corrections) {
                misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
            }
            collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
            collations.add(collation);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Collation: " + collationQueryStr
                    + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
        }
    }
    return collations;
}
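The EarlyTerminatingCollectorException branch above uses maxDoc() to extrapolate a full-index hit count from a partial scan: numberCollected hits among numberScanned scanned docs are scaled up to maxDocId docs. A worked sketch of that arithmetic with hypothetical numbers:

// Hypothetical figures, just to illustrate the estimate above.
int maxDocId = 1_000_000;     // reader.maxDoc()
int numberScanned = 50_000;   // docs visited before early termination
int numberCollected = 120;    // hits found among the scanned docs

// Scale the observed hit rate up to the whole doc-id space.
// (Note: the int product can overflow for very large indexes;
// these figures stay well within range.)
int estimatedHits = (int) (((float) (maxDocId * numberCollected)) / (float) numberScanned);
System.out.println(estimatedHits); // prints 2400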
From source file:org.apache.solr.uninverting.TestFieldCacheSortRandom.java
License:Apache License
private void testRandomStringSort(SortField.Type type) throws Exception {
    Random random = new Random(random().nextLong());

    final int NUM_DOCS = atLeast(100);
    final Directory dir = newDirectory();
    final RandomIndexWriter writer = new RandomIndexWriter(random, dir);
    final boolean allowDups = random.nextBoolean();
    final Set<String> seen = new HashSet<>();
    final int maxLength = TestUtil.nextInt(random, 5, 100);
    if (VERBOSE) {
        System.out.println("TEST: NUM_DOCS=" + NUM_DOCS + " maxLength=" + maxLength
            + " allowDups=" + allowDups);
    }

    int numDocs = 0;
    final List<BytesRef> docValues = new ArrayList<>();
    // TODO: deletions
    while (numDocs < NUM_DOCS) {
        final Document doc = new Document();

        // 10% of the time, the document is missing the value:
        final BytesRef br;
        if (random().nextInt(10) != 7) {
            final String s;
            if (random.nextBoolean()) {
                s = TestUtil.randomSimpleString(random, maxLength);
            } else {
                s = TestUtil.randomUnicodeString(random, maxLength);
            }

            if (!allowDups) {
                if (seen.contains(s)) {
                    continue;
                }
                seen.add(s);
            }

            if (VERBOSE) {
                System.out.println("  " + numDocs + ": s=" + s);
            }

            doc.add(new StringField("stringdv", s, Field.Store.NO));
            docValues.add(new BytesRef(s));
        } else {
            br = null;
            if (VERBOSE) {
                System.out.println("  " + numDocs + ": <missing>");
            }
            docValues.add(null);
        }

        doc.add(new IntPoint("id", numDocs));
        doc.add(new StoredField("id", numDocs));
        writer.addDocument(doc);
        numDocs++;

        if (random.nextInt(40) == 17) {
            // force flush
            writer.getReader().close();
        }
    }

    Map<String, UninvertingReader.Type> mapping = new HashMap<>();
    mapping.put("stringdv", Type.SORTED);
    mapping.put("id", Type.INTEGER_POINT);
    final IndexReader r = UninvertingReader.wrap(writer.getReader(), mapping);
    writer.close();
    if (VERBOSE) {
        System.out.println("  reader=" + r);
    }

    final IndexSearcher s = newSearcher(r, false);
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final boolean reverse = random.nextBoolean();

        final TopFieldDocs hits;
        final SortField sf;
        final boolean sortMissingLast;
        final boolean missingIsNull;
        sf = new SortField("stringdv", type, reverse);
        sortMissingLast = random().nextBoolean();
        missingIsNull = true;

        if (sortMissingLast) {
            sf.setMissingValue(SortField.STRING_LAST);
        }

        final Sort sort;
        if (random.nextBoolean()) {
            sort = new Sort(sf);
        } else {
            sort = new Sort(sf, SortField.FIELD_DOC);
        }
        final int hitCount = TestUtil.nextInt(random, 1, r.maxDoc() + 20);
        final RandomQuery f = new RandomQuery(random.nextLong(), random.nextFloat(), docValues);
        int queryType = random.nextInt(2);
        if (queryType == 0) {
            hits = s.search(new ConstantScoreQuery(f), hitCount, sort,
                random.nextBoolean(), random.nextBoolean());
        } else {
            hits = s.search(f, hitCount, sort, random.nextBoolean(), random.nextBoolean());
        }

        if (VERBOSE) {
            System.out.println("\nTEST: iter=" + iter + " " + hits.totalHits + " hits; topN=" + hitCount
                + "; reverse=" + reverse + "; sortMissingLast=" + sortMissingLast + " sort=" + sort);
        }

        // Compute expected results:
        Collections.sort(f.matchValues, new Comparator<BytesRef>() {
            @Override
            public int compare(BytesRef a, BytesRef b) {
                if (a == null) {
                    if (b == null) {
                        return 0;
                    }
                    if (sortMissingLast) {
                        return 1;
                    } else {
                        return -1;
                    }
                } else if (b == null) {
                    if (sortMissingLast) {
                        return -1;
                    } else {
                        return 1;
                    }
                } else {
                    return a.compareTo(b);
                }
            }
        });

        if (reverse) {
            Collections.reverse(f.matchValues);
        }
        final List<BytesRef> expected = f.matchValues;
        if (VERBOSE) {
            System.out.println("  expected:");
            for (int idx = 0; idx < expected.size(); idx++) {
                BytesRef br = expected.get(idx);
                if (br == null && missingIsNull == false) {
                    br = new BytesRef();
                }
                System.out.println("    " + idx + ": " + (br == null ? "<missing>" : br.utf8ToString()));
                if (idx == hitCount - 1) {
                    break;
                }
            }
        }

        if (VERBOSE) {
            System.out.println("  actual:");
            for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
                final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
                BytesRef br = (BytesRef) fd.fields[0];
                System.out.println("    " + hitIDX + ": " + (br == null ? "<missing>" : br.utf8ToString())
                    + " id=" + s.doc(fd.doc).get("id"));
            }
        }
        for (int hitIDX = 0; hitIDX < hits.scoreDocs.length; hitIDX++) {
            final FieldDoc fd = (FieldDoc) hits.scoreDocs[hitIDX];
            BytesRef br = expected.get(hitIDX);
            if (br == null && missingIsNull == false) {
                br = new BytesRef();
            }

            // Normally, the old codecs (that don't support
            // docsWithField via doc values) will always return
            // an empty BytesRef for the missing case; however,
            // if all docs in a given segment were missing, in
            // that case it will return null! So we must map
            // null here, too:
            BytesRef br2 = (BytesRef) fd.fields[0];
            if (br2 == null && missingIsNull == false) {
                br2 = new BytesRef();
            }

            assertEquals(br, br2);
        }
    }
    r.close();
    dir.close();
}
From source file:org.archive.nutchwax.tools.LengthNormUpdater.java
License:Apache License
/**
 *
 */
public static void reSetNorms(IndexReader reader, String fieldName, Map<String, Integer> ranks,
        Similarity sim) throws IOException {
    if (VERBOSE > 0)
        System.out.println("Updating field: " + fieldName);

    int[] termCounts = new int[0];
    TermEnum termEnum = null;
    TermDocs termDocs = null;

    termCounts = new int[reader.maxDoc()];
    try {
        termEnum = reader.terms(new Term(fieldName, ""));
        try {
            termDocs = reader.termDocs();
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {
                    termDocs.seek(termEnum.term());
                    while (termDocs.next()) {
                        termCounts[termDocs.doc()] += termDocs.freq();
                    }
                }
            } while (termEnum.next());
        } finally {
            if (null != termDocs)
                termDocs.close();
        }
    } finally {
        if (null != termEnum)
            termEnum.close();
    }

    for (int d = 0; d < termCounts.length; d++) {
        if (!reader.isDeleted(d)) {
            Document doc = reader.document(d);
            String url = doc.get("url");
            if (url != null) {
                Integer rank = ranks.get(url);
                if (rank == null)
                    continue;
                float originalNorm = sim.lengthNorm(fieldName, termCounts[d]);
                byte encodedOrig = sim.encodeNorm(originalNorm);
                float rankedNorm = originalNorm * (float) (Math.log10(rank) + 1);
                byte encodedRank = sim.encodeNorm(rankedNorm);
                if (VERBOSE > 1)
                    System.out.println(fieldName + "\t" + d + "\t" + originalNorm + "\t" + encodedOrig
                        + "\t" + rankedNorm + "\t" + encodedRank);
                reader.setNorm(d, fieldName, encodedRank);
            }
        }
    }
}
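The re-ranking multiplies each original length norm by log10(rank) + 1, so rank 1 leaves the norm unchanged, rank 10 doubles it, and rank 100 triples it. A small sketch of that factor with hypothetical ranks:

// Hypothetical ranks, just to illustrate the boost factor used above.
for (int rank : new int[] { 1, 10, 100, 1000 }) {
    float factor = (float) (Math.log10(rank) + 1);
    System.out.println("rank=" + rank + " -> norm factor " + factor);
}
// prints: rank=1 -> 1.0, rank=10 -> 2.0, rank=100 -> 3.0, rank=1000 -> 4.0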
From source file:org.archive.tnh.tools.LengthNormUpdater.java
License:Apache License
/**
 *
 */
public static void updateNorms(IndexReader reader, String fieldName, Map<String, Integer> ranks,
        Similarity sim) throws IOException {
    if (VERBOSE > 0)
        System.out.println("Updating field: " + fieldName);

    int[] termCounts = new int[0];
    TermEnum termEnum = null;
    TermDocs termDocs = null;

    termCounts = new int[reader.maxDoc()];
    try {
        termEnum = reader.terms(new Term(fieldName, ""));
        try {
            termDocs = reader.termDocs();
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {
                    termDocs.seek(termEnum.term());
                    while (termDocs.next()) {
                        termCounts[termDocs.doc()] += termDocs.freq();
                    }
                }
            } while (termEnum.next());
        } finally {
            if (null != termDocs)
                termDocs.close();
        }
    } finally {
        if (null != termEnum)
            termEnum.close();
    }

    for (int d = 0; d < termCounts.length; d++) {
        if (!reader.isDeleted(d)) {
            Document doc = reader.document(d);
            String url = doc.get("url");
            if (url != null) {
                Integer rank = ranks.get(url);
                if (rank == null)
                    continue;
                float originalNorm = sim.lengthNorm(fieldName, termCounts[d]);
                byte encodedOrig = sim.encodeNorm(originalNorm);
                float rankedNorm = originalNorm * (float) (Math.log10(rank) + 1);
                byte encodedRank = sim.encodeNorm(rankedNorm);
                if (VERBOSE > 1)
                    System.out.println(fieldName + "\t" + d + "\t" + originalNorm + "\t" + encodedOrig
                        + "\t" + rankedNorm + "\t" + encodedRank);
                reader.setNorm(d, fieldName, encodedRank);
            }
        }
    }
}
From source file:org.codelibs.elasticsearch.index.mapper.MappedFieldType.java
License:Apache License
/**
 * @return a {@link FieldStats} instance that maps to the type of this
 *         field or {@code null} if the provided index has no stats about the
 *         current field
 */
public FieldStats stats(IndexReader reader) throws IOException {
    int maxDoc = reader.maxDoc();
    FieldInfo fi = MultiFields.getMergedFieldInfos(reader).fieldInfo(name());
    if (fi == null) {
        return null;
    }
    Terms terms = MultiFields.getTerms(reader, name());
    if (terms == null) {
        return new FieldStats.Text(maxDoc, 0, -1, -1, isSearchable(), isAggregatable());
    }
    FieldStats stats = new FieldStats.Text(maxDoc, terms.getDocCount(), terms.getSumDocFreq(),
        terms.getSumTotalTermFreq(), isSearchable(), isAggregatable(), terms.getMin(), terms.getMax());
    return stats;
}
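Here maxDoc() supplies the denominator for the stats: of maxDoc documents, terms.getDocCount() contain at least one term for the field. A minimal sketch of using those two numbers to compute field density (the reader and field name are hypothetical):

// A minimal sketch, assuming an open IndexReader; getDocCount() is the
// number of documents with at least one term in the field.
Terms terms = MultiFields.getTerms(reader, "title"); // field name hypothetical
if (terms != null) {
    double density = (double) terms.getDocCount() / reader.maxDoc();
    System.out.printf("%.1f%% of documents have the field%n", density * 100);
}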