Usage examples for org.apache.lucene.index.Fields.terms(String field)

Method signature:

public abstract Terms terms(String field) throws IOException;

Returns the Terms for the given field, or null if the field has no indexed terms.
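Before the Solr examples below, here is a minimal sketch of the common call pattern: get Fields from an atomic/leaf reader, ask for the field's Terms (checking for null), and walk the TermsEnum. The helper name countTerms and the example usage are made up for illustration, and the sketch assumes the Lucene 4.x-style API used by most of the examples (AtomicReader, terms.iterator(null), DocsEnum); newer Lucene versions use LeafReader, terms.iterator() and PostingsEnum instead.

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public class TermsExample {
  /** Hypothetical helper: counts the distinct terms indexed for one field. */
  static long countTerms(AtomicReader reader, String field) throws IOException {
    Fields fields = reader.fields();                            // may be null if the reader has no postings
    Terms terms = fields == null ? null : fields.terms(field);  // null when the field is absent
    if (terms == null) return 0;
    TermsEnum termsEnum = terms.iterator(null);                 // Lucene 4.x signature; 5.x+ is iterator()
    long count = 0;
    for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
      count++;                                                  // termsEnum.docFreq(), postings, etc. are also available here
    }
    return count;
  }
}

Every example below follows this shape; the null checks on Fields and on the returned Terms are what make the pattern safe for fields that were never indexed.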
From source file: org.apache.solr.handler.component.TermsComponent.java
License: Apache License
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(TermsParams.TERMS, false)) return;

  String[] fields = params.getParams(TermsParams.TERMS_FIELD);

  NamedList<Object> termsResult = new SimpleOrderedMap<Object>();
  rb.rsp.add("terms", termsResult);

  if (fields == null || fields.length == 0) return;

  int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
  if (limit < 0) {
    limit = Integer.MAX_VALUE;
  }

  String lowerStr = params.get(TermsParams.TERMS_LOWER);
  String upperStr = params.get(TermsParams.TERMS_UPPER);
  boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
  boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
  boolean sort = !TermsParams.TERMS_SORT_INDEX
      .equals(params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
  int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
  int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
  if (freqmax < 0) {
    freqmax = Integer.MAX_VALUE;
  }
  String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
  String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
  Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

  boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

  final AtomicReader indexReader = rb.req.getSearcher().getAtomicReader();
  Fields lfields = indexReader.fields();

  for (String field : fields) {
    NamedList<Integer> fieldTerms = new NamedList<Integer>();
    termsResult.add(field, fieldTerms);

    Terms terms = lfields == null ? null : lfields.terms(field);
    if (terms == null) {
      // no terms for this field
      continue;
    }

    FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
    if (ft == null) ft = new StrField();

    // prefix must currently be text
    BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

    BytesRef upperBytes = null;
    if (upperStr != null) {
      upperBytes = new BytesRef();
      ft.readableToIndexed(upperStr, upperBytes);
    }

    BytesRef lowerBytes;
    if (lowerStr == null) {
      // If no lower bound was specified, use the prefix
      lowerBytes = prefixBytes;
    } else {
      lowerBytes = new BytesRef();
      if (raw) {
        // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
        // perhaps we detect if the FieldType is non-character and expect hex if so?
        lowerBytes = new BytesRef(lowerStr);
      } else {
        lowerBytes = new BytesRef();
        ft.readableToIndexed(lowerStr, lowerBytes);
      }
    }

    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term = null;

    if (lowerBytes != null) {
      if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
        // Only advance the enum if we are excluding the lower bound and the lower Term actually matches
        if (lowerIncl == false && term.equals(lowerBytes)) {
          term = termsEnum.next();
        }
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }

    int i = 0;
    BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort
        ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit)
        : null);
    CharsRef external = new CharsRef();

    while (term != null && (i < limit || sort)) {
      boolean externalized = false; // did we fill in "external" yet for this term?

      // stop if the prefix doesn't match
      if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

      if (pattern != null) {
        // indexed text or external text?
        // TODO: support "raw" mode?
        ft.indexedToReadable(term, external);
        externalized = true;
        if (!pattern.matcher(external).matches()) {
          term = termsEnum.next();
          continue;
        }
      }

      if (upperBytes != null) {
        int upperCmp = term.compareTo(upperBytes);
        // if we are past the upper term, or equal to it (when don't include upper) then stop.
        if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
      }

      // This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
      int docFreq = termsEnum.docFreq();
      if (docFreq >= freqmin && docFreq <= freqmax) {
        // add the term to the list
        if (sort) {
          queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
        } else {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }
          fieldTerms.add(external.toString(), docFreq);
          i++;
        }
      }

      term = termsEnum.next();
    }

    if (sort) {
      for (CountPair<BytesRef, Integer> item : queue) {
        if (i >= limit) break;
        ft.indexedToReadable(item.key, external);
        fieldTerms.add(external.toString(), item.val);
        i++;
      }
    }
  }
}
From source file: org.apache.solr.handler.component.TermVectorComponent.java
License: Apache License
@Override
public void process(ResponseBuilder rb) throws IOException {
  SolrParams params = rb.req.getParams();
  if (!params.getBool(COMPONENT_NAME, false)) {
    return;
  }

  NamedList<Object> termVectors = new NamedList<Object>();
  rb.rsp.add(TERM_VECTORS, termVectors);

  IndexSchema schema = rb.req.getSchema();
  SchemaField keyField = schema.getUniqueKeyField();
  String uniqFieldName = null;
  if (keyField != null) {
    uniqFieldName = keyField.getName();
    termVectors.add("uniqueKeyFieldName", uniqFieldName);
  }

  FieldOptions allFields = new FieldOptions();
  // figure out what options we have, and try to get the appropriate vector
  allFields.termFreq = params.getBool(TermVectorParams.TF, false);
  allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
  allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
  allFields.docFreq = params.getBool(TermVectorParams.DF, false);
  allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
  // boolean cacheIdf = params.getBool(TermVectorParams.IDF, false);

  // short cut to all values.
  if (params.getBool(TermVectorParams.ALL, false)) {
    allFields.termFreq = true;
    allFields.positions = true;
    allFields.offsets = true;
    allFields.docFreq = true;
    allFields.tfIdf = true;
  }

  // Build up our per field mapping
  Map<String, FieldOptions> fieldOptions = new HashMap<String, FieldOptions>();
  NamedList<List<String>> warnings = new NamedList<List<String>>();
  List<String> noTV = new ArrayList<String>();
  List<String> noPos = new ArrayList<String>();
  List<String> noOff = new ArrayList<String>();

  Set<String> fields = getFields(rb);
  if (null != fields) {
    // we have specific fields to retrieve, or no fields
    for (String field : fields) {
      // workaround SOLR-3523
      if (null == field || "score".equals(field)) continue;

      // we don't want to issue warnings about the uniqueKey field
      // since it can cause lots of confusion in distributed requests
      // where the uniqueKey field is injected into the fl for merging
      final boolean fieldIsUniqueKey = field.equals(uniqFieldName);

      SchemaField sf = schema.getFieldOrNull(field);
      if (sf != null) {
        if (sf.storeTermVector()) {
          FieldOptions option = fieldOptions.get(field);
          if (option == null) {
            option = new FieldOptions();
            option.fieldName = field;
            fieldOptions.put(field, option);
          }
          // get the per field mappings
          option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
          option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
          option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
          // Validate these are even an option
          option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
          if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
            noPos.add(field);
          }
          option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
          if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
            noOff.add(field);
          }
        } else {
          // field doesn't have term vectors
          if (!fieldIsUniqueKey) noTV.add(field);
        }
      } else {
        // field doesn't exist
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
      }
    }
  } // else, deal with all fields

  // NOTE: currently all types of warnings are schema driven, and guaranteed
  // to be consistent across all shards - if additional types of warnings
  // are added that might be different between shards, finishStage() needs
  // to be changed to account for that.
  boolean hasWarnings = false;
  if (!noTV.isEmpty()) {
    warnings.add("noTermVectors", noTV);
    hasWarnings = true;
  }
  if (!noPos.isEmpty()) {
    warnings.add("noPositions", noPos);
    hasWarnings = true;
  }
  if (!noOff.isEmpty()) {
    warnings.add("noOffsets", noOff);
    hasWarnings = true;
  }
  if (hasWarnings) {
    termVectors.add("warnings", warnings);
  }

  DocListAndSet listAndSet = rb.getResults();
  List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
  Iterator<Integer> iter;
  if (docIds != null && !docIds.isEmpty()) {
    iter = docIds.iterator();
  } else {
    DocList list = listAndSet.docList;
    iter = list.iterator();
  }

  SolrIndexSearcher searcher = rb.req.getSearcher();
  IndexReader reader = searcher.getIndexReader();
  // the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
  // Only load the id field to get the uniqueKey of that field
  final String finalUniqFieldName = uniqFieldName;

  final List<String> uniqValues = new ArrayList<String>();

  // TODO: is this required to be single-valued? if so, we should STOP
  // once we find it...
  final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
    @Override public void stringField(FieldInfo fieldInfo, String value) { uniqValues.add(value); }
    @Override public void intField(FieldInfo fieldInfo, int value) { uniqValues.add(Integer.toString(value)); }
    @Override public void longField(FieldInfo fieldInfo, long value) { uniqValues.add(Long.toString(value)); }
    @Override public Status needsField(FieldInfo fieldInfo) {
      return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
    }
  };

  TermsEnum termsEnum = null;

  while (iter.hasNext()) {
    Integer docId = iter.next();
    NamedList<Object> docNL = new NamedList<Object>();

    if (keyField != null) {
      reader.document(docId, getUniqValue);
      String uniqVal = null;
      if (uniqValues.size() != 0) {
        uniqVal = uniqValues.get(0);
        uniqValues.clear();
        docNL.add("uniqueKey", uniqVal);
        termVectors.add(uniqVal, docNL);
      }
    } else {
      // support for schemas w/o a unique key,
      termVectors.add("doc-" + docId, docNL);
    }

    if (null != fields) {
      for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
        final String field = entry.getKey();
        final Terms vector = reader.getTermVector(docId, field);
        if (vector != null) {
          termsEnum = vector.iterator(termsEnum);
          mapOneVector(docNL, entry.getValue(), reader, docId, vector.iterator(termsEnum), field);
        }
      }
    } else {
      // extract all fields
      final Fields vectors = reader.getTermVectors(docId);
      for (String field : vectors) {
        Terms terms = vectors.terms(field);
        if (terms != null) {
          termsEnum = terms.iterator(termsEnum);
          mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
        }
      }
    }
  }
}
From source file: org.apache.solr.request.SimpleFacets.java
License: Apache License
/**
 * Returns a list of terms in the specified field along with the
 * corresponding count of documents in the set that match that constraint.
 * This method uses the FilterCache to get the intersection count between <code>docs</code>
 * and the DocSet for each term in the filter.
 *
 * @see FacetParams#FACET_LIMIT
 * @see FacetParams#FACET_ZEROS
 * @see FacetParams#FACET_MISSING
 */
public NamedList<Integer> getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field,
    int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {

  /* :TODO: potential optimization...
   * cache the Terms with the highest docFreq and try them first
   * don't enum if we get our max from them
   */

  // Minimum term docFreq in order to use the filterCache for that term.
  int minDfFilterCache = params.getFieldInt(field, FacetParams.FACET_ENUM_CACHE_MINDF, 0);

  // make sure we have a set that is fast for random access, if we will use it for that
  DocSet fastForRandomSet = docs;
  if (minDfFilterCache > 0 && docs instanceof SortedIntDocSet) {
    SortedIntDocSet sset = (SortedIntDocSet) docs;
    fastForRandomSet = new HashDocSet(sset.getDocs(), 0, sset.size());
  }

  IndexSchema schema = searcher.getSchema();
  AtomicReader r = searcher.getAtomicReader();
  FieldType ft = schema.getFieldType(field);

  boolean sortByCount = sort.equals("count") || sort.equals("true");
  final int maxsize = limit >= 0 ? offset + limit : Integer.MAX_VALUE - 1;
  final BoundedTreeSet<CountPair<BytesRef, Integer>> queue = sortByCount
      ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(maxsize)
      : null;
  final NamedList<Integer> res = new NamedList<Integer>();

  int min = mincount - 1; // the smallest value in the top 'N' values
  int off = offset;
  int lim = limit >= 0 ? limit : Integer.MAX_VALUE;

  BytesRef startTermBytes = null;
  if (prefix != null) {
    String indexedPrefix = ft.toInternal(prefix);
    startTermBytes = new BytesRef(indexedPrefix);
  }

  Fields fields = r.fields();
  Terms terms = fields == null ? null : fields.terms(field);
  TermsEnum termsEnum = null;
  SolrIndexSearcher.DocsEnumState deState = null;
  BytesRef term = null;
  if (terms != null) {
    termsEnum = terms.iterator(null);

    // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
    // facet.offset when sorting by index order.

    if (startTermBytes != null) {
      if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }
  }

  DocsEnum docsEnum = null;
  CharsRef charsRef = new CharsRef(10);

  if (docs.size() >= mincount) {
    while (term != null) {

      if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) break;

      int df = termsEnum.docFreq();

      // If we are sorting, we can use df>min (rather than >=) since we
      // are going in index order. For certain term distributions this can
      // make a large difference (for example, many terms with df=1).
      if (df > 0 && df > min) {
        int c;

        if (df >= minDfFilterCache) {
          // use the filter cache
          if (deState == null) {
            deState = new SolrIndexSearcher.DocsEnumState();
            deState.fieldName = field;
            deState.liveDocs = r.getLiveDocs();
            deState.termsEnum = termsEnum;
            deState.docsEnum = docsEnum;
          }

          c = searcher.numDocs(docs, deState);

          docsEnum = deState.docsEnum;
        } else {
          // iterate over TermDocs to calculate the intersection

          // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
          // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
          // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
          docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
          c = 0;

          if (docsEnum instanceof MultiDocsEnum) {
            MultiDocsEnum.EnumWithSlice[] subs = ((MultiDocsEnum) docsEnum).getSubs();
            int numSubs = ((MultiDocsEnum) docsEnum).getNumSubs();
            for (int subindex = 0; subindex < numSubs; subindex++) {
              MultiDocsEnum.EnumWithSlice sub = subs[subindex];
              if (sub.docsEnum == null) continue;
              int base = sub.slice.start;
              int docid;
              while ((docid = sub.docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
                if (fastForRandomSet.exists(docid + base)) c++;
              }
            }
          } else {
            int docid;
            while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
              if (fastForRandomSet.exists(docid)) c++;
            }
          }
        }

        if (sortByCount) {
          if (c > min) {
            BytesRef termCopy = BytesRef.deepCopyOf(term);
            queue.add(new CountPair<BytesRef, Integer>(termCopy, c));
            if (queue.size() >= maxsize) min = queue.last().val;
          }
        } else {
          if (c >= mincount && --off < 0) {
            if (--lim < 0) break;
            ft.indexedToReadable(term, charsRef);
            res.add(charsRef.toString(), c);
          }
        }
      }

      term = termsEnum.next();
    }
  }

  if (sortByCount) {
    for (CountPair<BytesRef, Integer> p : queue) {
      if (--off >= 0) continue;
      if (--lim < 0) break;
      ft.indexedToReadable(p.key, charsRef);
      res.add(charsRef.toString(), p.val);
    }
  }

  if (missing) {
    res.add(null, getFieldMissingCount(searcher, docs, field));
  }

  return res;
}
From source file: org.apache.solr.search.DocSetUtil.java
License: Apache License
public static DocSet createDocSet(SolrIndexSearcher searcher, Term term) throws IOException {
  DirectoryReader reader = searcher.getRawReader(); // raw reader to avoid extra wrapping overhead
  int maxDoc = searcher.getIndexReader().maxDoc();
  int smallSetSize = smallSetSize(maxDoc);

  String field = term.field();
  BytesRef termVal = term.bytes();

  int maxCount = 0;
  int firstReader = -1;
  List<LeafReaderContext> leaves = reader.leaves();
  // use array for slightly higher scanning cost, but fewer memory allocations
  PostingsEnum[] postList = new PostingsEnum[leaves.size()];

  for (LeafReaderContext ctx : leaves) {
    assert leaves.get(ctx.ord) == ctx;
    LeafReader r = ctx.reader();
    Fields f = r.fields();
    Terms t = f.terms(field);
    if (t == null) continue; // field is missing
    TermsEnum te = t.iterator();
    if (te.seekExact(termVal)) {
      maxCount += te.docFreq();
      postList[ctx.ord] = te.postings(null, PostingsEnum.NONE);
      if (firstReader < 0) firstReader = ctx.ord;
    }
  }

  DocSet answer = null;
  if (maxCount == 0) {
    answer = DocSet.EMPTY;
  } else if (maxCount <= smallSetSize) {
    answer = createSmallSet(leaves, postList, maxCount, firstReader);
  } else {
    answer = createBigSet(leaves, postList, maxDoc, firstReader);
  }
  return DocSetUtil.getDocSet(answer, searcher);
}
From source file: org.apache.solr.search.facet.FacetFieldProcessorByEnumTermsStream.java
License: Apache License
private void setup() throws IOException {
  countOnly = freq.facetStats.size() == 0 || freq.facetStats.values().iterator().next() instanceof CountAgg;
  hasSubFacets = freq.subFacets.size() > 0;
  bucketsToSkip = freq.offset;

  createAccs(-1, 1);

  // Minimum term docFreq in order to use the filterCache for that term.
  if (freq.cacheDf == -1) { // -1 means never cache
    minDfFilterCache = Integer.MAX_VALUE;
  } else if (freq.cacheDf == 0) { // default; compute as fraction of maxDoc
    minDfFilterCache = Math.max(fcontext.searcher.maxDoc() >> 4, 3); // (minimum of 3 is for test coverage purposes)
  } else {
    minDfFilterCache = freq.cacheDf;
  }

  docs = fcontext.base;
  fastForRandomSet = null;

  if (freq.prefix != null) {
    String indexedPrefix = sf.getType().toInternal(freq.prefix);
    startTermBytes = new BytesRef(indexedPrefix);
  } else if (sf.getType().getNumericType() != null) {
    String triePrefix = TrieField.getMainValuePrefix(sf.getType());
    if (triePrefix != null) {
      startTermBytes = new BytesRef(triePrefix);
    }
  }

  Fields fields = fcontext.searcher.getSlowAtomicReader().fields();
  Terms terms = fields == null ? null : fields.terms(sf.getName());

  termsEnum = null;
  deState = null;
  term = null;

  if (terms != null) {
    termsEnum = terms.iterator();

    // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
    // facet.offset when sorting by index order.

    if (startTermBytes != null) {
      if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
        termsEnum = null;
      } else {
        term = termsEnum.term();
      }
    } else {
      // position termsEnum on first term
      term = termsEnum.next();
    }
  }

  List<LeafReaderContext> leafList = fcontext.searcher.getTopReaderContext().leaves();
  leaves = leafList.toArray(new LeafReaderContext[leafList.size()]);
}
From source file: org.apache.solr.search.FloatPayloadValueSource.java
License: Apache License
@Override
public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
  Fields fields = readerContext.reader().fields();
  final Terms terms = fields.terms(indexedField);

  FunctionValues defaultValues = defaultValueSource.getValues(context, readerContext);

  // copied the bulk of this from TFValueSource - TODO: this is a very repeated pattern - base-class this advance logic stuff?
  return new FloatDocValues(this) {
    PostingsEnum docs;
    int atDoc;
    int lastDocRequested = -1;

    {
      reset();
    }

    public void reset() throws IOException {
      // no one should call us for deleted docs?

      if (terms != null) {
        final TermsEnum termsEnum = terms.iterator();
        if (termsEnum.seekExact(indexedBytes)) {
          docs = termsEnum.postings(null, PostingsEnum.ALL);
        } else {
          docs = null;
        }
      } else {
        docs = null;
      }

      if (docs == null) {
        // dummy PostingsEnum so floatVal() can work
        // when would this be called? if field/val did not match? this is called for every doc? create once and cache?
        docs = new PostingsEnum() {
          @Override public int freq() { return 0; }
          @Override public int nextPosition() throws IOException { return -1; }
          @Override public int startOffset() throws IOException { return -1; }
          @Override public int endOffset() throws IOException { return -1; }
          @Override public BytesRef getPayload() throws IOException { return null; }
          @Override public int docID() { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int nextDoc() { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int advance(int target) { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public long cost() { return 0; }
        };
      }
      atDoc = -1;
    }

    @Override
    public float floatVal(int doc) {
      try {
        if (doc < lastDocRequested) {
          // out-of-order access.... reset
          reset();
        }
        lastDocRequested = doc;

        if (atDoc < doc) {
          atDoc = docs.advance(doc);
        }

        if (atDoc > doc) {
          // term doesn't match this document... either because we hit the
          // end, or because the next doc is after this doc.
          return defaultValues.floatVal(doc);
        }

        // a match!
        int freq = docs.freq();
        int numPayloadsSeen = 0;
        float currentScore = 0;
        for (int i = 0; i < freq; i++) {
          docs.nextPosition();
          BytesRef payload = docs.getPayload();
          if (payload != null) {
            float payloadVal = decoder.decode(atDoc, docs.startOffset(), docs.endOffset(), payload);

            // payloadFunction = null represents "first"
            if (payloadFunction == null) return payloadVal;

            currentScore = payloadFunction.currentScore(doc, indexedField, docs.startOffset(), docs.endOffset(),
                numPayloadsSeen, currentScore, payloadVal);
            numPayloadsSeen++;
          }
        }

        return (numPayloadsSeen > 0)
            ? payloadFunction.docScore(doc, indexedField, numPayloadsSeen, currentScore)
            : defaultValues.floatVal(doc);

      } catch (IOException e) {
        throw new RuntimeException("caught exception in function " + description() + " : doc=" + doc, e);
      }
    }
  };
}
From source file: org.apache.solr.search.function.TermFreqValueSource.java
License: Apache License
@Override
public DocValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
  Fields fields = readerContext.reader.fields();
  final Terms terms = fields.terms(field);

  return new IntDocValues(this) {
    DocsEnum docs;
    int atDoc;
    int lastDocRequested = -1;

    {
      reset();
    }

    public void reset() throws IOException {
      // no one should call us for deleted docs?
      docs = terms.docs(null, indexedBytes, null);
      if (docs == null) {
        docs = new DocsEnum() {
          @Override public int freq() { return 0; }
          @Override public int docID() { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int nextDoc() throws IOException { return DocIdSetIterator.NO_MORE_DOCS; }
          @Override public int advance(int target) throws IOException { return DocIdSetIterator.NO_MORE_DOCS; }
        };
      }
      atDoc = -1;
    }

    @Override
    public int intVal(int doc) {
      try {
        if (doc < lastDocRequested) {
          // out-of-order access.... reset
          reset();
        }
        lastDocRequested = doc;

        if (atDoc < doc) {
          atDoc = docs.advance(doc);
        }

        if (atDoc > doc) {
          // term doesn't match this document... either because we hit the
          // end, or because the next doc is after this doc.
          return 0;
        }

        // a match!
        return docs.freq();
      } catch (IOException e) {
        throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
            "caught exception in function " + description() + " : doc=" + doc, e);
      }
    }
  };
}
From source file: org.apache.solr.search.SolrIndexSearcher.java
License: Apache License
/**
 * Returns the first document number containing the term <code>t</code>.
 * Returns -1 if no document was found.
 * This method is primarily intended for clients that want to fetch
 * documents using a unique identifier.
 *
 * @return the first document number containing the term
 */
public int getFirstMatch(Term t) throws IOException {
  Fields fields = atomicReader.fields();
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null, DocsEnum.FLAG_NONE);
  if (docs == null) return -1;
  int id = docs.nextDoc();
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
From source file: org.apache.solr.search.TestRTGBase.java
License: Apache License
protected int getFirstMatch(IndexReader r, Term t) throws IOException {
  Fields fields = MultiFields.getFields(r);
  if (fields == null) return -1;
  Terms terms = fields.terms(t.field());
  if (terms == null) return -1;
  BytesRef termBytes = t.bytes();
  final TermsEnum termsEnum = terms.iterator(null);
  if (!termsEnum.seekExact(termBytes)) {
    return -1;
  }
  DocsEnum docs = termsEnum.docs(MultiFields.getLiveDocs(r), null, DocsEnum.FLAG_NONE);
  int id = docs.nextDoc();
  if (id != DocIdSetIterator.NO_MORE_DOCS) {
    int next = docs.nextDoc();
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, next);
  }
  return id == DocIdSetIterator.NO_MORE_DOCS ? -1 : id;
}
From source file: org.apache.solr.update.SolrIndexSplitter.java
License: Apache License
OpenBitSet[] split(AtomicReaderContext readerContext) throws IOException {
  AtomicReader reader = readerContext.reader();
  OpenBitSet[] docSets = new OpenBitSet[numPieces];
  for (int i = 0; i < docSets.length; i++) {
    docSets[i] = new OpenBitSet(reader.maxDoc());
  }

  Bits liveDocs = reader.getLiveDocs();

  Fields fields = reader.fields();
  Terms terms = fields == null ? null : fields.terms(field.getName());
  TermsEnum termsEnum = terms == null ? null : terms.iterator(null);
  if (termsEnum == null) return docSets;

  BytesRef term = null;
  DocsEnum docsEnum = null;

  CharsRef idRef = new CharsRef(100);
  for (;;) {
    term = termsEnum.next();
    if (term == null) break;

    // figure out the hash for the term
    // FUTURE: if conversion to strings costs too much, we could
    // specialize and use the hash function that can work over bytes.
    idRef = field.getType().indexedToReadable(term, idRef);
    String idString = idRef.toString();

    if (splitKey != null) {
      // todo have composite routers support these kind of things instead
      String part1 = getRouteKey(idString);
      if (part1 == null) continue;
      if (!splitKey.equals(part1)) {
        continue;
      }
    }

    int hash = 0;
    if (hashRouter != null) {
      hash = hashRouter.sliceHash(idString, null, null, null);
    }

    docsEnum = termsEnum.docs(liveDocs, docsEnum, DocsEnum.FLAG_NONE);
    for (;;) {
      int doc = docsEnum.nextDoc();
      if (doc == DocsEnum.NO_MORE_DOCS) break;
      if (ranges == null) {
        docSets[currPartition].fastSet(doc);
        currPartition = (currPartition + 1) % numPieces;
      } else {
        for (int i = 0; i < rangesArr.length; i++) { // inner-loop: use array here for extra speed.
          if (rangesArr[i].includes(hash)) {
            docSets[i].fastSet(doc);
          }
        }
      }
    }
  }

  return docSets;
}