List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
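maxDoc() returns one greater than the largest document number ever used in the index, so it counts deleted documents as well (numDocs() excludes them). Valid document ids therefore run from 0 to maxDoc() - 1, and some of them may point at deleted slots. Below is a minimal sketch of the typical iteration pattern, written against the pre-4.0 Lucene API that most of the examples on this page use (IndexReader.open, isDeleted); the index path "/path/to/index" and the stored field name "id" are placeholders, not part of any example below.

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            // maxDoc() is an upper bound on document numbers, not a count of live documents.
            for (int docId = 0; docId < reader.maxDoc(); docId++) {
                if (reader.isDeleted(docId)) {
                    continue; // skip deleted slots
                }
                Document doc = reader.document(docId);
                System.out.println(docId + "\t" + doc.get("id"));
            }
        } finally {
            reader.close();
        }
    }
}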
From source file:nmsu.cs.TFIDFVector.java
License:Open Source License
/**
 * calculate likelihood from the index
 * @param indexDir
 * @param lambda
 */
public void calLikelihoodFromIndex(String indexDir, double lambda) {
    try {
        IndexReader ir = IndexReader.open(FSDirectory.open(new File(indexDir)));
        IndexSearcher is = new IndexSearcher(ir);
        int numDocs = ir.maxDoc();
        double LLH = 0;
        // vocabulary list
        List<String> vocab = new ArrayList<String>();
        TermEnum te = ir.terms();
        // create vocabulary
        while (te.next()) {
            String term = te.term().text();
            // System.out.println(term);
            vocab.add(term);
        }
        TFIDFVector.vocabulary = vocab;
        // dataset id to index id
        Map<Integer, Integer> idMap = new HashMap<Integer, Integer>();
        for (int i = 0; i < numDocs; i++) {
            Document doc = ir.document(i);
            idMap.put(Integer.parseInt(doc.get("docid")), i);
        }
        // o -> a -> o'
        Map<Integer, Map<Integer, Map<Integer, Double>>> cosineSimMap =
                new HashMap<Integer, Map<Integer, Map<Integer, Double>>>();
        // (o | o') dataset id -> tfidf vector
        Map<Integer, TFIDFVector> docVectorMap = new HashMap<Integer, TFIDFVector>();
        // o -> a -> vector
        Map<Integer, Map<Integer, TFIDFVector>> docAspectVectorMap =
                new HashMap<Integer, Map<Integer, TFIDFVector>>();
        Set<Integer> citedSet = new HashSet<Integer>();
        // for all citing documents
        for (Map.Entry<Integer, List<Integer>> entry : rawdata.pubId2CiteIds.entrySet()) { // llh for citing documents
            int citingDatasetID = entry.getKey();
            int citingIndexID = idMap.get(citingDatasetID);
            // set up citing document vector
            TFIDFVector citingVector = BaseLineMethod.getFullTextTFIDFVector(docVectorMap, ir,
                    citingDatasetID, citingIndexID, numDocs);
            float sum = citingVector.sum();
            // System.out.println(Debugger.getCallerPosition()+" "+citingDatasetID);
            List<Integer> refList = entry.getValue();
            // for all aspects
            for (Integer aspectID : rawdata.id2Aspect.keySet()) {
                String aspect = rawdata.id2Aspect.get(aspectID);
                // set up citing document aspect vector
                double aspectSim = 0;
                if (rawdata.id2Docs.get(citingDatasetID).getText().get(aspectID).length() != 0) {
                    TFIDFVector citingAspectVector = BaseLineMethod.getAspectTFIDFVector(docAspectVectorMap, ir,
                            citingDatasetID, citingIndexID, aspectID, numDocs);
                    citingAspectVector.normalizedBy(sum);
                    int refSize = refList.size();
                    TFIDFVector[] citedVectors = new TFIDFVector[refSize];
                    double[] cosineSims = new double[refSize];
                    int count = 0;
                    // for all cited documents of this citing document
                    for (Integer citedDatasetID : refList) {
                        citedSet.add(citedDatasetID);
                        // set up cited document vector
                        int citedIndexID = idMap.get(citedDatasetID);
                        TFIDFVector citedVector = BaseLineMethod.getFullTextTFIDFVector(docVectorMap, ir,
                                citedDatasetID, citedIndexID, numDocs);
                        citedVector.normalize();
                        aspectSim = TFIDFVector.computeCosineSim(citedVector, citingAspectVector);
                        // System.out.println(Debugger.getCallerPosition()+"\t\t"+aspectSim);
                        System.out.println(citingDatasetID + "\t" + aspectID + "\t" + citedDatasetID + "\t" + aspectSim);
                        citedVectors[count] = citedVector;
                        cosineSims[count] = aspectSim;
                        count++;
                    }
                    double aspectLLH = citingAspectVector.posteriorLLH(citedVectors, cosineSims, lambda);
                    LLH += aspectLLH;
                }
                // Util.update3Map(cosineSimMap, citingDatasetID, aspectID, citedDatasetID, aspectSim);
            }
        }
        for (Integer citedDatasetID : citedSet) {
            int citedIndexID = idMap.get(citedDatasetID);
            TFIDFVector citedVector = BaseLineMethod.getFullTextTFIDFVector(docVectorMap, ir,
                    citedDatasetID, citedIndexID, numDocs);
            citedVector.normalize();
            LLH += citedVector.priorLLH();
        }
        System.out.println(LLH);
        is.close();
        ir.close();
    } catch (CorruptIndexException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
From source file:org.alfresco.solr.component.spellcheck.AlfrescoSpellCheckCollator.java
License:Open Source License
public List<AlfrescoSpellCheckCollation> collate(SpellingResult result, String originalQuery,
        ResponseBuilder ultimateResponse) {
    List<AlfrescoSpellCheckCollation> collations = new ArrayList<>();
    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
        for (SearchComponent sc : ultimateResponse.components) {
            if (sc instanceof QueryComponent) {
                queryComponent = (QueryComponent) sc;
                break;
            }
        }
    }
    boolean verifyCandidateWithQuery = true;
    int maxTries = maxCollationTries;
    int maxNumberToIterate = maxTries;
    if (maxTries < 1) {
        maxTries = 1;
        maxNumberToIterate = maxCollations;
        verifyCandidateWithQuery = false;
    }
    if (queryComponent == null && verifyCandidateWithQuery) {
        LOG.info("Could not find an instance of QueryComponent. Disabling collation verification against the index.");
        maxTries = 1;
        verifyCandidateWithQuery = false;
    }
    docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
    int maxDocId = -1;
    if (verifyCandidateWithQuery && docCollectionLimit > 0) {
        IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
        maxDocId = reader.maxDoc();
    }
    JSONObject alfrescoJSON = (JSONObject) ultimateResponse.req.getContext().get(AbstractQParser.ALFRESCO_JSON);
    String originalAftsQuery = alfrescoJSON != null ? alfrescoJSON.getString("query")
            : ultimateResponse.getQueryString();
    int tryNo = 0;
    int collNo = 0;
    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate,
            maxCollationEvaluations, suggestionsMayOverlap);
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
        PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
        String collationQueryStr = getCollation(originalQuery, possibility.corrections);
        int hits = 0;
        String aftsQuery = null;
        if (verifyCandidateWithQuery) {
            tryNo++;
            SolrQueryRequest req = ultimateResponse.req;
            SolrParams origParams = req.getParams();
            ModifiableSolrParams params = new ModifiableSolrParams(origParams);
            Iterator<String> origParamIterator = origParams.getParameterNamesIterator();
            int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length();
            while (origParamIterator.hasNext()) {
                String origParamName = origParamIterator.next();
                if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE)
                        && origParamName.length() > pl) {
                    String[] val = origParams.getParams(origParamName);
                    if (val.length == 1 && val[0].length() == 0) {
                        params.set(origParamName.substring(pl), (String[]) null);
                    } else {
                        params.set(origParamName.substring(pl), val);
                    }
                }
            }
            // we don't set the 'q' param, as we'll pass the query via JSON.
            // params.set(CommonParams.Q, collationQueryStr);
            params.remove(CommonParams.START);
            params.set(CommonParams.ROWS, "" + docCollectionLimit);
            // we don't want any stored fields
            params.set(CommonParams.FL, "id");
            // we'll sort by doc id to ensure no scoring is done.
            params.set(CommonParams.SORT, "_docid_ asc");
            // If a dismax query, don't add unnecessary clauses for scoring
            params.remove(DisMaxParams.TIE);
            params.remove(DisMaxParams.PF);
            params.remove(DisMaxParams.PF2);
            params.remove(DisMaxParams.PF3);
            params.remove(DisMaxParams.BQ);
            params.remove(DisMaxParams.BF);
            // Collate testing does not support Grouping (see SOLR-2577)
            params.remove(GroupParams.GROUP);
            boolean useQStr = true;
            if (alfrescoJSON != null) {
                try {
                    aftsQuery = originalAftsQuery.replaceAll(Pattern.quote(originalQuery),
                            Matcher.quoteReplacement(collationQueryStr));
                    alfrescoJSON.put("query", aftsQuery);
                    req.getContext().put(AbstractQParser.ALFRESCO_JSON, alfrescoJSON);
                    useQStr = false;
                } catch (JSONException e) {
                    LOG.warn("Exception trying to get/set the query from/to ALFRESCO_JSON.]" + e);
                }
            } else {
                aftsQuery = collationQueryStr;
            }
            req.setParams(params);
            // creating a request here... make sure to close it!
            ResponseBuilder checkResponse = new ResponseBuilder(req, new SolrQueryResponse(),
                    Arrays.<SearchComponent>asList(queryComponent));
            checkResponse.setQparser(ultimateResponse.getQparser());
            checkResponse.setFilters(ultimateResponse.getFilters());
            checkResponse.components = Arrays.<SearchComponent>asList(queryComponent);
            if (useQStr) {
                checkResponse.setQueryString(collationQueryStr);
            }
            try {
                queryComponent.prepare(checkResponse);
                if (docCollectionLimit > 0) {
                    int f = checkResponse.getFieldFlags();
                    checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
                }
                queryComponent.process(checkResponse);
                hits = (Integer) checkResponse.rsp.getToLog().get("hits");
            } catch (EarlyTerminatingCollectorException etce) {
                assert (docCollectionLimit > 0);
                assert 0 < etce.getNumberScanned();
                assert 0 < etce.getNumberCollected();
                if (etce.getNumberScanned() == maxDocId) {
                    hits = etce.getNumberCollected();
                } else {
                    hits = (int) (((float) (maxDocId * etce.getNumberCollected())) / (float) etce.getNumberScanned());
                }
            } catch (Exception e) {
                LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits."
                        + e);
            } finally {
                checkResponse.req.close();
            }
        }
        if (hits > 0 || !verifyCandidateWithQuery) {
            collNo++;
            AlfrescoSpellCheckCollation collation = new AlfrescoSpellCheckCollation();
            collation.setCollationQuery(aftsQuery);
            collation.setCollationQueryString(collationQueryStr);
            collation.setHits(hits);
            collation.setInternalRank(suggestionsMayOverlap ? ((possibility.rank * 1000) + possibility.index)
                    : possibility.rank);
            NamedList<String> misspellingsAndCorrections = new NamedList<>();
            for (SpellCheckCorrection corr : possibility.corrections) {
                misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
            }
            collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
            collations.add(collation);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Collation: " + aftsQuery
                    + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
        }
    }
    return collations;
}
From source file:org.apache.blur.manager.IndexManager.java
License:Apache License
public static void fetchRow(IndexReader reader, String table, String shard, Selector selector,
        FetchResult fetchResult, Query highlightQuery, FieldManager fieldManager, int maxHeap,
        TableContext tableContext, Filter filter) throws CorruptIndexException, IOException {
    try {
        fetchResult.table = table;
        String locationId = selector.locationId;
        int lastSlash = locationId.lastIndexOf('/');
        int docId = Integer.parseInt(locationId.substring(lastSlash + 1));
        if (docId >= reader.maxDoc()) {
            throw new RuntimeException("Location id [" + locationId + "] with docId [" + docId + "] is not valid.");
        }
        boolean returnIdsOnly = false;
        if (selector.columnFamiliesToFetch != null && selector.columnsToFetch != null
                && selector.columnFamiliesToFetch.isEmpty() && selector.columnsToFetch.isEmpty()) {
            // exit early
            returnIdsOnly = true;
        }
        Tracer t1 = Trace.trace("fetchRow - live docs");
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        t1.done();
        ResetableDocumentStoredFieldVisitor fieldVisitor = getFieldSelector(selector);
        if (selector.isRecordOnly()) {
            // select only the row for the given data or location id.
            if (isFiltered(docId, reader, filter)) {
                fetchResult.exists = false;
                fetchResult.deleted = false;
                return;
            } else if (liveDocs != null && !liveDocs.get(docId)) {
                fetchResult.exists = false;
                fetchResult.deleted = true;
                return;
            } else {
                fetchResult.exists = true;
                fetchResult.deleted = false;
                reader.document(docId, fieldVisitor);
                Document document = fieldVisitor.getDocument();
                if (highlightQuery != null && fieldManager != null) {
                    HighlightOptions highlightOptions = selector.getHighlightOptions();
                    String preTag = highlightOptions.getPreTag();
                    String postTag = highlightOptions.getPostTag();
                    try {
                        document = HighlightHelper.highlight(docId, document, highlightQuery, fieldManager,
                                reader, preTag, postTag);
                    } catch (InvalidTokenOffsetsException e) {
                        LOG.error("Unknown error while trying to highlight", e);
                    }
                }
                fieldVisitor.reset();
                fetchResult.recordResult = getRecord(document);
                return;
            }
        } else {
            Tracer trace = Trace.trace("fetchRow - Row read");
            try {
                if (liveDocs != null && !liveDocs.get(docId)) {
                    fetchResult.exists = false;
                    fetchResult.deleted = true;
                    return;
                } else {
                    fetchResult.exists = true;
                    fetchResult.deleted = false;
                    if (returnIdsOnly) {
                        String rowId = selector.getRowId();
                        if (rowId == null) {
                            rowId = getRowId(reader, docId);
                        }
                        fetchResult.rowResult = new FetchRowResult();
                        fetchResult.rowResult.row = new Row(rowId, null);
                    } else {
                        List<Document> docs;
                        AtomicBoolean moreDocsToFetch = new AtomicBoolean(false);
                        AtomicInteger totalRecords = new AtomicInteger();
                        BlurHighlighter highlighter = new BlurHighlighter(highlightQuery, fieldManager, selector);
                        Tracer docTrace = Trace.trace("fetchRow - Document read");
                        docs = BlurUtil.fetchDocuments(reader, fieldVisitor, selector, maxHeap, table + "/" + shard,
                                tableContext.getDefaultPrimeDocTerm(), filter, moreDocsToFetch, totalRecords,
                                highlighter);
                        docTrace.done();
                        Tracer rowTrace = Trace.trace("fetchRow - Row create");
                        Row row = getRow(docs);
                        if (row == null) {
                            String rowId = selector.getRowId();
                            if (rowId == null) {
                                rowId = getRowId(reader, docId);
                            }
                            row = new Row(rowId, null);
                        }
                        fetchResult.rowResult = new FetchRowResult(row, selector.getStartRecord(),
                                selector.getMaxRecordsToFetch(), moreDocsToFetch.get(), totalRecords.get());
                        rowTrace.done();
                    }
                    return;
                }
            } finally {
                trace.done();
            }
        }
    } finally {
        if (fetchResult.rowResult != null) {
            if (fetchResult.rowResult.row != null && fetchResult.rowResult.row.records != null) {
                _readRecordsMeter.mark(fetchResult.rowResult.row.records.size());
            }
            _readRowMeter.mark();
        } else if (fetchResult.recordResult != null) {
            _readRecordsMeter.mark();
        }
    }
}
From source file:org.apache.gaelucene.tools.LuceneIndexPushUtil.java
License:Apache License
public static void main(String[] args) throws IOException {
    for (int i = 0; i < args.length; i++) {
        if ("-app-url".equals(args[i])) {
            gaeAppURL = args[++i];
        } else if ("-auth-cookie".equals(args[i])) {
            authCookie = args[++i];
        } else if ("-src".equals(args[i])) {
            sourceDirName = args[++i];
        } else if ("-cat".equals(args[i])) {
            category = args[++i];
        } else if ("-rec-file".equals(args[i])) {
            jobRecFileName = args[++i];
        }
    }
    if (gaeAppURL == null || authCookie == null || sourceDirName == null || category == null
            || jobRecFileName == null) {
        System.err.println(USAGE);
        System.exit(-1);
    }
    File sourceDir = new File(sourceDirName);
    if (!sourceDir.exists()) {
        System.err.println("'" + sourceDir.getAbsolutePath() + "' DOES NOT EXIST!");
        System.exit(-1);
    }
    sourceDirName = sourceDir.getAbsolutePath();
    // load filenames that have been uploaded successfully last time.
    HashSet<String> uploadedRec = new HashSet<String>();
    File jobRecFile = new File(jobRecFileName);
    if (jobRecFile.exists()) {
        LineNumberReader reader = new LineNumberReader(new FileReader(jobRecFile));
        for (String line = reader.readLine(); line != null;) {
            if (line.indexOf(" OK") > -1) {
                line = line.substring(0, line.indexOf(" ")).trim();
            }
            uploadedRec.add(line);
            line = reader.readLine();
        }
        reader.close();
    }
    System.out.println("[INFO ] - trying to open index under " + sourceDirName);
    IndexReader indexReader = IndexReader.open(sourceDir);
    int maxDoc = indexReader.maxDoc();
    int numDocs = indexReader.numDocs();
    long version = indexReader.getVersion();
    boolean hasDeletions = indexReader.hasDeletions();
    boolean isOptimized = indexReader.isOptimized();
    System.out.println("maxDoc:" + maxDoc);
    System.out.println("numDocs:" + numDocs);
    System.out.println("version:" + version);
    System.out.println("hasDeletions:" + hasDeletions);
    System.out.println("isOptimized:" + isOptimized);
    // record filenames that were uploaded successfully
    BufferedWriter dataWriter = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(jobRecFile, true)));
    System.out.println("[INFO ] - trying to synchronize the index files onto gae...");
    File[] files = sourceDir.listFiles();
    for (int i = 0; i < files.length; i++) {
        File file = files[i];
        if (uploadedRec.contains(file.getName())) {
            System.out.println("[INFO ] - skip file '" + file.getName() + "'");
            continue;
        }
        try {
            commitFile(file, category, version, i);
            dataWriter.write(file.getName() + " OK\n");
        } catch (IOException ioe) {
            System.out.println("[WARN ] - failed to upload '" + file.getName() + "', because:" + ioe);
        }
    }
    dataWriter.flush();
    dataWriter.close();
    System.out.println("[INFO ] - trying to activate the index...");
    try {
        activateIndex(category, version);
    } catch (IOException ioe) {
        System.out.println("[WARN ] - failed to activate the index, because:" + ioe);
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Creates a new <code>CachingIndexReader</code> based on
 * <code>delegatee</code>
 *
 * @param delegatee the base <code>IndexReader</code>.
 * @param cache     a document number cache, or <code>null</code> if not
 *                  available to this reader.
 * @param initCache if the {@link #parents} cache should be initialized
 *                  when this index reader is constructed. Otherwise
 *                  initialization happens in a background thread.
 * @throws IOException if an error occurs while reading from the index.
 */
CachingIndexReader(IndexReader delegatee, DocNumberCache cache, boolean initCache) throws IOException {
    super(delegatee);
    this.cache = cache;
    this.parents = new DocId[delegatee.maxDoc()];
    this.cacheInitializer = new CacheInitializer(delegatee);
    if (initCache) {
        cacheInitializer.run();
    } else {
        try {
            SERIAL_EXECUTOR.execute(cacheInitializer);
        } catch (InterruptedException e) {
            // ignore
        }
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Creates a new <code>CachingIndexReader</code> based on
 * <code>delegatee</code>
 *
 * @param delegatee the base <code>IndexReader</code>.
 * @param cache     a document number cache, or <code>null</code> if not
 *                  available to this reader.
 * @throws IOException if an error occurs while reading from the index.
 */
CachingIndexReader(IndexReader delegatee, DocNumberCache cache) throws IOException {
    super(delegatee);
    this.cache = cache;
    parents = new DocId[delegatee.maxDoc()];
    initializeParents(delegatee);
}
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Initializes the {@link #parents} <code>DocId</code> array.
 *
 * @param reader the underlying index reader.
 * @throws IOException if an error occurs while reading from the index.
 */
private void initializeParents(IndexReader reader) throws IOException {
    long time = System.currentTimeMillis();
    Map docs = new HashMap();
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (!reader.isDeleted(i)) {
            Document doc = reader.document(i, FieldSelectors.UUID_AND_PARENT);
            UUID uuid = UUID.fromString(doc.get(FieldNames.UUID));
            UUID parent = null;
            try {
                parent = UUID.fromString(doc.get(FieldNames.PARENT));
            } catch (IllegalArgumentException e) {
                // root node does not have a parent
            }
            NodeInfo info = new NodeInfo(i, uuid, parent);
            docs.put(uuid, info);
        }
    }
    double foreignParents = 0;
    Iterator it = docs.values().iterator();
    while (it.hasNext()) {
        NodeInfo info = (NodeInfo) it.next();
        NodeInfo parent = (NodeInfo) docs.get(info.parent);
        if (parent != null) {
            parents[info.docId] = DocId.create(parent.docId);
        } else if (info.parent != null) {
            foreignParents++;
            parents[info.docId] = DocId.create(info.parent);
        } else {
            // no parent -> root node
            parents[info.docId] = DocId.NULL;
        }
    }
    if (log.isDebugEnabled()) {
        NumberFormat nf = NumberFormat.getPercentInstance();
        nf.setMaximumFractionDigits(1);
        time = System.currentTimeMillis() - time;
        if (parents.length > 0) {
            foreignParents /= parents.length;
        }
        log.debug("initialized {} DocIds in {} ms, {} foreign parents",
                new Object[] { new Integer(parents.length), new Long(time), nf.format(foreignParents) });
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java
License:Apache License
/**
 * Returns a read-only index reader, that can be used concurrently with
 * other threads writing to this index. The returned index reader is
 * read-only, that is, any attempt to delete a document from the index
 * will throw an <code>UnsupportedOperationException</code>.
 *
 * @return a read-only index reader.
 * @throws IOException if an error occurs while obtaining the index reader.
 */
synchronized ReadOnlyIndexReader getReadOnlyIndexReader() throws IOException {
    // get current modifiable index reader
    IndexReader modifiableReader = getIndexReader();
    // capture snapshot of deleted documents
    BitSet deleted = new BitSet(modifiableReader.maxDoc());
    for (int i = 0; i < modifiableReader.maxDoc(); i++) {
        if (modifiableReader.isDeleted(i)) {
            deleted.set(i);
        }
    }
    if (sharedReader == null) {
        // create new shared reader
        CachingIndexReader cr = new CachingIndexReader(IndexReader.open(getDirectory()), cache);
        sharedReader = new SharedIndexReader(cr);
    }
    return new ReadOnlyIndexReader(sharedReader, deleted);
}
From source file:org.apache.jackrabbit.core.query.lucene.SharedFieldCache.java
License:Apache License
/**
 * Creates a <code>ValueIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p/>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 *
 * @param reader     the <code>IndexReader</code>.
 * @param field      name of the shared field.
 * @param prefix     the property name, will be used as term prefix.
 * @param comparator the sort comparator instance.
 * @return a ValueIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix, SortComparator comparator)
        throws IOException {
    if (reader instanceof ReadOnlyIndexReader) {
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    field = field.intern();
    ValueIndex ret = lookup(reader, field, prefix, comparator);
    if (ret == null) {
        Comparable[] retArray = new Comparable[reader.maxDoc()];
        int setValues = 0;
        if (retArray.length > 0) {
            IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
            boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
            TermDocs termDocs;
            byte[] payload = null;
            int type;
            if (hasPayloads) {
                termDocs = reader.termPositions();
                payload = new byte[1];
            } else {
                termDocs = reader.termDocs();
            }
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            char[] tmp = new char[16];
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    // make sure term is compacted
                    String text = term.text();
                    int len = text.length() - prefix.length();
                    if (tmp.length < len) {
                        // grow tmp
                        tmp = new char[len];
                    }
                    text.getChars(prefix.length(), text.length(), tmp, 0);
                    String value = new String(tmp, 0, len);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        type = PropertyType.UNDEFINED;
                        if (hasPayloads) {
                            TermPositions termPos = (TermPositions) termDocs;
                            termPos.nextPosition();
                            if (termPos.isPayloadAvailable()) {
                                payload = termPos.getPayload(payload, 0);
                                type = PropertyMetaData.fromByteArray(payload).getPropertyType();
                            }
                        }
                        setValues++;
                        retArray[termDocs.doc()] = getValue(value, type);
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        ValueIndex value = new ValueIndex(retArray, setValues);
        store(reader, field, prefix, comparator, value);
        return value;
    }
    return ret;
}
From source file:org.apache.jackrabbit.core.query.lucene.SharedFieldCache.java
License:Apache License
/**
 * Creates a <code>StringIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p/>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 * The returned string index will <b>not</b> have a term lookup array!
 * See {@link SharedFieldSortComparator} for more info.
 *
 * @param reader     the <code>IndexReader</code>.
 * @param field      name of the shared field.
 * @param prefix     the property name, will be used as term prefix.
 * @param comparator the sort comparator instance.
 * @return a StringIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public SharedFieldCache.StringIndex getStringIndex(IndexReader reader, String field, String prefix,
        SortComparator comparator) throws IOException {
    if (reader instanceof ReadOnlyIndexReader) {
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    field = field.intern();
    SharedFieldCache.StringIndex ret = lookup(reader, field, prefix, comparator);
    if (ret == null) {
        final String[] retArray = new String[reader.maxDoc()];
        int setValues = 0;
        if (retArray.length > 0) {
            TermDocs termDocs = reader.termDocs();
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            char[] tmp = new char[16];
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    // make sure term is compacted
                    String text = term.text();
                    int len = text.length() - prefix.length();
                    if (tmp.length < len) {
                        // grow tmp
                        tmp = new char[len];
                    }
                    text.getChars(prefix.length(), text.length(), tmp, 0);
                    String value = new String(tmp, 0, len);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        setValues++;
                        retArray[termDocs.doc()] = value;
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        SharedFieldCache.StringIndex value = new SharedFieldCache.StringIndex(retArray, setValues);
        store(reader, field, prefix, comparator, value);
        return value;
    }
    return ret;
}