List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
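maxDoc() returns one greater than the largest document number ever used in the index, so it counts deleted documents as well (numDocs() excludes them). Valid document ids therefore run from 0 to maxDoc() - 1, and some of them may point at deleted slots. Below is a minimal sketch of the typical iteration pattern, written against the pre-4.0 Lucene API that most of the examples on this page use (IndexReader.open, isDeleted); the index path "/path/to/index" and the stored field name "id" are placeholders, not part of any example below.

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            // maxDoc() is an upper bound on document numbers, not a count of live documents.
            for (int docId = 0; docId < reader.maxDoc(); docId++) {
                if (reader.isDeleted(docId)) {
                    continue; // skip deleted slots
                }
                Document doc = reader.document(docId);
                System.out.println(docId + "\t" + doc.get("id"));
            }
        } finally {
            reader.close();
        }
    }
}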
From source file:nmsu.cs.TFIDFVector.java
License:Open Source License
/**
 * calculate likelihood from the index
 * @param indexDir
 * @param lambda
 */
public void calLikelihoodFromIndex(String indexDir, double lambda) {
    try {
        IndexReader ir = IndexReader.open(FSDirectory.open(new File(indexDir)));
        IndexSearcher is = new IndexSearcher(ir);
        int numDocs = ir.maxDoc();
        double LLH = 0;
        // vocabulary list
        List<String> vocab = new ArrayList<String>();
        TermEnum te = ir.terms();
        // create vocabulary
        while (te.next()) {
            String term = te.term().text();
            // System.out.println(term);
            vocab.add(term);
        }
        TFIDFVector.vocabulary = vocab;
        // dataset id to index id
        Map<Integer, Integer> idMap = new HashMap<Integer, Integer>();
        for (int i = 0; i < numDocs; i++) {
            Document doc = ir.document(i);
            idMap.put(Integer.parseInt(doc.get("docid")), i);
        }
        // o -> a -> o'
        Map<Integer, Map<Integer, Map<Integer, Double>>> cosineSimMap =
                new HashMap<Integer, Map<Integer, Map<Integer, Double>>>();
        // (o | o') dataset id -> tfidf vector
        Map<Integer, TFIDFVector> docVectorMap = new HashMap<Integer, TFIDFVector>();
        // o -> a -> vector
        Map<Integer, Map<Integer, TFIDFVector>> docAspectVectorMap =
                new HashMap<Integer, Map<Integer, TFIDFVector>>();
        Set<Integer> citedSet = new HashSet<Integer>();
        // for all citing documents
        for (Map.Entry<Integer, List<Integer>> entry : rawdata.pubId2CiteIds.entrySet()) { // llh for citing documents
            int citingDatasetID = entry.getKey();
            int citingIndexID = idMap.get(citingDatasetID);
            // set up citing document vector
            TFIDFVector citingVector = BaseLineMethod.getFullTextTFIDFVector(docVectorMap, ir,
                    citingDatasetID, citingIndexID, numDocs);
            float sum = citingVector.sum();
            // System.out.println(Debugger.getCallerPosition()+" "+citingDatasetID);
            List<Integer> refList = entry.getValue();
            // for all aspects
            for (Integer aspectID : rawdata.id2Aspect.keySet()) {
                String aspect = rawdata.id2Aspect.get(aspectID);
                // set up citing document aspect vector
                double aspectSim = 0;
                if (rawdata.id2Docs.get(citingDatasetID).getText().get(aspectID).length() != 0) {
                    TFIDFVector citingAspectVector = BaseLineMethod.getAspectTFIDFVector(docAspectVectorMap, ir,
                            citingDatasetID, citingIndexID, aspectID, numDocs);
                    citingAspectVector.normalizedBy(sum);
                    int refSize = refList.size();
                    TFIDFVector[] citedVectors = new TFIDFVector[refSize];
                    double[] cosineSims = new double[refSize];
                    int count = 0;
                    // for all cited documents of this citing document
                    for (Integer citedDatasetID : refList) {
                        citedSet.add(citedDatasetID);
                        // set up cited document vector
                        int citedIndexID = idMap.get(citedDatasetID);
                        TFIDFVector citedVector = BaseLineMethod.getFullTextTFIDFVector(docVectorMap, ir,
                                citedDatasetID, citedIndexID, numDocs);
                        citedVector.normalize();
                        aspectSim = TFIDFVector.computeCosineSim(citedVector, citingAspectVector);
                        // System.out.println(Debugger.getCallerPosition()+"\t\t"+aspectSim);
                        System.out.println(citingDatasetID + "\t" + aspectID + "\t" + citedDatasetID + "\t" + aspectSim);
                        citedVectors[count] = citedVector;
                        cosineSims[count] = aspectSim;
                        count++;
                    }
                    double aspectLLH = citingAspectVector.posteriorLLH(citedVectors, cosineSims, lambda);
                    LLH += aspectLLH;
                }
                // Util.update3Map(cosineSimMap, citingDatasetID, aspectID, citedDatasetID, aspectSim);
            }
        }
        for (Integer citedDatasetID : citedSet) {
            int citedIndexID = idMap.get(citedDatasetID);
            TFIDFVector citedVector = BaseLineMethod.getFullTextTFIDFVector(docVectorMap, ir,
                    citedDatasetID, citedIndexID, numDocs);
            citedVector.normalize();
            LLH += citedVector.priorLLH();
        }
        System.out.println(LLH);
        is.close();
        ir.close();
    } catch (CorruptIndexException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
}
From source file:org.alfresco.solr.component.spellcheck.AlfrescoSpellCheckCollator.java
License:Open Source License
public List<AlfrescoSpellCheckCollation> collate(SpellingResult result, String originalQuery,
        ResponseBuilder ultimateResponse) {
    List<AlfrescoSpellCheckCollation> collations = new ArrayList<>();
    QueryComponent queryComponent = null;
    if (ultimateResponse.components != null) {
        for (SearchComponent sc : ultimateResponse.components) {
            if (sc instanceof QueryComponent) {
                queryComponent = (QueryComponent) sc;
                break;
            }
        }
    }
    boolean verifyCandidateWithQuery = true;
    int maxTries = maxCollationTries;
    int maxNumberToIterate = maxTries;
    if (maxTries < 1) {
        maxTries = 1;
        maxNumberToIterate = maxCollations;
        verifyCandidateWithQuery = false;
    }
    if (queryComponent == null && verifyCandidateWithQuery) {
        LOG.info("Could not find an instance of QueryComponent. Disabling collation verification against the index.");
        maxTries = 1;
        verifyCandidateWithQuery = false;
    }
    docCollectionLimit = docCollectionLimit > 0 ? docCollectionLimit : 0;
    int maxDocId = -1;
    if (verifyCandidateWithQuery && docCollectionLimit > 0) {
        IndexReader reader = ultimateResponse.req.getSearcher().getIndexReader();
        maxDocId = reader.maxDoc();
    }
    JSONObject alfrescoJSON = (JSONObject) ultimateResponse.req.getContext().get(AbstractQParser.ALFRESCO_JSON);
    String originalAftsQuery = alfrescoJSON != null ? alfrescoJSON.getString("query")
            : ultimateResponse.getQueryString();
    int tryNo = 0;
    int collNo = 0;
    PossibilityIterator possibilityIter = new PossibilityIterator(result.getSuggestions(), maxNumberToIterate,
            maxCollationEvaluations, suggestionsMayOverlap);
    while (tryNo < maxTries && collNo < maxCollations && possibilityIter.hasNext()) {
        PossibilityIterator.RankedSpellPossibility possibility = possibilityIter.next();
        String collationQueryStr = getCollation(originalQuery, possibility.corrections);
        int hits = 0;
        String aftsQuery = null;
        if (verifyCandidateWithQuery) {
            tryNo++;
            SolrQueryRequest req = ultimateResponse.req;
            SolrParams origParams = req.getParams();
            ModifiableSolrParams params = new ModifiableSolrParams(origParams);
            Iterator<String> origParamIterator = origParams.getParameterNamesIterator();
            int pl = SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE.length();
            while (origParamIterator.hasNext()) {
                String origParamName = origParamIterator.next();
                if (origParamName.startsWith(SpellingParams.SPELLCHECK_COLLATE_PARAM_OVERRIDE)
                        && origParamName.length() > pl) {
                    String[] val = origParams.getParams(origParamName);
                    if (val.length == 1 && val[0].length() == 0) {
                        params.set(origParamName.substring(pl), (String[]) null);
                    } else {
                        params.set(origParamName.substring(pl), val);
                    }
                }
            }
            // we don't set the 'q' param, as we'll pass the query via JSON.
            // params.set(CommonParams.Q, collationQueryStr);
            params.remove(CommonParams.START);
            params.set(CommonParams.ROWS, "" + docCollectionLimit);
            // we don't want any stored fields
            params.set(CommonParams.FL, "id");
            // we'll sort by doc id to ensure no scoring is done.
            params.set(CommonParams.SORT, "_docid_ asc");
            // If a dismax query, don't add unnecessary clauses for scoring
            params.remove(DisMaxParams.TIE);
            params.remove(DisMaxParams.PF);
            params.remove(DisMaxParams.PF2);
            params.remove(DisMaxParams.PF3);
            params.remove(DisMaxParams.BQ);
            params.remove(DisMaxParams.BF);
            // Collate testing does not support Grouping (see SOLR-2577)
            params.remove(GroupParams.GROUP);
            boolean useQStr = true;
            if (alfrescoJSON != null) {
                try {
                    aftsQuery = originalAftsQuery.replaceAll(Pattern.quote(originalQuery),
                            Matcher.quoteReplacement(collationQueryStr));
                    alfrescoJSON.put("query", aftsQuery);
                    req.getContext().put(AbstractQParser.ALFRESCO_JSON, alfrescoJSON);
                    useQStr = false;
                } catch (JSONException e) {
                    LOG.warn("Exception trying to get/set the query from/to ALFRESCO_JSON.]" + e);
                }
            } else {
                aftsQuery = collationQueryStr;
            }
            req.setParams(params);
            // creating a request here... make sure to close it!
            ResponseBuilder checkResponse = new ResponseBuilder(req, new SolrQueryResponse(),
                    Arrays.<SearchComponent>asList(queryComponent));
            checkResponse.setQparser(ultimateResponse.getQparser());
            checkResponse.setFilters(ultimateResponse.getFilters());
            checkResponse.components = Arrays.<SearchComponent>asList(queryComponent);
            if (useQStr) {
                checkResponse.setQueryString(collationQueryStr);
            }
            try {
                queryComponent.prepare(checkResponse);
                if (docCollectionLimit > 0) {
                    int f = checkResponse.getFieldFlags();
                    checkResponse.setFieldFlags(f |= SolrIndexSearcher.TERMINATE_EARLY);
                }
                queryComponent.process(checkResponse);
                hits = (Integer) checkResponse.rsp.getToLog().get("hits");
            } catch (EarlyTerminatingCollectorException etce) {
                assert (docCollectionLimit > 0);
                assert 0 < etce.getNumberScanned();
                assert 0 < etce.getNumberCollected();
                if (etce.getNumberScanned() == maxDocId) {
                    hits = etce.getNumberCollected();
                } else {
                    hits = (int) (((float) (maxDocId * etce.getNumberCollected())) / (float) etce.getNumberScanned());
                }
            } catch (Exception e) {
                LOG.warn("Exception trying to re-query to check if a spell check possibility would return any hits."
                        + e);
            } finally {
                checkResponse.req.close();
            }
        }
        if (hits > 0 || !verifyCandidateWithQuery) {
            collNo++;
            AlfrescoSpellCheckCollation collation = new AlfrescoSpellCheckCollation();
            collation.setCollationQuery(aftsQuery);
            collation.setCollationQueryString(collationQueryStr);
            collation.setHits(hits);
            collation.setInternalRank(suggestionsMayOverlap ? ((possibility.rank * 1000) + possibility.index)
                    : possibility.rank);
            NamedList<String> misspellingsAndCorrections = new NamedList<>();
            for (SpellCheckCorrection corr : possibility.corrections) {
                misspellingsAndCorrections.add(corr.getOriginal().toString(), corr.getCorrection());
            }
            collation.setMisspellingsAndCorrections(misspellingsAndCorrections);
            collations.add(collation);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Collation: " + aftsQuery
                    + (verifyCandidateWithQuery ? (" will return " + hits + " hits.") : ""));
        }
    }
    return collations;
}
From source file:org.apache.blur.manager.IndexManager.java
License:Apache License
public static void fetchRow(IndexReader reader, String table, String shard, Selector selector,
        FetchResult fetchResult, Query highlightQuery, FieldManager fieldManager, int maxHeap,
        TableContext tableContext, Filter filter) throws CorruptIndexException, IOException {
    try {
        fetchResult.table = table;
        String locationId = selector.locationId;
        int lastSlash = locationId.lastIndexOf('/');
        int docId = Integer.parseInt(locationId.substring(lastSlash + 1));
        if (docId >= reader.maxDoc()) {
            throw new RuntimeException("Location id [" + locationId + "] with docId [" + docId + "] is not valid.");
        }
        boolean returnIdsOnly = false;
        if (selector.columnFamiliesToFetch != null && selector.columnsToFetch != null
                && selector.columnFamiliesToFetch.isEmpty() && selector.columnsToFetch.isEmpty()) {
            // exit early
            returnIdsOnly = true;
        }
        Tracer t1 = Trace.trace("fetchRow - live docs");
        Bits liveDocs = MultiFields.getLiveDocs(reader);
        t1.done();
        ResetableDocumentStoredFieldVisitor fieldVisitor = getFieldSelector(selector);
        if (selector.isRecordOnly()) {
            // select only the row for the given data or location id.
            if (isFiltered(docId, reader, filter)) {
                fetchResult.exists = false;
                fetchResult.deleted = false;
                return;
            } else if (liveDocs != null && !liveDocs.get(docId)) {
                fetchResult.exists = false;
                fetchResult.deleted = true;
                return;
            } else {
                fetchResult.exists = true;
                fetchResult.deleted = false;
                reader.document(docId, fieldVisitor);
                Document document = fieldVisitor.getDocument();
                if (highlightQuery != null && fieldManager != null) {
                    HighlightOptions highlightOptions = selector.getHighlightOptions();
                    String preTag = highlightOptions.getPreTag();
                    String postTag = highlightOptions.getPostTag();
                    try {
                        document = HighlightHelper.highlight(docId, document, highlightQuery, fieldManager,
                                reader, preTag, postTag);
                    } catch (InvalidTokenOffsetsException e) {
                        LOG.error("Unknown error while trying to highlight", e);
                    }
                }
                fieldVisitor.reset();
                fetchResult.recordResult = getRecord(document);
                return;
            }
        } else {
            Tracer trace = Trace.trace("fetchRow - Row read");
            try {
                if (liveDocs != null && !liveDocs.get(docId)) {
                    fetchResult.exists = false;
                    fetchResult.deleted = true;
                    return;
                } else {
                    fetchResult.exists = true;
                    fetchResult.deleted = false;
                    if (returnIdsOnly) {
                        String rowId = selector.getRowId();
                        if (rowId == null) {
                            rowId = getRowId(reader, docId);
                        }
                        fetchResult.rowResult = new FetchRowResult();
                        fetchResult.rowResult.row = new Row(rowId, null);
                    } else {
                        List<Document> docs;
                        AtomicBoolean moreDocsToFetch = new AtomicBoolean(false);
                        AtomicInteger totalRecords = new AtomicInteger();
                        BlurHighlighter highlighter = new BlurHighlighter(highlightQuery, fieldManager, selector);
                        Tracer docTrace = Trace.trace("fetchRow - Document read");
                        docs = BlurUtil.fetchDocuments(reader, fieldVisitor, selector, maxHeap, table + "/" + shard,
                                tableContext.getDefaultPrimeDocTerm(), filter, moreDocsToFetch, totalRecords,
                                highlighter);
                        docTrace.done();
                        Tracer rowTrace = Trace.trace("fetchRow - Row create");
                        Row row = getRow(docs);
                        if (row == null) {
                            String rowId = selector.getRowId();
                            if (rowId == null) {
                                rowId = getRowId(reader, docId);
                            }
                            row = new Row(rowId, null);
                        }
                        fetchResult.rowResult = new FetchRowResult(row, selector.getStartRecord(),
                                selector.getMaxRecordsToFetch(), moreDocsToFetch.get(), totalRecords.get());
                        rowTrace.done();
                    }
                    return;
                }
            } finally {
                trace.done();
            }
        }
    } finally {
        if (fetchResult.rowResult != null) {
            if (fetchResult.rowResult.row != null && fetchResult.rowResult.row.records != null) {
                _readRecordsMeter.mark(fetchResult.rowResult.row.records.size());
            }
            _readRowMeter.mark();
        } else if (fetchResult.recordResult != null) {
            _readRecordsMeter.mark();
        }
    }
}
From source file:org.apache.gaelucene.tools.LuceneIndexPushUtil.java
License:Apache License
public static void main(String[] args) throws IOException {
    for (int i = 0; i < args.length; i++) {
        if ("-app-url".equals(args[i])) {
            gaeAppURL = args[++i];
        } else if ("-auth-cookie".equals(args[i])) {
            authCookie = args[++i];
        } else if ("-src".equals(args[i])) {
            sourceDirName = args[++i];
        } else if ("-cat".equals(args[i])) {
            category = args[++i];
        } else if ("-rec-file".equals(args[i])) {
            jobRecFileName = args[++i];
        }
    }
    if (gaeAppURL == null || authCookie == null || sourceDirName == null || category == null
            || jobRecFileName == null) {
        System.err.println(USAGE);
        System.exit(-1);
    }
    File sourceDir = new File(sourceDirName);
    if (!sourceDir.exists()) {
        System.err.println("'" + sourceDir.getAbsolutePath() + "' DOES NOT EXIST!");
        System.exit(-1);
    }
    sourceDirName = sourceDir.getAbsolutePath();
    // load filenames that have been uploaded successfully last time.
    HashSet<String> uploadedRec = new HashSet<String>();
    File jobRecFile = new File(jobRecFileName);
    if (jobRecFile.exists()) {
        LineNumberReader reader = new LineNumberReader(new FileReader(jobRecFile));
        for (String line = reader.readLine(); line != null;) {
            if (line.indexOf(" OK") > -1) {
                line = line.substring(0, line.indexOf(" ")).trim();
            }
            uploadedRec.add(line);
            line = reader.readLine();
        }
        reader.close();
    }
    System.out.println("[INFO ] - trying to open index under " + sourceDirName);
    IndexReader indexReader = IndexReader.open(sourceDir);
    int maxDoc = indexReader.maxDoc();
    int numDocs = indexReader.numDocs();
    long version = indexReader.getVersion();
    boolean hasDeletions = indexReader.hasDeletions();
    boolean isOptimized = indexReader.isOptimized();
    System.out.println("maxDoc:" + maxDoc);
    System.out.println("numDocs:" + numDocs);
    System.out.println("version:" + version);
    System.out.println("hasDeletions:" + hasDeletions);
    System.out.println("isOptimized:" + isOptimized);
    // record filenames that were uploaded successfully
    BufferedWriter dataWriter = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(jobRecFile, true)));
    System.out.println("[INFO ] - trying to synchronize the index files onto gae...");
    File[] files = sourceDir.listFiles();
    for (int i = 0; i < files.length; i++) {
        File file = files[i];
        if (uploadedRec.contains(file.getName())) {
            System.out.println("[INFO ] - skip file '" + file.getName() + "'");
            continue;
        }
        try {
            commitFile(file, category, version, i);
            dataWriter.write(file.getName() + " OK\n");
        } catch (IOException ioe) {
            System.out.println("[WARN ] - failed to upload '" + file.getName() + "', because:" + ioe);
        }
    }
    dataWriter.flush();
    dataWriter.close();
    System.out.println("[INFO ] - trying to activate the index...");
    try {
        activateIndex(category, version);
    } catch (IOException ioe) {
        System.out.println("[WARN ] - failed to activate the index, because:" + ioe);
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Creates a new <code>CachingIndexReader</code> based on
 * <code>delegatee</code>
 *
 * @param delegatee the base <code>IndexReader</code>.
 * @param cache     a document number cache, or <code>null</code> if not
 *                  available to this reader.
 * @param initCache if the {@link #parents} cache should be initialized
 *                  when this index reader is constructed. Otherwise
 *                  initialization happens in a background thread.
 * @throws IOException if an error occurs while reading from the index.
 */
CachingIndexReader(IndexReader delegatee, DocNumberCache cache, boolean initCache) throws IOException {
    super(delegatee);
    this.cache = cache;
    this.parents = new DocId[delegatee.maxDoc()];
    this.cacheInitializer = new CacheInitializer(delegatee);
    if (initCache) {
        cacheInitializer.run();
    } else {
        try {
            SERIAL_EXECUTOR.execute(cacheInitializer);
        } catch (InterruptedException e) {
            // ignore
        }
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Creates a new <code>CachingIndexReader</code> based on
 * <code>delegatee</code>
 *
 * @param delegatee the base <code>IndexReader</code>.
 * @param cache     a document number cache, or <code>null</code> if not
 *                  available to this reader.
 * @throws IOException if an error occurs while reading from the index.
 */
CachingIndexReader(IndexReader delegatee, DocNumberCache cache) throws IOException {
    super(delegatee);
    this.cache = cache;
    parents = new DocId[delegatee.maxDoc()];
    initializeParents(delegatee);
}
From source file:org.apache.jackrabbit.core.query.lucene.CachingIndexReader.java
License:Apache License
/**
 * Initializes the {@link #parents} <code>DocId</code> array.
 *
 * @param reader the underlying index reader.
 * @throws IOException if an error occurs while reading from the index.
 */
private void initializeParents(IndexReader reader) throws IOException {
    long time = System.currentTimeMillis();
    Map docs = new HashMap();
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (!reader.isDeleted(i)) {
            Document doc = reader.document(i, FieldSelectors.UUID_AND_PARENT);
            UUID uuid = UUID.fromString(doc.get(FieldNames.UUID));
            UUID parent = null;
            try {
                parent = UUID.fromString(doc.get(FieldNames.PARENT));
            } catch (IllegalArgumentException e) {
                // root node does not have a parent
            }
            NodeInfo info = new NodeInfo(i, uuid, parent);
            docs.put(uuid, info);
        }
    }
    double foreignParents = 0;
    Iterator it = docs.values().iterator();
    while (it.hasNext()) {
        NodeInfo info = (NodeInfo) it.next();
        NodeInfo parent = (NodeInfo) docs.get(info.parent);
        if (parent != null) {
            parents[info.docId] = DocId.create(parent.docId);
        } else if (info.parent != null) {
            foreignParents++;
            parents[info.docId] = DocId.create(info.parent);
        } else {
            // no parent -> root node
            parents[info.docId] = DocId.NULL;
        }
    }
    if (log.isDebugEnabled()) {
        NumberFormat nf = NumberFormat.getPercentInstance();
        nf.setMaximumFractionDigits(1);
        time = System.currentTimeMillis() - time;
        if (parents.length > 0) {
            foreignParents /= parents.length;
        }
        log.debug("initialized {} DocIds in {} ms, {} foreign parents",
                new Object[] { new Integer(parents.length), new Long(time), nf.format(foreignParents) });
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.AbstractIndex.java
License:Apache License
/**
 * Returns a read-only index reader, that can be used concurrently with
 * other threads writing to this index. The returned index reader is
 * read-only, that is, any attempt to delete a document from the index
 * will throw an <code>UnsupportedOperationException</code>.
 *
 * @return a read-only index reader.
 * @throws IOException if an error occurs while obtaining the index reader.
 */
synchronized ReadOnlyIndexReader getReadOnlyIndexReader() throws IOException {
    // get current modifiable index reader
    IndexReader modifiableReader = getIndexReader();
    // capture snapshot of deleted documents
    BitSet deleted = new BitSet(modifiableReader.maxDoc());
    for (int i = 0; i < modifiableReader.maxDoc(); i++) {
        if (modifiableReader.isDeleted(i)) {
            deleted.set(i);
        }
    }
    if (sharedReader == null) {
        // create new shared reader
        CachingIndexReader cr = new CachingIndexReader(IndexReader.open(getDirectory()), cache);
        sharedReader = new SharedIndexReader(cr);
    }
    return new ReadOnlyIndexReader(sharedReader, deleted);
}
From source file:org.apache.jackrabbit.core.query.lucene.SharedFieldCache.java
License:Apache License
/**
 * Creates a <code>ValueIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p/>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 *
 * @param reader     the <code>IndexReader</code>.
 * @param field      name of the shared field.
 * @param prefix     the property name, will be used as term prefix.
 * @param comparator the sort comparator instance.
 * @return a ValueIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public ValueIndex getValueIndex(IndexReader reader, String field, String prefix, SortComparator comparator)
        throws IOException {
    if (reader instanceof ReadOnlyIndexReader) {
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    field = field.intern();
    ValueIndex ret = lookup(reader, field, prefix, comparator);
    if (ret == null) {
        Comparable[] retArray = new Comparable[reader.maxDoc()];
        int setValues = 0;
        if (retArray.length > 0) {
            IndexFormatVersion version = IndexFormatVersion.getVersion(reader);
            boolean hasPayloads = version.isAtLeast(IndexFormatVersion.V3);
            TermDocs termDocs;
            byte[] payload = null;
            int type;
            if (hasPayloads) {
                termDocs = reader.termPositions();
                payload = new byte[1];
            } else {
                termDocs = reader.termDocs();
            }
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            char[] tmp = new char[16];
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    // make sure term is compacted
                    String text = term.text();
                    int len = text.length() - prefix.length();
                    if (tmp.length < len) {
                        // grow tmp
                        tmp = new char[len];
                    }
                    text.getChars(prefix.length(), text.length(), tmp, 0);
                    String value = new String(tmp, 0, len);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        type = PropertyType.UNDEFINED;
                        if (hasPayloads) {
                            TermPositions termPos = (TermPositions) termDocs;
                            termPos.nextPosition();
                            if (termPos.isPayloadAvailable()) {
                                payload = termPos.getPayload(payload, 0);
                                type = PropertyMetaData.fromByteArray(payload).getPropertyType();
                            }
                        }
                        setValues++;
                        retArray[termDocs.doc()] = getValue(value, type);
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        ValueIndex value = new ValueIndex(retArray, setValues);
        store(reader, field, prefix, comparator, value);
        return value;
    }
    return ret;
}
From source file:org.apache.jackrabbit.core.query.lucene.SharedFieldCache.java
License:Apache License
/**
 * Creates a <code>StringIndex</code> for a <code>field</code> and a term
 * <code>prefix</code>. The term prefix acts as the property name for the
 * shared <code>field</code>.
 * <p/>
 * This method is an adapted version of: <code>FieldCacheImpl.getStringIndex()</code>
 * The returned string index will <b>not</b> have a term lookup array!
 * See {@link SharedFieldSortComparator} for more info.
 *
 * @param reader     the <code>IndexReader</code>.
 * @param field      name of the shared field.
 * @param prefix     the property name, will be used as term prefix.
 * @param comparator the sort comparator instance.
 * @return a StringIndex that contains the field values and order
 *         information.
 * @throws IOException if an error occurs while reading from the index.
 */
public SharedFieldCache.StringIndex getStringIndex(IndexReader reader, String field, String prefix,
        SortComparator comparator) throws IOException {
    if (reader instanceof ReadOnlyIndexReader) {
        reader = ((ReadOnlyIndexReader) reader).getBase();
    }
    field = field.intern();
    SharedFieldCache.StringIndex ret = lookup(reader, field, prefix, comparator);
    if (ret == null) {
        final String[] retArray = new String[reader.maxDoc()];
        int setValues = 0;
        if (retArray.length > 0) {
            TermDocs termDocs = reader.termDocs();
            TermEnum termEnum = reader.terms(new Term(field, prefix));
            char[] tmp = new char[16];
            try {
                if (termEnum.term() == null) {
                    throw new RuntimeException("no terms in field " + field);
                }
                do {
                    Term term = termEnum.term();
                    if (term.field() != field || !term.text().startsWith(prefix)) {
                        break;
                    }
                    // make sure term is compacted
                    String text = term.text();
                    int len = text.length() - prefix.length();
                    if (tmp.length < len) {
                        // grow tmp
                        tmp = new char[len];
                    }
                    text.getChars(prefix.length(), text.length(), tmp, 0);
                    String value = new String(tmp, 0, len);
                    termDocs.seek(termEnum);
                    while (termDocs.next()) {
                        setValues++;
                        retArray[termDocs.doc()] = value;
                    }
                } while (termEnum.next());
            } finally {
                termDocs.close();
                termEnum.close();
            }
        }
        SharedFieldCache.StringIndex value = new SharedFieldCache.StringIndex(retArray, setValues);
        store(reader, field, prefix, comparator, value);
        return value;
    }
    return ret;
}