Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

This page collects example usages of org.apache.lucene.index.IndexReader.maxDoc().

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.
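
Since document numbers run from 0 to maxDoc() - 1 and may include deleted slots, code that walks a whole index typically loops up to maxDoc() and skips deletions via the live-docs bitset, as most of the examples below do. A minimal, self-contained sketch (Lucene 4.x API; the index path and the "docId" stored field are placeholders mirroring the first example):

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        // "/path/to/index" is a placeholder; point it at an existing index
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")))) {
            Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the index has no deletions
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs != null && !liveDocs.get(i))
                    continue; // skip logically deleted documents
                Document doc = reader.document(i);
                System.out.println(i + " -> " + doc.get("docId"));
            }
        }
    }
}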

Usage

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Sets up the indexer for read-only access. If write access is needed,
 * call setupForWrite; if both read and write are needed, call both.
 */
synchronized void setupForRead() {
    log.info("setting up index for read only access");
    long startTime = System.currentTimeMillis();

    //closeHandles();
    try {
        setupDirectory();

        String[] defaultSearchFields, defaultSearchFieldsOriginal;
        String[] defaultSearchFieldSubject = new String[] { "title" }; // for subject only search
        String[] defaultSearchFieldCorrespondents;
        // the body field must be included, since attachment content is stored there; should the meta field also be included?
        // why search over en-names and en-names-original when body/body_original is already in the search fields?
        defaultSearchFields = new String[] { "body", "title", "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        defaultSearchFieldsOriginal = new String[] { "body_original", "title" }; // we want to leave title there because we want to always hit the title -- discussed with Peter June 27 2015
        defaultSearchFieldCorrespondents = new String[] { "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        // names field added above after email discussion with Sit 6/11/2013. problem is that we're not using the Lucene EnglishPossessiveFilter, so
        // NER will extract the name Stanford University in a sentence like:
        // "This is Stanford University's website."
        // but when the user clicks on the name "Stanford University" in say monthly cards, we
        // will not match the message with this sentence because of the apostrophe.

        // for searching an attachment by fileName
        String[] metaSearchFields = new String[] { "fileName" };
        // construct the query parsers, one per group of default search fields
        if (parser == null) {
            //parser = new QueryParser(MUSE_LUCENE_VERSION, defaultSearchField, analyzer);
            parser = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFields, analyzer);
            parserOriginal = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldsOriginal, analyzer);
            parserSubject = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldSubject, analyzer);
            parserCorrespondents = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldCorrespondents,
                    analyzer);
            parserMeta = new MultiFieldQueryParser(LUCENE_VERSION, metaSearchFields, new KeywordAnalyzer());
        }

        /**
         * A bunch of gotchas here.
         * It's a bad idea to store Lucene's internal docIds, as no assumptions should be made
         * about them -- not even that they are sequential. When searching, Lucene may skip
         * logically deleted docs. Deleted docs are not removed immediately, and having them
         * in the index may bring down search performance by 50%; they are cleaned up only
         * when index segments are merged.
         */
        int numContentDocs = 0, numContentDeletedDocs = 0, numAttachmentDocs = 0, numAttachmentDeletedDocs = 0;
        if (DirectoryReader.indexExists(directory)) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            if (ireader.numDeletedDocs() > 0)
                log.warn("!!!!!!!\nIndex reader has " + ireader.numDocs() + " doc(s) of which "
                        + ireader.numDeletedDocs() + " are deleted)\n!!!!!!!!!!");
            isearcher = new IndexSearcher(ireader);
            contentDocIds = new LinkedHashMap<>();
            numContentDocs = ireader.numDocs();
            numContentDeletedDocs = ireader.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader);
            Set<String> fieldsToLoad = new HashSet<>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;

                if (doc == null || doc.get("docId") == null)
                    continue;
                contentDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + contentDocIds.size() + " content docs");
        }

        if (DirectoryReader.indexExists(directory_blob)) {
            IndexReader ireader_blob = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader_blob);
            blobDocIds = new LinkedHashMap<Integer, String>();

            numAttachmentDocs = ireader_blob.numDocs();
            numAttachmentDeletedDocs = ireader_blob.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader_blob);
            Set<String> fieldsToLoad = new HashSet<String>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader_blob.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader_blob.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;

                if (doc == null || doc.get("docId") == null)
                    continue;
                blobDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + blobDocIds.size() + " attachment docs");
        }

        log.warn("Number of content docs: " + numContentDocs + ", number deleted: " + numContentDeletedDocs);
        log.warn("Number of attachment docs: " + numAttachmentDocs + ", number deleted: "
                + numAttachmentDeletedDocs);

        if (dirNameToDocIdMap == null)
            dirNameToDocIdMap = new LinkedHashMap<String, Map<Integer, String>>();
    } catch (Exception e) {
        Util.print_exception(e, log);
    }
    log.info("Setting up index for read took " + (System.currentTimeMillis() - startTime) + " ms");
}
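
The gotchas comment above notes that deleted docs are purged only when segments merge. If the deleted-docs warning fires frequently, the deletions can be expunged explicitly with an IndexWriter. A minimal sketch, assuming the directory, analyzer, and LUCENE_VERSION fields used in setupForRead (a hypothetical helper, not part of the original class):

synchronized void expungeDeletes() throws IOException {
    // forceMergeDeletes() rewrites the segments that contain deletions, so that
    // afterwards maxDoc() == numDocs(); it can be expensive on large indexes
    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        writer.forceMergeDeletes();
    }
}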

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/**
 * Get the facets and results
 * @see edu.ur.ir.institution.InstitutionalItemSearchService#executeSearchWithFacets(java.lang.String, java.lang.String, int, int, int, int)
 */
public FacetSearchHelper executeSearchWithFacets(String mainQueryString, String indexFolder,
        int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets, int numberOfFactsToShow,
        int numberOfIdsToCollect, int idsToCollectStartPosition)
        throws CorruptIndexException, IOException, ParseException {
    log.debug("orginal query 4 = " + mainQueryString);
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);
    Query mainQuery = parser.parse(executedQuery);
    if (log.isDebugEnabled()) {
        log.debug("main query = " + executedQuery);
        log.debug(
                "main query parsed = " + mainQuery + " maxNumberOfMainQueryHits = " + maxNumberOfMainQueryHits);
    }

    TopDocs topDocs = searcher.search(mainQuery, maxNumberOfMainQueryHits);

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(topDocs,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);
    log.debug("executeSearchWithFacets 1 query = " + mainQuery);
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();

    // process the data and determine the facets
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            topDocs, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);
    helper.setExecutedQuery(executedQuery);
    searcher.close();
    reader.close();
    return helper;
}
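
A typical call site looks like the following; the service instance and all parameter values are illustrative only:

FacetSearchHelper helper = searchService.executeSearchWithFacets(
        "climate change",               // mainQueryString
        "/path/to/institutional-index", // indexFolder
        100,                            // numberOfHitsToProcessForFacets
        20,                             // numberOfResultsToCollectForFacets
        10,                             // numberOfFactsToShow
        25,                             // numberOfIdsToCollect (page size)
        0);                             // idsToCollectStartPosition (first page)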

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/**
 * Determines the number of hits for each facet across the main query.
 *
 * @param facets
 * @param reader
 * @param mainQueryBitSet
 * @param searcher
 * @throws ParseException
 * @throws IOException
 */
private void processFacetCategory(Collection<FacetResult> facets, IndexReader reader,
        OpenBitSetDISI mainQueryBitSet, IndexSearcher searcher) throws ParseException, IOException {
    for (FacetResult f : facets) {
        long count = 0;

        String searchString = f.getFacetName();

        if (!searchString.trim().equals("")) {
            QueryParser subQueryParser = new QueryParser(Version.LUCENE_35, f.getField(), keywordAnalyzer);
            searchString = "\"" + searchString + "\"";
            Query subQuery = subQueryParser.parse(searchString);

            QueryWrapperFilter subQueryWrapper = new QueryWrapperFilter(subQuery);

            log.debug("Fixed query in process facet catagory 2 = " + subQuery + " subQueryWrapper = "
                    + subQueryWrapper);

            DocIdSet subQueryBits = subQueryWrapper.getDocIdSet(reader);

            OpenBitSetDISI subQuerybitSet = new OpenBitSetDISI(subQueryBits.iterator(), reader.maxDoc());

            count = getFacetHitCount(mainQueryBitSet, subQuerybitSet);

            log.debug("count = " + count);
        } else {
            log.error("bad search string " + searchString);
        }
        f.setHits(count);
    }
}
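
getFacetHitCount is not shown on this page. Assuming it counts the documents that match both the main query and the facet sub-query, it reduces to a bitset intersection; OpenBitSet.intersectionCount is a static helper available in Lucene 3.5. A possible sketch:

private long getFacetHitCount(OpenBitSetDISI mainQueryBitSet, OpenBitSetDISI subQueryBitSet) {
    // cardinality of the intersection of the two doc-id sets;
    // intersectionCount does not modify either argument
    return OpenBitSet.intersectionCount(mainQueryBitSet, subQueryBitSet);
}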

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/**
 * Execute the search with a set of facet filters.
 *
 * @see edu.ur.ir.repository.InstitutionalItemSearchService#executeSearchWithFacets(java.lang.String, java.util.Set, java.lang.String, int, int, int)
 */
public FacetSearchHelper executeSearchWithFacets(String mainQueryString, List<FacetFilter> filters,
        String indexFolder, int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets,
        int numberOfFactsToShow, int numberOfIdsToCollect, int idsToCollectStartPosition)
        throws CorruptIndexException, IOException, ParseException {

    log.debug("orignal query 3 = " + mainQueryString);

    // return if the main query is invalid
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);

    if (log.isDebugEnabled()) {
        log.debug("parsed query = " + executedQuery.trim());
    }
    Query mainQuery = parser.parse(executedQuery);

    //create a filter for the main query
    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);

    // get the bitset for main query
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());
    TopDocs hits = null;
    if (filters.size() > 0) {
        // create a filter that will match the main query plus all other filters
        List<Filter> luceneFilters = getSubQueryFilters(filters, searcher);
        Filter filter = new ChainedFilter(luceneFilters.toArray(new Filter[luceneFilters.size()]),
                ChainedFilter.AND);
        if (log.isDebugEnabled()) {
            log.debug("filter = " + filter);
        }

        // apply the facets and include them in the main query bit set
        DocIdSet filterQueryBits = filter.getDocIdSet(reader);

        OpenBitSetDISI filterBitSet = new OpenBitSetDISI(filterQueryBits.iterator(), reader.maxDoc());
        mainQueryBitSet.and(filterBitSet);

        hits = searcher.search(mainQuery, filter, maxNumberOfMainQueryHits);
        log.debug(" executeSearchWithFacets 2 = mainQuery = " + executedQuery + " filter = " + filter);
    } else {
        hits = searcher.search(mainQuery, maxNumberOfMainQueryHits);
        log.debug(" executeSearchWithFacets 3 = mainQuery = " + mainQuery);

    }

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(hits,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            hits, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);

    helper.setExecutedQuery(executedQuery);
    helper.setFacetTrail(filters);

    searcher.close();
    reader.close();
    return helper;
}

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

public FacetSearchHelper executeSearchWithFacets(String mainQueryString, List<FacetFilter> filters,
        String indexFolder, int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets,
        int numberOfFactsToShow, int numberOfIdsToCollect, int idsToCollectStartPosition,
        InstitutionalCollection collection) throws CorruptIndexException, IOException, ParseException {

    log.debug("orginal query 2 = " + mainQueryString);
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);
    Query mainQuery = parser.parse(executedQuery);

    if (log.isDebugEnabled()) {
        log.debug("parsed query = " + executedQuery);
    }
    //create a filter for the main query
    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);

    // get the bitset for main query
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);

    List<Filter> luceneFilters = new LinkedList<Filter>();

    if (filters.size() > 0) {
        // create a filter that will match the main query plus all other filters
        luceneFilters.addAll(getSubQueryFilters(filters, searcher));
    }
    // add filters for the collection first
    luceneFilters.addAll(0, getCollectionFilters(collection));

    Filter filter = new ChainedFilter(luceneFilters.toArray(new Filter[luceneFilters.size()]),
            ChainedFilter.AND);

    if (log.isDebugEnabled()) {
        log.debug("filter = " + filter);
    }

    // get the filter query doc id set
    DocIdSet filterQueryBits = filter.getDocIdSet(reader);

    // apply the facets and include them in the main query bit set
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());
    OpenBitSetDISI filterBitSet = new OpenBitSetDISI(filterQueryBits.iterator(), reader.maxDoc());
    mainQueryBitSet.and(filterBitSet);

    TopDocs hits = searcher.search(mainQuery, filter, maxNumberOfMainQueryHits);
    log.debug(" executeSearchWithFacets 4 = mainQuery = " + mainQuery + " filter = " + filter
            + "maxNumberOfMainQueryHits = " + maxNumberOfMainQueryHits);

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(hits,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            hits, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);

    helper.setFacetTrail(filters);
    helper.setExecutedQuery(executedQuery);
    searcher.close();
    reader.close();
    return helper;
}

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

public FacetSearchHelper executeSearchWithFacets(String mainQueryString, String indexFolder,
        int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets, int numberOfFactsToShow,
        int numberOfIdsToCollect, int idsToCollectStartPosition, InstitutionalCollection collection)
        throws CorruptIndexException, IOException, ParseException {
    log.debug("orginal query 1= " + mainQueryString);
    log.debug("execute search with facets for a collection");
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        log.debug("problem with search!");
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);
    Query mainQuery = parser.parse(executedQuery);

    if (log.isDebugEnabled()) {
        log.debug("Executed query = " + executedQuery);
    }

    // toArray(new Filter[0]) avoids trailing null entries if fewer than two filters are returned
    Filter[] aFilters = this.getCollectionFilters(collection).toArray(new Filter[0]);

    Filter chainedFilter = new ChainedFilter(aFilters, ChainedFilter.AND);

    //create a filter for the main query
    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);

    // get the bitset for main query
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);

    // get the filter query doc id set
    DocIdSet filterQueryBits = chainedFilter.getDocIdSet(reader);

    // apply the filters for the collection root and range
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());
    OpenBitSetDISI filterBitSet = new OpenBitSetDISI(filterQueryBits.iterator(), reader.maxDoc());
    mainQueryBitSet.and(filterBitSet);

    log.debug(" executeSearchWithFacets 5 = mainQuery = " + mainQuery + " filter = " + chainedFilter);
    TopDocs hits = searcher.search(mainQuery, chainedFilter, maxNumberOfMainQueryHits);

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(hits,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();
    // process the data and determine the facets
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            hits, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);

    helper.setExecutedQuery(executedQuery);
    searcher.close();
    reader.close();
    return helper;
}

From source file:indexer.SplitCells.java

public static SplitCells readFromIndex(IndexReader reader) throws Exception {
    SplitCells splitCells = new SplitCells();

    // The last document contains the split information.
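    // Note: this assumes the index has no deletions; with deletions, maxDoc() - 1
    // may refer to a deleted slot rather than the most recently added document.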
    int maxDoc = reader.maxDoc();
    Document splitCellInfoDoc = reader.document(maxDoc - 1);

    String splitCellsInfo = splitCellInfoDoc.get(OptimizedRealValuedVecIndexer.SPLIT_CELLS_FIELD);
    if (splitCellsInfo == null)
        return null;

    String[] tokens = splitCellsInfo.split("\\s+");
    for (String token : tokens) {
        Cell cell = new Cell(token);
        splitCells.addSplit(cell);
    }

    return splitCells;
}

From source file:indexing.eval.Eval.java

License:Open Source License

private static float[] calculateVocabularyGrowth(int per_number_of_reviews, boolean cumulative, TermType type) {
    try {
        IndexReader ir = IndexReader.open(new SimpleFSDirectory(new File(Paths.luceneIndex)), true);
        String field = "text";

        float num_docs = ir.maxDoc();
        int num_agegroups = (int) num_docs / per_number_of_reviews;
        float age_totals[] = new float[num_agegroups];

        String internedField = field.intern();
        TermEnum te = ir.terms(new Term(internedField, ""));
        Term term = te.term();

        while (term != null) {
            // stop once the enumeration leaves the target field
            // (term.field() is interned, so reference comparison is intentional)
            if (internedField != term.field()) {
                break;
            }
            if (TermTypeFilter.isTermType(term.text(), type)) {
                TermDocs td = ir.termDocs(term);
                if (td.next()) { // guard against terms with no live postings
                    float firstdocid = td.doc();
                    int age_bracket = (int) (firstdocid / num_docs * num_agegroups);
                    age_totals[age_bracket]++;
                }
            }

            if (te.next()) {
                term = te.term();
            } else {
                term = null;// ends loop
            }
        }

        float total = 0.0f;
        float max = 0.0f;
        for (int i = 0; i < age_totals.length; i++) {
            if (age_totals[i] > max) {
                max = age_totals[i];
            }
            total += age_totals[i];
            if (i > 0 && cumulative) {
                age_totals[i] += age_totals[i - 1]; // make totals cumulative
            }
        }

        return age_totals;
    } catch (Exception e) {
        AppLogger.error.log(Level.SEVERE, "calculateVocabularyGrowth failed\n" + e.getMessage());
    }

    return null;
}

From source file:indexing.eval.Eval.java

License:Open Source License

public static void printReviewIds() {
    try {
        IndexReader ir = IndexReader.open(new SimpleFSDirectory(new File(Paths.luceneIndex)), true);
        String field = "reviewid";

        int ndocs = ir.maxDoc();
        for (int i = 0; i < ndocs; i++) {
            Document doc = ir.document(i);
            System.out.println(doc.get(field));
        }

    } catch (Exception e) {
        AppLogger.error.log(Level.SEVERE, "printReviewIds failed\n" + e.getMessage());
    }

}

From source file:info.boytsov.lucene.CheckSort.java

License:Open Source License

public static void main(String[] args) {
    if (args.length != 2) {
        printUsage();
        System.exit(1);
    }
    int dir = 1;

    String srcDirName = args[0];
    System.out.println("Source dir: " + srcDirName);
    if (args[1].equals("forward"))
        dir = 1;
    else if (args[1].equals("backward"))
        dir = -1;
    else {
        System.err.println("Invalid direction: " + args[1]);
        printUsage();
        System.exit(1);
    }

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(srcDirName)));

        int docQty = reader.maxDoc();
        int sortTable[] = new int[docQty];

        Arrays.fill(sortTable, -1);

        int sortedQty = 0;

        double sortedStreak = 0;
        int sortedStreakQty = 0;

        URL2DocID remap[] = new URL2DocID[docQty];

        String prevURL = "";

        int prevSorted = 0;

        for (int docID = 0; docID < docQty; ++docID) {
            Document doc = reader.document(docID);
            String url = doc.get("url");
            if (dir > 0) {
                remap[docID] = new URL2DocID(url, docID);
            } else {
                remap[docQty - 1 - docID] = new URL2DocID(url, docID);
            }
            if (docID % 100000 == 0) {
                System.out.println("Collected " + (docID + 1) + " URLs, sorted so far, direct " + sortedQty
                        + " avg. sorted streak QTY: " + (sortedStreak / sortedStreakQty) + " sortedStreakQty: "
                        + sortedStreakQty);
            }
            // assuming URLs should appear in increasing order (decreasing when dir == -1)
            if (dir * url.compareTo(prevURL) >= 0) {
                ++sortedQty;
            } else {
                sortedStreak += docID - prevSorted - 1;
                sortedStreakQty++;

                prevSorted = docID;
            }
            prevURL = url;
        }

        System.out.println("Collected " + docQty + " URLs, sorted so far, direct " + sortedQty
                + " avg. sorted streak QTY: " + (sortedStreak / sortedStreakQty) + " sortedStreakQty: "
                + sortedStreakQty);

        double invQty = Inversions.count(remap);
        System.out.println("A total number of inversions: " + invQty + " relative to n*(n-1)/2: "
                + (invQty * 2.0 / docQty / (docQty + 1)));

    } catch (Exception e) {
        System.err.println("Error: " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }

}
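
Inversions.count is not shown on this page. Assuming URL2DocID implements Comparable (ordering by URL), a standard merge-sort based counter could look like this sketch; note that it sorts the input array as a side effect:

// counts pairs (i, j) with i < j and a[i] > a[j] in O(n log n)
public static <T extends Comparable<T>> long countInversions(T[] a) {
    return mergeCount(a, a.clone(), 0, a.length);
}

private static <T extends Comparable<T>> long mergeCount(T[] a, T[] tmp, int lo, int hi) {
    if (hi - lo <= 1)
        return 0;
    int mid = (lo + hi) >>> 1;
    long count = mergeCount(a, tmp, lo, mid) + mergeCount(a, tmp, mid, hi);
    // merge the two sorted halves, counting cross-half inversions
    int i = lo, j = mid, k = lo;
    while (i < mid && j < hi) {
        if (a[i].compareTo(a[j]) <= 0) {
            tmp[k++] = a[i++];
        } else {
            tmp[k++] = a[j++];
            count += mid - i; // every remaining element in the left half exceeds a[j]
        }
    }
    while (i < mid)
        tmp[k++] = a[i++];
    while (j < hi)
        tmp[k++] = a[j++];
    System.arraycopy(tmp, lo, a, lo, hi - lo);
    return count;
}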