Example usage for org.apache.lucene.index IndexReader maxDoc

Introduction

This page collects example usages of org.apache.lucene.index.IndexReader.maxDoc().

Prototype

public abstract int maxDoc();

Document

Returns one greater than the largest possible document number.
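
Since document numbers run from 0 to maxDoc() - 1 and may include deleted slots, code that walks a whole index typically loops up to maxDoc() and skips deletions via the live-docs bitset, as most of the examples below do. A minimal, self-contained sketch (Lucene 4.x API; the index path and the "docId" stored field are placeholders mirroring the first example):

import java.io.File;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        // "/path/to/index" is a placeholder; point it at an existing index
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/path/to/index")))) {
            Bits liveDocs = MultiFields.getLiveDocs(reader); // null when the index has no deletions
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs != null && !liveDocs.get(i))
                    continue; // skip logically deleted documents
                Document doc = reader.document(i);
                System.out.println(i + " -> " + doc.get("docId"));
            }
        }
    }
}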

Usage

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Sets up the indexer for read-only access. If write access is needed,
 * call setupForWrite; if both read and write are needed, call both.
 */
synchronized void setupForRead() {
    log.info("setting up index for read only access");
    long startTime = System.currentTimeMillis();

    //closeHandles();
    try {
        setupDirectory();

        String[] defaultSearchFields, defaultSearchFieldsOriginal;
        String[] defaultSearchFieldSubject = new String[] { "title" }; // for subject only search
        String[] defaultSearchFieldCorrespondents;
        // the body field must be included, since attachment content is stored there; should the meta field also be included?
        // why search over en-names and en-names-original when body/body_original is already in the search fields?
        defaultSearchFields = new String[] { "body", "title", "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        defaultSearchFieldsOriginal = new String[] { "body_original", "title" }; // we want to leave title there because we want to always hit the title -- discussed with Peter June 27 2015
        defaultSearchFieldCorrespondents = new String[] { "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        // names field added above after email discussion with Sit 6/11/2013. problem is that we're not using the Lucene EnglishPossessiveFilter, so
        // NER will extract the name Stanford University in a sentence like:
        // "This is Stanford University's website."
        // but when the user clicks on the name "Stanford University" in say monthly cards, we
        // will not match the message with this sentence because of the apostrophe.

        // for searching an attachment by fileName
        String[] metaSearchFields = new String[] { "fileName" };
        // construct the query parsers, one per group of default search fields
        if (parser == null) {
            //parser = new QueryParser(MUSE_LUCENE_VERSION, defaultSearchField, analyzer);
            parser = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFields, analyzer);
            parserOriginal = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldsOriginal, analyzer);
            parserSubject = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldSubject, analyzer);
            parserCorrespondents = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldCorrespondents,
                    analyzer);
            parserMeta = new MultiFieldQueryParser(LUCENE_VERSION, metaSearchFields, new KeywordAnalyzer());
        }

        /**
         * A bunch of gotchas here.
         * It's a bad idea to store Lucene's internal docIds, as no assumptions should be made
         * about them -- not even that they are sequential. When searching, Lucene may skip
         * logically deleted docs. Deleted docs are not removed immediately, and having them
         * in the index may bring down search performance by 50%; they are cleaned up only
         * when index segments are merged.
         */
        int numContentDocs = 0, numContentDeletedDocs = 0, numAttachmentDocs = 0, numAttachmentDeletedDocs = 0;
        if (DirectoryReader.indexExists(directory)) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            if (ireader.numDeletedDocs() > 0)
                log.warn("!!!!!!!\nIndex reader has " + ireader.numDocs() + " doc(s) of which "
                        + ireader.numDeletedDocs() + " are deleted)\n!!!!!!!!!!");
            isearcher = new IndexSearcher(ireader);
            contentDocIds = new LinkedHashMap<>();
            numContentDocs = ireader.numDocs();
            numContentDeletedDocs = ireader.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader);
            Set<String> fieldsToLoad = new HashSet<>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;

                if (doc == null || doc.get("docId") == null)
                    continue;
                contentDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + contentDocIds.size() + " content docs");
        }

        if (DirectoryReader.indexExists(directory_blob)) {
            IndexReader ireader_blob = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader_blob);
            blobDocIds = new LinkedHashMap<Integer, String>();

            numAttachmentDocs = ireader_blob.numDocs();
            numAttachmentDeletedDocs = ireader_blob.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader_blob);
            Set<String> fieldsToLoad = new HashSet<String>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader_blob.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader_blob.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;

                if (doc == null || doc.get("docId") == null)
                    continue;
                blobDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + blobDocIds.size() + " attachment docs");
        }

        log.warn("Number of content docs: " + numContentDocs + ", number deleted: " + numContentDeletedDocs);
        log.warn("Number of attachment docs: " + numAttachmentDocs + ", number deleted: "
                + numAttachmentDeletedDocs);

        if (dirNameToDocIdMap == null)
            dirNameToDocIdMap = new LinkedHashMap<String, Map<Integer, String>>();
    } catch (Exception e) {
        Util.print_exception(e, log);
    }
    log.info("Setting up index for read took " + (System.currentTimeMillis() - startTime) + " ms");
}
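
The gotchas comment above notes that deleted docs are purged only when segments merge. If the deleted-docs warning fires frequently, the deletions can be expunged explicitly with an IndexWriter. A minimal sketch, assuming the directory, analyzer, and LUCENE_VERSION fields used in setupForRead (a hypothetical helper, not part of the original class):

synchronized void expungeDeletes() throws IOException {
    // forceMergeDeletes() rewrites the segments that contain deletions, so that
    // afterwards maxDoc() == numDocs(); it can be expensive on large indexes
    IndexWriterConfig config = new IndexWriterConfig(LUCENE_VERSION, analyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        writer.forceMergeDeletes();
    }
}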

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/**
 * Get the facets and results
 * @see edu.ur.ir.institution.InstitutionalItemSearchService#executeSearchWithFacets(java.lang.String, java.lang.String, int, int, int, int)
 */
public FacetSearchHelper executeSearchWithFacets(String mainQueryString, String indexFolder,
        int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets, int numberOfFactsToShow,
        int numberOfIdsToCollect, int idsToCollectStartPosition)
        throws CorruptIndexException, IOException, ParseException {
    log.debug("orginal query 4 = " + mainQueryString);
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);
    Query mainQuery = parser.parse(executedQuery);
    if (log.isDebugEnabled()) {
        log.debug("main query = " + executedQuery);
        log.debug(
                "main query parsed = " + mainQuery + " maxNumberOfMainQueryHits = " + maxNumberOfMainQueryHits);
    }

    TopDocs topDocs = searcher.search(mainQuery, maxNumberOfMainQueryHits);

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(topDocs,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);
    log.debug("executeSearchWithFacets 1 query = " + mainQuery);
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();

    // process the data and determine the facets
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            topDocs, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);
    helper.setExecutedQuery(executedQuery);
    searcher.close();
    reader.close();
    return helper;
}
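
A typical call site looks like the following; the service instance and all parameter values are illustrative only:

FacetSearchHelper helper = searchService.executeSearchWithFacets(
        "climate change",               // mainQueryString
        "/path/to/institutional-index", // indexFolder
        100,                            // numberOfHitsToProcessForFacets
        20,                             // numberOfResultsToCollectForFacets
        10,                             // numberOfFactsToShow
        25,                             // numberOfIdsToCollect (page size)
        0);                             // idsToCollectStartPosition (first page)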

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/**
 * Determines the number of hits for each facet across the main query.
 *
 * @param facets
 * @param reader
 * @param mainQueryBitSet
 * @param searcher
 * @throws ParseException
 * @throws IOException
 */
private void processFacetCategory(Collection<FacetResult> facets, IndexReader reader,
        OpenBitSetDISI mainQueryBitSet, IndexSearcher searcher) throws ParseException, IOException {
    for (FacetResult f : facets) {
        long count = 0;

        String searchString = f.getFacetName();

        if (!searchString.trim().equals("")) {
            QueryParser subQueryParser = new QueryParser(Version.LUCENE_35, f.getField(), keywordAnalyzer);
            searchString = "\"" + searchString + "\"";
            Query subQuery = subQueryParser.parse(searchString);

            QueryWrapperFilter subQueryWrapper = new QueryWrapperFilter(subQuery);

            log.debug("Fixed query in process facet catagory 2 = " + subQuery + " subQueryWrapper = "
                    + subQueryWrapper);

            DocIdSet subQueryBits = subQueryWrapper.getDocIdSet(reader);

            OpenBitSetDISI subQuerybitSet = new OpenBitSetDISI(subQueryBits.iterator(), reader.maxDoc());

            count = getFacetHitCount(mainQueryBitSet, subQuerybitSet);

            log.debug("count = " + count);
        } else {
            log.error("bad search string " + searchString);
        }
        f.setHits(count);
    }
}
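
getFacetHitCount is not shown on this page. Assuming it counts the documents that match both the main query and the facet sub-query, it reduces to a bitset intersection; OpenBitSet.intersectionCount is a static helper available in Lucene 3.5. A possible sketch:

private long getFacetHitCount(OpenBitSetDISI mainQueryBitSet, OpenBitSetDISI subQueryBitSet) {
    // cardinality of the intersection of the two doc-id sets;
    // intersectionCount does not modify either argument
    return OpenBitSet.intersectionCount(mainQueryBitSet, subQueryBitSet);
}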

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

/**
 * Execute the search with a set of facet filters.
 *
 * @see edu.ur.ir.repository.InstitutionalItemSearchService#executeSearchWithFacets(java.lang.String, java.util.Set, java.lang.String, int, int, int)
 */
public FacetSearchHelper executeSearchWithFacets(String mainQueryString, List<FacetFilter> filters,
        String indexFolder, int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets,
        int numberOfFactsToShow, int numberOfIdsToCollect, int idsToCollectStartPosition)
        throws CorruptIndexException, IOException, ParseException {

    log.debug("orignal query 3 = " + mainQueryString);

    // return if the main query is invalid
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);

    if (log.isDebugEnabled()) {
        log.debug("parsed query = " + executedQuery.trim());
    }
    Query mainQuery = parser.parse(executedQuery);

    //create a filter for the main query
    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);

    // get the bitset for main query
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());
    TopDocs hits = null;
    if (filters.size() > 0) {
        // create a filter that will match the main query plus all other filters
        List<Filter> luceneFilters = getSubQueryFilters(filters, searcher);
        Filter filter = new ChainedFilter(luceneFilters.toArray(new Filter[luceneFilters.size()]),
                ChainedFilter.AND);
        if (log.isDebugEnabled()) {
            log.debug("filter = " + filter);
        }

        // apply the facets and include them in the main query bit set
        DocIdSet filterQueryBits = filter.getDocIdSet(reader);

        OpenBitSetDISI filterBitSet = new OpenBitSetDISI(filterQueryBits.iterator(), reader.maxDoc());
        mainQueryBitSet.and(filterBitSet);

        hits = searcher.search(mainQuery, filter, maxNumberOfMainQueryHits);
        log.debug(" executeSearchWithFacets 2 = mainQuery = " + executedQuery + " filter = " + filter);
    } else {
        hits = searcher.search(mainQuery, maxNumberOfMainQueryHits);
        log.debug(" executeSearchWithFacets 3 = mainQuery = " + mainQuery);

    }

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(hits,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            hits, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);

    helper.setExecutedQuery(executedQuery);
    helper.setFacetTrail(filters);

    searcher.close();
    reader.close();
    return helper;
}

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

public FacetSearchHelper executeSearchWithFacets(String mainQueryString, List<FacetFilter> filters,
        String indexFolder, int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets,
        int numberOfFactsToShow, int numberOfIdsToCollect, int idsToCollectStartPosition,
        InstitutionalCollection collection) throws CorruptIndexException, IOException, ParseException {

    log.debug("orginal query 2 = " + mainQueryString);
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);
    Query mainQuery = parser.parse(executedQuery);

    if (log.isDebugEnabled()) {
        log.debug("parsed query = " + executedQuery);
    }
    //create a filter for the main query
    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);

    // get the bitset for main query
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);

    List<Filter> luceneFilters = new LinkedList<Filter>();

    if (filters.size() > 0) {
        // create a filter that will match the main query plus all other filters
        luceneFilters.addAll(getSubQueryFilters(filters, searcher));
    }
    // add filters for the collection first
    luceneFilters.addAll(0, getCollectionFilters(collection));

    Filter filter = new ChainedFilter(luceneFilters.toArray(new Filter[luceneFilters.size()]),
            ChainedFilter.AND);

    if (log.isDebugEnabled()) {
        log.debug("filter = " + filter);
    }

    // get the filter query doc id set
    DocIdSet filterQueryBits = filter.getDocIdSet(reader);

    // apply the facets and include them in the main query bit set
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());
    OpenBitSetDISI filterBitSet = new OpenBitSetDISI(filterQueryBits.iterator(), reader.maxDoc());
    mainQueryBitSet.and(filterBitSet);

    TopDocs hits = searcher.search(mainQuery, filter, maxNumberOfMainQueryHits);
    log.debug(" executeSearchWithFacets 4 = mainQuery = " + mainQuery + " filter = " + filter
            + "maxNumberOfMainQueryHits = " + maxNumberOfMainQueryHits);

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(hits,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            hits, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);

    helper.setFacetTrail(filters);
    helper.setExecutedQuery(executedQuery);
    searcher.close();
    reader.close();
    return helper;
}

From source file:edu.ur.ir.institution.service.DefaultInstitutionalItemSearchService.java

License:Apache License

public FacetSearchHelper executeSearchWithFacets(String mainQueryString, String indexFolder,
        int numberOfHitsToProcessForFacets, int numberOfResultsToCollectForFacets, int numberOfFactsToShow,
        int numberOfIdsToCollect, int idsToCollectStartPosition, InstitutionalCollection collection)
        throws CorruptIndexException, IOException, ParseException {
    log.debug("orginal query 1= " + mainQueryString);
    log.debug("execute search with facets for a collection");
    if (searchDirectoryIsEmpty(indexFolder) || isInvalidQuery(mainQueryString)) {
        log.debug("problem with search!");
        return new FacetSearchHelper(new HashSet<Long>(), 0, new HashMap<String, Collection<FacetResult>>(),
                mainQueryString);
    }

    FSDirectory directory = FSDirectory.open(new File(indexFolder));
    IndexReader reader = IndexReader.open(directory, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer, getBoostedFields());
    parser.setDefaultOperator(QueryParser.AND_OPERATOR);

    // execute the main query - its results are used to determine the facet searches
    // the second argument to prepareMainSearchString MUST BE false if diacritic-based
    // searches are to work: appending a * to a term with a diacritic does not work
    String executedQuery = SearchHelper.prepareMainSearchString(mainQueryString, false);
    Query mainQuery = parser.parse(executedQuery);

    if (log.isDebugEnabled()) {
        log.debug("Executed query = " + executedQuery);
    }

    // toArray(new Filter[0]) avoids trailing null entries if fewer than two filters are returned
    Filter[] aFilters = this.getCollectionFilters(collection).toArray(new Filter[0]);

    Filter chainedFilter = new ChainedFilter(aFilters, ChainedFilter.AND);

    //create a filter for the main query
    QueryWrapperFilter mainQueryWrapper = new QueryWrapperFilter(mainQuery);

    // get the bitset for main query
    DocIdSet mainQueryBits = mainQueryWrapper.getDocIdSet(reader);

    // get the filter query doc id set
    DocIdSet filterQueryBits = chainedFilter.getDocIdSet(reader);

    // apply the filters for the collection root and range
    OpenBitSetDISI mainQueryBitSet = new OpenBitSetDISI(mainQueryBits.iterator(), reader.maxDoc());
    OpenBitSetDISI filterBitSet = new OpenBitSetDISI(filterQueryBits.iterator(), reader.maxDoc());
    mainQueryBitSet.and(filterBitSet);

    log.debug(" executeSearchWithFacets 5 = mainQuery = " + mainQuery + " filter = " + chainedFilter);
    TopDocs hits = searcher.search(mainQuery, chainedFilter, maxNumberOfMainQueryHits);

    // determine the set of data we should use to determine facets
    HashMap<String, HashMap<String, FacetResult>> possibleFacets = this.generateFacetSearches(hits,
            numberOfHitsToProcessForFacets, numberOfResultsToCollectForFacets, searcher);

    HashMap<String, Collection<FacetResult>> facetResults = new HashMap<String, Collection<FacetResult>>();
    // process the data and determine the facets
    FacetSearchHelper helper = processPossibleFacets(possibleFacets, reader, mainQueryBitSet, facetResults,
            hits, numberOfIdsToCollect, idsToCollectStartPosition, numberOfFactsToShow, mainQueryString,
            searcher);

    helper.setExecutedQuery(executedQuery);
    searcher.close();
    reader.close();
    return helper;
}

From source file:indexer.SplitCells.java

public static SplitCells readFromIndex(IndexReader reader) throws Exception {
    SplitCells splitCells = new SplitCells();

    // The last document contains the split information.
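    // Note: this assumes the index has no deletions; with deletions, maxDoc() - 1
    // may refer to a deleted slot rather than the most recently added document.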
    int maxDoc = reader.maxDoc();
    Document splitCellInfoDoc = reader.document(maxDoc - 1);

    String splitCellsInfo = splitCellInfoDoc.get(OptimizedRealValuedVecIndexer.SPLIT_CELLS_FIELD);
    if (splitCellsInfo == null)
        return null;

    String[] tokens = splitCellsInfo.split("\\s+");
    for (String token : tokens) {
        Cell cell = new Cell(token);
        splitCells.addSplit(cell);
    }

    return splitCells;
}

From source file:indexing.eval.Eval.java

License:Open Source License

private static float[] calculateVocabularyGrowth(int per_number_of_reviews, boolean cumulative, TermType type) {
    try {
        IndexReader ir = IndexReader.open(new SimpleFSDirectory(new File(Paths.luceneIndex)), true);
        String field = "text";

        float num_docs = ir.maxDoc();
        int num_agegroups = (int) num_docs / per_number_of_reviews;
        float age_totals[] = new float[num_agegroups];

        String internedField = field.intern();
        TermEnum te = ir.terms(new Term(internedField, ""));
        Term term = te.term();

        while (term != null) {
            // stop once the enumeration leaves the target field
            // (term.field() is interned, so reference comparison is intentional)
            if (internedField != term.field()) {
                break;
            }
            if (TermTypeFilter.isTermType(term.text(), type)) {
                TermDocs td = ir.termDocs(term);
                if (td.next()) { // guard against terms with no live postings
                    float firstdocid = td.doc();
                    int age_bracket = (int) (firstdocid / num_docs * num_agegroups);
                    age_totals[age_bracket]++;
                }
            }

            if (te.next()) {
                term = te.term();
            } else {
                term = null;// ends loop
            }
        }

        float total = 0.0f;
        float max = 0.0f;
        for (int i = 0; i < age_totals.length; i++) {
            if (age_totals[i] > max) {
                max = age_totals[i];
            }
            total += age_totals[i];
            if (i > 0 && cumulative) {
                age_totals[i] += age_totals[i - 1]; // make totals cumulative
            }
        }

        return age_totals;
    } catch (Exception e) {
        AppLogger.error.log(Level.SEVERE, "calculateVocabularyGrowth failed\n" + e.getMessage());
    }

    return null;
}

From source file:indexing.eval.Eval.java

License:Open Source License

public static void printReviewIds() {
    try {
        IndexReader ir = IndexReader.open(new SimpleFSDirectory(new File(Paths.luceneIndex)), true);
        String field = "reviewid";

        int ndocs = ir.maxDoc();
        for (int i = 0; i < ndocs; i++) {
            Document doc = ir.document(i);
            System.out.println(doc.get(field));
        }

    } catch (Exception e) {
        AppLogger.error.log(Level.SEVERE, "printReviewIds failed\n" + e.getMessage());
    }

}

From source file:info.boytsov.lucene.CheckSort.java

License:Open Source License

public static void main(String[] args) {
    if (args.length != 2) {
        printUsage();
        System.exit(1);
    }
    int dir = 1;

    String srcDirName = args[0];
    System.out.println("Source dir: " + srcDirName);
    if (args[1].equals("forward"))
        dir = 1;
    else if (args[1].equals("backward"))
        dir = -1;
    else {
        System.err.println("Invalid direction: " + args[1]);
        printUsage();
        System.exit(1);
    }

    try {
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(srcDirName)));

        int docQty = reader.maxDoc();
        int sortTable[] = new int[docQty];

        Arrays.fill(sortTable, -1);

        int sortedQty = 0;

        double sortedStreak = 0;
        int sortedStreakQty = 0;

        URL2DocID remap[] = new URL2DocID[docQty];

        String prevURL = "";

        int prevSorted = 0;

        for (int docID = 0; docID < docQty; ++docID) {
            Document doc = reader.document(docID);
            String url = doc.get("url");
            if (dir > 0) {
                remap[docID] = new URL2DocID(url, docID);
            } else {
                remap[docQty - 1 - docID] = new URL2DocID(url, docID);
            }
            if (docID % 100000 == 0) {
                System.out.println("Collected " + (docID + 1) + " URLs, sorted so far, direct " + sortedQty
                        + " avg. sorted streak QTY: " + (sortedStreak / sortedStreakQty) + " sortedStreakQty: "
                        + sortedStreakQty);
            }
            // assuming URLs should appear in increasing order (decreasing when dir == -1)
            if (dir * url.compareTo(prevURL) >= 0) {
                ++sortedQty;
            } else {
                sortedStreak += docID - prevSorted - 1;
                sortedStreakQty++;

                prevSorted = docID;
            }
            prevURL = url;
        }

        System.out.println("Collected " + docQty + " URLs, sorted so far, direct " + sortedQty
                + " avg. sorted streak QTY: " + (sortedStreak / sortedStreakQty) + " sortedStreakQty: "
                + sortedStreakQty);

        double invQty = Inversions.count(remap);
        System.out.println("A total number of inversions: " + invQty + " relative to n*(n-1)/2: "
                + (invQty * 2.0 / docQty / (docQty + 1)));

    } catch (Exception e) {
        System.err.println("Error: " + e.getMessage());
        e.printStackTrace();
        System.exit(1);
    }

}
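
Inversions.count is not shown on this page. Assuming URL2DocID implements Comparable (ordering by URL), a standard merge-sort based counter could look like this sketch; note that it sorts the input array as a side effect:

// counts pairs (i, j) with i < j and a[i] > a[j] in O(n log n)
public static <T extends Comparable<T>> long countInversions(T[] a) {
    return mergeCount(a, a.clone(), 0, a.length);
}

private static <T extends Comparable<T>> long mergeCount(T[] a, T[] tmp, int lo, int hi) {
    if (hi - lo <= 1)
        return 0;
    int mid = (lo + hi) >>> 1;
    long count = mergeCount(a, tmp, lo, mid) + mergeCount(a, tmp, mid, hi);
    // merge the two sorted halves, counting cross-half inversions
    int i = lo, j = mid, k = lo;
    while (i < mid && j < hi) {
        if (a[i].compareTo(a[j]) <= 0) {
            tmp[k++] = a[i++];
        } else {
            tmp[k++] = a[j++];
            count += mid - i; // every remaining element in the left half exceeds a[j]
        }
    }
    while (i < mid)
        tmp[k++] = a[i++];
    while (j < hi)
        tmp[k++] = a[j++];
    System.arraycopy(tmp, lo, a, lo, hi - lo);
    return count;
}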