Example usage for org.apache.lucene.index IndexReader numDocs

List of usage examples for org.apache.lucene.index IndexReader numDocs

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexReader numDocs.

Prototype

public abstract int numDocs();

Source Link

Document

Returns the number of documents in this index.

Usage

From source file:com.doculibre.constellio.services.ImportExportServicesImpl.java

License:Open Source License

/**
 * Exports the stored, non-binary fields of every live document in {@code directory}
 * to an Excel workbook written to {@code output}. The first document's field names
 * become the header row.
 *
 * @param directory the Lucene directory to read from
 * @param output    stream the workbook is written to (not closed by this method)
 * @throws RuntimeException wrapping any I/O or spreadsheet-write failure
 */
@Override
public void convertData(Directory directory, OutputStream output) {
    try {
        WritableWorkbook workbook = Workbook.createWorkbook(output);
        WritableSheet sheet = workbook.createSheet("fields", 0);

        WritableFont arial10font = new WritableFont(WritableFont.ARIAL, 10);
        WritableCellFormat arial10format = new WritableCellFormat(arial10font);

        IndexReader indexReader = DirectoryReader.open(directory);
        try {
            // NOTE(review): numDocs() excludes deleted documents while document(i)
            // addresses raw ids up to maxDoc(); if the index contains deletions this
            // loop can skip trailing documents -- confirm the index is deletion-free.
            final int numDocs = indexReader.numDocs();
            int row = 1;
            for (int i = 0; i < numDocs; i++) {
                Document document = indexReader.document(i);
                int column = 0;
                for (IndexableField field : document.getFields()) {
                    // Header row is derived from the first document's field names.
                    if (row == 1) {
                        sheet.addCell(new Label(column, 0, field.name(), arial10format));
                    }
                    // Only export stored text fields; binary payloads are skipped.
                    if (field.fieldType().stored() && field.binaryValue() == null) {
                        String indexedContent = convertText(field.stringValue());
                        sheet.addCell(new Label(column, row, indexedContent, arial10format));
                    }
                    column++;
                }
                row++;
            }
        } finally {
            // Always release the reader, even when a cell write fails.
            indexReader.close();
        }

        workbook.write();
        workbook.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (RowsExceededException e) {
        throw new RuntimeException(e);
    } catch (WriteException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Executes {@code query} on the given searcher, optionally applying a sort.
 * When {@code maxResultsCount} is null, the cap defaults to the reader's live
 * document count so every matching document can be returned.
 *
 * @param searcher        searcher to run the query on
 * @param query           the Lucene query
 * @param reader          reader used to derive the default result cap
 * @param maxResultsCount maximum hits to collect, or null for "all live docs"
 * @param sort            sort order, or null for relevance ordering
 * @return the collected top documents
 * @throws IOException on index access failure
 */
private TopDocs performSearch(IndexSearcher searcher, Query query, IndexReader reader, Integer maxResultsCount,
        Sort sort) throws IOException {
    final int limit = (maxResultsCount != null) ? maxResultsCount : reader.numDocs();
    return (sort != null) ? searcher.search(query, limit, sort) : searcher.search(query, limit);
}

From source file:com.esri.gpt.catalog.lucene.stats.Collectable.java

License:Apache License

/**
 * Determines the number of documents considered during stats collection.
 * <br/>If the document filter bitset is non-null, the count is its cardinality;
 * otherwise the count is the number of live docs reported by the reader.
 * @param reader the index reader
 * @param documentFilterBitSet the bitset representing the subset of documents being considered
 * @return the total number of documents considered
 */
protected long determineNumberOfDocsConsidered(IndexReader reader, OpenBitSet documentFilterBitSet) {
    final long count = (documentFilterBitSet == null) ? reader.numDocs() : documentFilterBitSet.cardinality();
    this.setNumberOfDocsConsidered(count);
    return this.getNumberOfDocsConsidered();
}

From source file:com.flaptor.hounder.util.Idx.java

License:Apache License

/**
 * Command-line index utility. Usage: {@code Idx <cmd> <indexDir> [args...]}
 * where cmd is one of: list, search, delete, optimize, merge, term-count,
 * hit-count, compound, uncompound, terms.
 *
 * @param arg command-line arguments; arg[0] is the command, arg[1] the index dir
 * @throws Exception on any index access failure
 */
public static void main(String arg[]) throws Exception {
    check(arg.length > 1, null);
    String cmd = arg[0];
    File idx = new File(arg[1]);
    if ("list".equals(cmd)) {
        int num = (arg.length > 2) ? Integer.parseInt(arg[2]) : -1;
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        int docs = reader.numDocs();
        int max = reader.maxDoc();
        System.err.println("Index contains " + docs + " documents plus " + (max - docs) + " deleted.");
        if (num > -1) {
            if (num == 0)
                num = docs;
            for (int i = 0; i < max && i < num; i++) {
                System.out.println("----------------------------------------");
                // Skip ids whose documents have been deleted.
                if (!reader.isDeleted(i)) {
                    Document doc = reader.document(i);
                    List flds = doc.getFields();
                    Iterator iter = flds.iterator();
                    while (iter.hasNext()) {
                        Field fld = (Field) iter.next();
                        // Field attributes: i=indexed, s=stored, t=tokenized.
                        String attr = (fld.isIndexed() ? ",i" : "") + (fld.isStored() ? ",s" : "")
                                + (fld.isTokenized() ? ",t" : "");
                        System.out.println(fld.name() + attr + ": " + fld.stringValue());
                    }
                }
            }
            System.out.println();
        }
        // Close unconditionally (previously leaked when no doc count was given).
        reader.close();
    } else if ("search".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        ScorelessHitCollector collector = new HashSetScorelessHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        Set<Integer> docIds = collector.getMatchingDocuments();
        System.out.println("\nNumber of hits: " + docIds.size() + "\n");
        for (Integer docId : docIds) {
            Document doc = searcher.doc(docId);
            List flds = doc.getFields();
            Iterator iter = flds.iterator();
            while (iter.hasNext()) {
                Field fld = (Field) iter.next();
                System.out.println(fld.name() + ": " + fld.stringValue());
            }
        }
        searcher.close();
        System.out.println();
    } else if ("delete".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexReader reader = IndexReader.open(idx);
        reader.deleteDocuments(new Term(field, value));
        reader.close();
    } else if ("optimize".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.optimize();
        writer.close();
    } else if ("merge".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        File idx2 = new File(arg[2]);
        check(idx.exists(), "Index dir 1 not found");
        check(idx2.exists(), "Index dir 2 not found");
        // Merge idx2 into idx.
        IndexReader reader = IndexReader.open(idx2);
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.addIndexes(new IndexReader[] { reader });
        writer.close();
        reader.close();
    } else if ("term-count".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        String field = arg[2];
        int count = 0;
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term term = terms.term();
            if (term.field().equals(field))
                count++;
        }
        terms.close();
        reader.close();
        System.out.println("Found " + count + " different values for field " + field);
    } else if ("hit-count".equals(cmd)) {
        check(arg.length > 3, "Not enough arguments");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        CountingHitCollector collector = new CountingHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        System.out.println("\nNumber of hits: " + collector.getDocCount() + "\n");
        searcher.close();
    } else if ("uncompound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
    } else if ("compound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(true);
        writer.optimize();
        writer.close();
    } else if ("terms".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        IndexReader reader = IndexReader.open(idx);
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term t = terms.term();
            if (t.field().equals(field)) {
                System.out.println(t.text());
            }
        }
        // Release resources (previously leaked in this branch).
        terms.close();
        reader.close();
    }

}

From source file:com.foundationdb.lucene.SimpleTest.java

License:Open Source License

/**
 * Asserts that the index stored in {@code dir} reports exactly {@code amount}
 * live documents, closing the reader afterwards.
 *
 * @param dir    directory holding the index under test
 * @param amount expected live document count
 * @throws IOException on index access failure
 */
private void assertDocumentsAreThere(Directory dir, int amount) throws IOException {
    try (IndexReader reader = DirectoryReader.open(dir)) {
        assertEquals(amount, reader.numDocs());
    }
}

From source file:com.github.parzonka.esa.indexing.IndexInverter.java

License:Apache License

/**
 * Builds the inverted (concept) index from the Lucene term index: selects terms
 * whose document frequency lies within the configured bounds, then writes one
 * concept vector per selected term.
 *
 * @throws CorruptIndexException if the source index is corrupt
 * @throws IOException           on index access failure
 * @throws SimilarityException   on vector computation failure
 */
public void createInvertedIndex() throws CorruptIndexException, IOException, SimilarityException {

    deleteQuietly(invertedIndexDir);
    invertedIndexDir.mkdirs();

    final Set<String> terms = new HashSet<String>();
    int totalTerms = 0;

    final IndexReader reader = IndexReader.open(FSDirectory.open(luceneIndexDir));
    try {
        // Upper document-frequency cutoff derived from the corpus fraction.
        final int maxDocumentDistributionCount = (int) Math.ceil(maxCorpusDistribution * reader.numDocs());
        final TermEnum termEnum = reader.terms();
        try {
            while (termEnum.next()) {
                final String term = termEnum.term().text();
                final int termDocFreq = termEnum.docFreq();
                // Keep terms that are neither too rare nor too frequent.
                if (minDocumentFrequency <= termDocFreq && termDocFreq < maxDocumentDistributionCount) {
                    terms.add(term);
                }
                totalTerms++;
            }
        } finally {
            // Previously leaked; TermEnum holds index resources.
            termEnum.close();
        }
    } finally {
        // Close the reader even when the term scan fails.
        reader.close();
    }

    System.out.println("Using " + terms.size() + " terms out of " + totalTerms);
    System.out.println("Input Lucene index: " + luceneIndexDir);
    final LuceneVectorReader luceneVectorReader = new LuceneVectorReader(luceneIndexDir);
    configureLuceneVectorReader(luceneVectorReader);
    System.out.println("Output inverted index: " + invertedIndexDir);
    final VectorIndexWriter vectorIndexWriter = new VectorIndexWriter(invertedIndexDir,
            luceneVectorReader.getConceptCount());

    final ProgressMeter progressMeter = new ProgressMeter(terms.size());
    for (String term : terms) {
        final Vector vector = luceneVectorReader.getVector(term);
        vectorIndexWriter.put(term, vector);
        progressMeter.next();
        System.out.println("[" + term + "] " + progressMeter);
    }
    vectorIndexWriter.close();
}

From source file:com.github.rnewson.couchdb.lucene.Search.java

License:Apache License

/**
 * Reads JSON requests from stdin, one per line, answers each on stdout, and
 * reopens the Lucene reader when the index changes on disk. Two request kinds:
 * a search (query has "q") and an index-info request (empty query object).
 *
 * @param args unused
 */
public static void main(final String[] args) {
    Utils.LOG.info("searcher started.");
    try {
        IndexReader reader = null;
        IndexSearcher searcher = null;

        final Scanner scanner = new Scanner(System.in);
        while (scanner.hasNextLine()) {
            if (reader == null) {
                // Open a reader and searcher lazily, once an index exists.
                if (IndexReader.indexExists(Config.INDEX_DIR)) {
                    reader = IndexReader.open(NIOFSDirectory.getDirectory(Config.INDEX_DIR), true);
                    searcher = new IndexSearcher(reader);
                }
            }

            final String line = scanner.nextLine();

            // Reject requests until the index is available.
            if (searcher == null) {
                System.out.println(Utils.error(503, "couchdb-lucene not available."));
                continue;
            }

            final JSONObject obj;
            try {
                obj = JSONObject.fromObject(line);
            } catch (final JSONException e) {
                System.out.println(Utils.error(400, "invalid JSON."));
                continue;
            }

            if (!obj.has("query")) {
                System.out.println(Utils.error(400, "No query found in request."));
                continue;
            }

            final JSONObject query = obj.getJSONObject("query");

            // "stale=ok" lets the client skip the reopen check for speed.
            final boolean reopen = !"ok".equals(query.optString("stale", "not-ok"));

            // Refresh reader and searcher if the index was updated.
            if (reader != null && reopen) {
                final IndexReader newReader = reader.reopen();
                if (reader != newReader) {
                    Utils.LOG.info("Lucene index was updated, reopening searcher.");
                    final IndexReader oldReader = reader;
                    reader = newReader;
                    searcher = new IndexSearcher(reader);
                    oldReader.close();
                }
            }

            try {
                // A query.
                if (query.has("q")) {
                    final JSONArray path = obj.getJSONArray("path");

                    if (path.size() < 3) {
                        System.out.println(Utils.error(400, "No design document in path."));
                        continue;
                    }

                    if (path.size() < 4) {
                        System.out.println(Utils.error(400, "No view name in path."));
                        // Previously fell through and executed the request anyway.
                        continue;
                    }

                    if (path.size() > 4) {
                        System.out.println(Utils.error(400, "Extra path info in request."));
                        // Previously fell through and executed the request anyway.
                        continue;
                    }

                    assert path.size() == 4;
                    final SearchRequest request = new SearchRequest(obj);
                    final String result = request.execute(searcher);
                    System.out.println(result);
                    continue;
                }
                // info request: empty query object.
                if (query.keySet().isEmpty()) {
                    final JSONObject json = new JSONObject();
                    json.put("current", reader.isCurrent());
                    json.put("disk_size", size(reader.directory()));
                    json.put("doc_count", reader.numDocs());
                    json.put("doc_del_count", reader.numDeletedDocs());
                    final JSONArray fields = new JSONArray();
                    for (final Object field : reader.getFieldNames(FieldOption.INDEXED)) {
                        // Internal fields are hidden from clients.
                        if (((String) field).startsWith("_"))
                            continue;
                        fields.add(field);
                    }
                    json.put("fields", fields);
                    json.put("last_modified", IndexReader.lastModified(Config.INDEX_DIR));
                    json.put("optimized", reader.isOptimized());

                    final JSONObject info = new JSONObject();
                    info.put("code", 200);
                    info.put("json", json);
                    final JSONObject headers = new JSONObject();
                    headers.put("Content-Type", "text/plain");
                    info.put("headers", headers);

                    System.out.println(info);
                    // Previously also emitted "Bad request." after the response.
                    continue;
                }
            } catch (final Exception e) {
                System.out.println(Utils.error(400, e));
                continue;
            }

            // Reached only when the request matched no handler above.
            System.out.println(Utils.error(400, "Bad request."));
        }
        if (reader != null) {
            reader.close();
        }
    } catch (final Exception e) {
        System.out.println(Utils.error(500, e.getMessage()));
    }
    Utils.LOG.info("searcher stopped.");
}

From source file:com.globalsight.ling.lucene.Index.java

License:Apache License

/**
 * Returns the number of live documents stored in this index.
 *
 * @return the reader's live document count
 * @throws IOException if the index is not in the opened state, or if the
 *         thread is interrupted while waiting for the read lock
 */
public int getDocumentCount() throws IOException {
    synchronized (m_state) {
        if (m_state != STATE_OPENED) {
            throw new IOException("index is not available");
        }
    }

    try {
        m_lock.readLock().acquire();

        try {
            IndexReader reader = LuceneCache.getLuceneCache(m_directory).getIndexReader();
            int result = reader.numDocs();
            return result;
        } finally {
            m_lock.readLock().release();
        }
    } catch (InterruptedException ex) {
        // Preserve the interruption status and the original cause
        // (previously the cause was dropped, keeping only the message).
        Thread.currentThread().interrupt();
        IOException ioe = new IOException(ex.getMessage());
        ioe.initCause(ex);
        throw ioe;
    }
}

From source file:com.jaeksoft.searchlib.index.IndexStatistics.java

License:Open Source License

/**
 * Snapshots document and deletion statistics from the given reader at
 * construction time; the values are not updated if the index changes later.
 *
 * @param indexReader open reader to sample statistics from (not closed here)
 */
protected IndexStatistics(IndexReader indexReader) {
    maxDoc = indexReader.maxDoc(); // highest doc id + 1, including deleted slots
    numDocs = indexReader.numDocs(); // live (non-deleted) documents only
    numDeletedDocs = indexReader.numDeletedDocs();
    hasDeletions = indexReader.hasDeletions();
    isOptimized = indexReader.isOptimized();
}

From source file:com.jamespot.glifpix.index.ResourceDocument.java

License:Open Source License

/**
 * Loads every CRC value indexed under the "crc" field.
 *
 * @param ir open reader over the resource index (not closed here)
 * @return the set of parsed CRC values
 * @throws IOException on index access failure
 */
protected static Set<Long> getCRCs(IndexReader ir) throws IOException {
    logger.info("Loading CRCs");
    int nbDocs = ir.numDocs();
    float loadFactor = .75f;
    // Presize so the expected number of entries fits without rehashing.
    int initCapacity = ((int) (nbDocs / loadFactor)) + 1000;

    Set<Long> toRet = new HashSet<Long>(initCapacity, loadFactor);
    int nbCrc = 0;
    TermEnum te = ir.terms(new Term("crc"));
    try {
        // terms(Term) positions at the first term >= "crc", and term() is read
        // before the first next(): guard against an empty enumeration (was NPE),
        // and stop before counting the first term past the "crc" field (the old
        // code counted it as a CRC).
        for (boolean hasNext = true; hasNext; hasNext = te.next()) {
            Term t = te.term();
            if (t == null || !t.field().equalsIgnoreCase("crc"))
                break;
            nbCrc++;
            if (nbCrc % 100000 == 0) {
                logger.info(nbCrc + " CRCs loaded");
            }
            toRet.add(Long.parseLong(t.text()));
        }
    } finally {
        // Previously leaked; TermEnum holds index resources.
        te.close();
    }
    logger.info("Total CRCs loaded :" + nbCrc);
    return toRet;
}