Example usage for org.apache.lucene.index IndexReader numDeletedDocs

Introduction

On this page you can find usage examples for org.apache.lucene.index.IndexReader.numDeletedDocs().

Prototype

public final int numDeletedDocs() 

Document

Returns the number of deleted documents.
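
Since numDeletedDocs() is defined as maxDoc() minus numDocs(), the invariant maxDoc() == numDocs() + numDeletedDocs() always holds, and the count drops back to zero once deletions are purged by a segment merge. The following minimal sketch illustrates this; it is not taken from the projects below and assumes a recent Lucene release (8.x-style APIs: ByteBuffersDirectory replaces the older RAMDirectory, and the class name NumDeletedDocsExample is ours).

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class NumDeletedDocsExample {
    public static void main(String[] args) throws Exception {
        Directory dir = new ByteBuffersDirectory();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            for (int i = 0; i < 3; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", Integer.toString(i), Field.Store.YES));
                writer.addDocument(doc);
            }
            writer.deleteDocuments(new Term("id", "1")); // logical delete only
            writer.commit();

            try (IndexReader reader = DirectoryReader.open(dir)) {
                // The deleted document still occupies a slot until segments merge.
                System.out.println("numDocs=" + reader.numDocs());               // 2
                System.out.println("numDeletedDocs=" + reader.numDeletedDocs()); // 1
                System.out.println("maxDoc=" + reader.maxDoc());                 // 3
            }

            writer.forceMerge(1); // merging rewrites segments and drops deletions
            writer.commit();
            try (IndexReader reader = DirectoryReader.open(dir)) {
                System.out.println("numDeletedDocs=" + reader.numDeletedDocs()); // 0
            }
        }
    }
}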

Usage

From source file: com.github.rnewson.couchdb.lucene.Search.java

License: Apache License

public static void main(final String[] args) {
    Utils.LOG.info("searcher started.");
    try {
        IndexReader reader = null;
        IndexSearcher searcher = null;

        final Scanner scanner = new Scanner(System.in);
        while (scanner.hasNextLine()) {
            if (reader == null) {
                // Open a reader and searcher if index exists.
                if (IndexReader.indexExists(Config.INDEX_DIR)) {
                    reader = IndexReader.open(NIOFSDirectory.getDirectory(Config.INDEX_DIR), true);
                    searcher = new IndexSearcher(reader);
                }
            }

            final String line = scanner.nextLine();

            // Reject the request if the index is not yet available.
            if (searcher == null) {
                System.out.println(Utils.error(503, "couchdb-lucene not available."));
                continue;
            }

            final JSONObject obj;
            try {
                obj = JSONObject.fromObject(line);
            } catch (final JSONException e) {
                System.out.println(Utils.error(400, "invalid JSON."));
                continue;
            }

            if (!obj.has("query")) {
                System.out.println(Utils.error(400, "No query found in request."));
                continue;
            }

            final JSONObject query = obj.getJSONObject("query");

            final boolean reopen = !"ok".equals(query.optString("stale", "not-ok"));

            // Refresh reader and searcher if necessary.
            if (reader != null && reopen) {
                final IndexReader newReader = reader.reopen();
                if (reader != newReader) {
                    Utils.LOG.info("Lucene index was updated, reopening searcher.");
                    final IndexReader oldReader = reader;
                    reader = newReader;
                    searcher = new IndexSearcher(reader);
                    oldReader.close();
                }
            }

            try {
                // A query.
                if (query.has("q")) {
                    final JSONArray path = obj.getJSONArray("path");

                    if (path.size() < 3) {
                        System.out.println(Utils.error(400, "No design document in path."));
                        continue;
                    }

                    if (path.size() < 4) {
                        System.out.println(Utils.error(400, "No view name in path."));
                        continue;
                    }

                    if (path.size() > 4) {
                        System.out.println(Utils.error(400, "Extra path info in request."));
                        continue;
                    }

                    assert path.size() == 4;
                    final SearchRequest request = new SearchRequest(obj);
                    final String result = request.execute(searcher);
                    System.out.println(result);
                    continue;
                }
                // An info request (empty query object).
                if (query.keySet().isEmpty()) {
                    final JSONObject json = new JSONObject();
                    json.put("current", reader.isCurrent());
                    json.put("disk_size", size(reader.directory()));
                    json.put("doc_count", reader.numDocs());
                    json.put("doc_del_count", reader.numDeletedDocs());
                    final JSONArray fields = new JSONArray();
                    for (final Object field : reader.getFieldNames(FieldOption.INDEXED)) {
                        if (((String) field).startsWith("_"))
                            continue;
                        fields.add(field);
                    }
                    json.put("fields", fields);
                    json.put("last_modified", IndexReader.lastModified(Config.INDEX_DIR));
                    json.put("optimized", reader.isOptimized());

                    final JSONObject info = new JSONObject();
                    info.put("code", 200);
                    info.put("json", json);
                    final JSONObject headers = new JSONObject();
                    headers.put("Content-Type", "text/plain");
                    info.put("headers", headers);

                    System.out.println(info);
                    continue;
                }
            } catch (final Exception e) {
                System.out.println(Utils.error(400, e));
            }

            System.out.println(Utils.error(400, "Bad request."));
        }
        if (reader != null) {
            reader.close();
        }
    } catch (final Exception e) {
        System.out.println(Utils.error(500, e.getMessage()));
    }
    Utils.LOG.info("searcher stopped.");
}

From source file: com.jaeksoft.searchlib.index.IndexStatistics.java

License: Open Source License

protected IndexStatistics(IndexReader indexReader) {
    maxDoc = indexReader.maxDoc(); // live + deleted documents
    numDocs = indexReader.numDocs(); // live documents only
    numDeletedDocs = indexReader.numDeletedDocs();
    hasDeletions = indexReader.hasDeletions();
    isOptimized = indexReader.isOptimized();
}

From source file: com.qwazr.search.index.IndexStatus.java

License: Apache License

public IndexStatus(IndexReader indexReader, IndexSettingsDefinition settings, Set<String> analyzers,
        Set<String> fields) {
    num_docs = (long) indexReader.numDocs();
    num_deleted_docs = (long) indexReader.numDeletedDocs();
    this.settings = settings;
    this.analyzers = analyzers;
    this.fields = fields;
}

From source file: edu.stanford.muse.index.Indexer.java

License: Apache License

/**
 * Sets up the indexer just for reading. If it is needed for writing only, call
 * setupForWrite; if both read and write access are needed, call both.
 */
synchronized void setupForRead() {
    log.info("setting up index for read only access");
    long startTime = System.currentTimeMillis();

    //closeHandles();
    try {
        setupDirectory();

        String[] defaultSearchFields, defaultSearchFieldsOriginal;
        String[] defaultSearchFieldSubject = new String[] { "title" }; // for subject only search
        String[] defaultSearchFieldCorrespondents;
        //the body field should be included, as the content of the attachment is stored in it; should the meta field also be included?
        //why search over en-names and en-names-original when body/body_original is already included in the search fields?
        defaultSearchFields = new String[] { "body", "title", "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        defaultSearchFieldsOriginal = new String[] { "body_original", "title" }; // we want to leave title there because we want to always hit the title -- discussed with Peter June 27 2015
        defaultSearchFieldCorrespondents = new String[] { "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        // names field added above after email discussion with Sit 6/11/2013. problem is that we're not using the Lucene EnglishPossessiveFilter, so
        // NER will extract the name Stanford University in a sentence like:
        // "This is Stanford University's website."
        // but when the user clicks on the name "Stanford University" in say monthly cards, we
        // will not match the message with this sentence because of the apostrophe.

        //for searching an attachment by fileName
        String[] metaSearchFields = new String[] { "fileName" };
        // Parse a simple query that searches for "text":
        if (parser == null) {
            //parser = new QueryParser(MUSE_LUCENE_VERSION, defaultSearchField, analyzer);
            parser = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFields, analyzer);
            parserOriginal = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldsOriginal, analyzer);
            parserSubject = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldSubject, analyzer);
            parserCorrespondents = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldCorrespondents,
                    analyzer);
            parserMeta = new MultiFieldQueryParser(LUCENE_VERSION, metaSearchFields, new KeywordAnalyzer());
        }

        /**
         * A bunch of gotchas here.
         * It's a bad idea to store Lucene's internal docIds, as no assumptions should be made
         * about them, not even that they are sequential. When searching, Lucene may skip logically deleted docs.
         * Lucene does not remove deleted docs eagerly; having them in the index can degrade search
         * performance by up to 50%, and they are purged only when segments are merged.*/
        int numContentDocs = 0, numContentDeletedDocs = 0, numAttachmentDocs = 0, numAttachmentDeletedDocs = 0;
        if (DirectoryReader.indexExists(directory)) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            if (ireader.numDeletedDocs() > 0)
                log.warn("!!!!!!!\nIndex reader has " + ireader.maxDoc() + " doc(s) of which "
                        + ireader.numDeletedDocs() + " are deleted\n!!!!!!!!!!");
            isearcher = new IndexSearcher(ireader);
            contentDocIds = new LinkedHashMap<>();
            numContentDocs = ireader.numDocs();
            numContentDeletedDocs = ireader.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader);
            Set<String> fieldsToLoad = new HashSet<>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;

                if (doc == null || doc.get("docId") == null)
                    continue;
                contentDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + contentDocIds.size() + " content docs");
        }

        if (DirectoryReader.indexExists(directory_blob)) {
            IndexReader ireader_blob = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader_blob);
            blobDocIds = new LinkedHashMap<Integer, String>();

            numAttachmentDocs = ireader_blob.numDocs();
            numAttachmentDeletedDocs = ireader_blob.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader_blob);
            Set<String> fieldsToLoad = new HashSet<String>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader_blob.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader_blob.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;

                if (doc == null || doc.get("docId") == null)
                    continue;
                blobDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + blobDocIds.size() + " attachment docs");
        }

        log.warn("Number of content docs: " + numContentDocs + ", number deleted: " + numContentDeletedDocs);
        log.warn("Number of attachment docs: " + numAttachmentDocs + ", number deleted: "
                + numAttachmentDeletedDocs);

        if (dirNameToDocIdMap == null)
            dirNameToDocIdMap = new LinkedHashMap<String, Map<Integer, String>>();
    } catch (Exception e) {
        Util.print_exception(e, log);
    }
    log.info("Setting up index for read took " + (System.currentTimeMillis() - startTime) + " ms");
}

From source file: org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexTest.java

License: Apache License

public int getDeletedDocCount(NodeBuilder idx, IndexDefinition definition) throws IOException {
    IndexReader reader = DirectoryReader.open(newIndexDirectory(definition, idx));
    int numDeletes = reader.numDeletedDocs();
    reader.close();
    return numDeletes;
}

From source file: org.drftpd.vfs.index.lucene.LuceneEngine.java

License: Open Source License

/**
 * This method returns a Map containing information about the index engine.<br>
 * Right now this Map contains the info below:
 * <ul>
 * <li>Number of inodes (key => "inodes")</li>
 * <li>Storage backend (key => "backend")</li>
 * <li>Maximum search hits (key => "max hits")</li>
 * <li>The date of the last optimization (key => "last optimization")</li>
 * <li>The date of the last backup (key => "last backup")</li>
 * <li>The date of the last update of the search engine (key => "last search engine update")</li>
 * <li>Amount of cached documents (key => "cached inodes")</li>
 * <li>Amount of used memory (key => "ram usage")</li>
 * <li>The size on disk of the index (key => "size")</li>
 * </ul>
 */
public Map<String, String> getStatus() {
    Map<String, String> status = new LinkedHashMap<String, String>();

    DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.LONG);
    String lastOp = df.format(new Date(_maintenanceThread.getLastOptimizationTime()));
    String lastBackup = df.format(new Date(_backupThread.getLastBackup()));
    status.put("backend", "Apache Lucene (http://lucene.apache.org)");

    try {
        status.put("inodes", String.valueOf(_iWriter.numDocs()));
    } catch (IOException e) {
        logger.error("IOException getting IndexWriter", e);
    }

    IndexReader iReader = null;
    try {
        iReader = IndexReader.open(_iWriter, true);
        status.put("deleted inodes", String.valueOf(iReader.numDeletedDocs()));
    } catch (CorruptIndexException e) {
        logger.error(EXCEPTION_OCCURED_WHILE_SEARCHING, e);
    } catch (IOException e) {
        logger.error(EXCEPTION_OCCURED_WHILE_SEARCHING, e);
    } finally {
        if (iReader != null) {
            try {
                iReader.close();
            } catch (IOException e) {
                logger.error("IOException closing IndexReader obtained from the IndexWriter", e);
            }
        }
    }

    status.put("cached inodes", String.valueOf(_iWriter.numRamDocs()));
    status.put("max hits", String.valueOf(_maxHitsNumber));
    status.put("last optimization", lastOp);
    status.put("last backup", lastBackup);
    status.put("ram usage", Bytes.formatBytes(_iWriter.ramSizeInBytes()));

    long size = 0L;
    String[] paths;
    try {
        paths = _storage.listAll();
        for (String path : paths) {
            size += new PhysicalFile(INDEX_DIR + "/" + path).length();
        }

        status.put("size", Bytes.formatBytes(size));
    } catch (IOException e) {
        logger.error("IOException getting size of index dir", e);
    }

    return status;
}

From source file: org.eclipse.che.api.search.server.impl.LuceneSearcher.java

License: Open Source License

private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                    "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={}  hasUncommittedChanges={} hasDeletions={}",
                    reader.numDocs(), reader.numDeletedDocs(), reader.maxDoc(), reader.hasDeletions(),
                    luceneIndexWriter.numDocs(), luceneIndexWriter.numRamDocs(),
                    luceneIndexWriter.hasPendingMerges(), luceneIndexWriter.hasUncommittedChanges(),
                    luceneIndexWriter.hasDeletions());
        } finally {
            searcherManager.release(luceneSearcher);
        }
    }
}

From source file: org.eclipse.rdf4j.sail.lucene.LuceneIndex.java

License: Open Source License

private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;

            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);

            }

            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());

        } finally {
            ReaderMonitor toCloseCurrentMonitor = currentMonitor;
            currentMonitor = null;
            if (toCloseCurrentMonitor != null) {
                toCloseCurrentMonitor.closeWhenPossible();
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }

}

From source file: org.elasticsearch.index.engine.robin.RobinEngine.java

License: Apache License

@Override
public List<Segment> segments() {
    rwl.readLock().lock();
    try {
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter == null) {
            throw new EngineClosedException(shardId, failedEngine);
        }
        Map<String, Segment> segments = new HashMap<String, Segment>();

        // first, go over and compute the search ones...
        Searcher searcher = searcher();
        try {
            IndexReader[] readers = searcher.reader().getSequentialSubReaders();
            for (IndexReader reader : readers) {
                assert reader instanceof SegmentReader;
                SegmentInfo info = Lucene.getSegmentInfo((SegmentReader) reader);
                assert !segments.containsKey(info.name);
                Segment segment = new Segment(info.name);
                segment.search = true;
                segment.docCount = reader.numDocs();
                segment.delDocCount = reader.numDeletedDocs();
                try {
                    segment.sizeInBytes = info.sizeInBytes(true);
                } catch (IOException e) {
                    logger.trace("failed to get size for [{}]", e, info.name);
                }
                segments.put(info.name, segment);
            }
        } finally {
            searcher.release();
        }

        // now, correlate or add the committed ones...
        if (lastCommittedSegmentInfos != null) {
            SegmentInfos infos = lastCommittedSegmentInfos;
            for (SegmentInfo info : infos) {
                Segment segment = segments.get(info.name);
                if (segment == null) {
                    segment = new Segment(info.name);
                    segment.search = false;
                    segment.committed = true;
                    segment.docCount = info.docCount;
                    try {
                        segment.delDocCount = indexWriter.numDeletedDocs(info);
                    } catch (IOException e) {
                        logger.trace("failed to get deleted docs for committed segment", e);
                    }
                    try {
                        segment.sizeInBytes = info.sizeInBytes(true);
                    } catch (IOException e) {
                        logger.trace("failed to get size for [{}]", e, info.name);
                    }
                    segments.put(info.name, segment);
                } else {
                    segment.committed = true;
                }
            }
        }

        Segment[] segmentsArr = segments.values().toArray(new Segment[segments.values().size()]);
        Arrays.sort(segmentsArr, new Comparator<Segment>() {
            @Override
            public int compare(Segment o1, Segment o2) {
                return (int) (o1.generation() - o2.generation());
            }
        });

        return Arrays.asList(segmentsArr);
    } finally {
        rwl.readLock().unlock();
    }
}

From source file: org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java

License: Apache License

public void testLoadQueries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    boolean legacyFormat = randomBoolean();
    Version version = legacyFormat ? Version.V_2_0_0 : Version.CURRENT;
    IndexShard indexShard = mockIndexShard(version, legacyFormat);

    storeQuery("0", indexWriter, termQuery("field1", "value1"), true, legacyFormat);
    storeQuery("1", indexWriter, wildcardQuery("field1", "v*"), true, legacyFormat);
    storeQuery("2", indexWriter,
            boolQuery().must(termQuery("field1", "value1")).must(termQuery("field2", "value2")), true,
            legacyFormat);
    // dummy docs should be skipped during loading:
    Document doc = new Document();
    doc.add(new StringField("dummy", "value", Field.Store.YES));
    indexWriter.addDocument(doc);
    storeQuery("4", indexWriter, termQuery("field2", "value2"), true, legacyFormat);
    // only documents of the .percolator type should be loaded:
    storeQuery("5", indexWriter, termQuery("field2", "value2"), false, legacyFormat);
    storeQuery("6", indexWriter, termQuery("field3", "value3"), true, legacyFormat);
    indexWriter.forceMerge(1);

    // also include queries for percolator docs marked as deleted:
    indexWriter.deleteDocuments(new Term("id", "6"));
    indexWriter.close();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(directory), shardId);
    assertThat(indexReader.leaves().size(), equalTo(1));
    assertThat(indexReader.numDeletedDocs(), equalTo(1));
    assertThat(indexReader.maxDoc(), equalTo(7));

    initialize("field1", "type=keyword", "field2", "type=keyword", "field3", "type=keyword");

    PercolatorQueryCache.QueriesLeaf leaf = cache.loadQueries(indexReader.leaves().get(0), indexShard);
    assertThat(leaf.queries.size(), equalTo(5));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("field1", "value1"))));
    assertThat(leaf.getQuery(1), equalTo(new WildcardQuery(new Term("field1", "v*"))));
    assertThat(leaf.getQuery(2),
            equalTo(new BooleanQuery.Builder()
                    .add(new TermQuery(new Term("field1", "value1")), BooleanClause.Occur.MUST)
                    .add(new TermQuery(new Term("field2", "value2")), BooleanClause.Occur.MUST).build()));
    assertThat(leaf.getQuery(4), equalTo(new TermQuery(new Term("field2", "value2"))));
    assertThat(leaf.getQuery(6), equalTo(new TermQuery(new Term("field3", "value3"))));

    indexReader.close();
    directory.close();
}