List of usage examples for org.apache.lucene.index.IndexReader.numDeletedDocs()
public final int numDeletedDocs()
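Before the project examples, here is a minimal sketch of the call (the index path /tmp/example-index is a placeholder, and Lucene 5+ with the DirectoryReader/FSDirectory API is assumed). numDeletedDocs() reports how many documents in the reader are flagged as deleted but not yet reclaimed by a merge, so maxDoc() == numDocs() + numDeletedDocs() always holds:

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDeletedDocsExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/example-index" is a placeholder path to an existing Lucene index.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                DirectoryReader reader = DirectoryReader.open(dir)) {
            // Deleted-but-unmerged documents still occupy doc ids, so:
            // maxDoc() = numDocs() + numDeletedDocs()
            System.out.println("maxDoc         = " + reader.maxDoc());
            System.out.println("numDocs        = " + reader.numDocs());
            System.out.println("numDeletedDocs = " + reader.numDeletedDocs());
        }
    }
}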
From source file:com.github.rnewson.couchdb.lucene.Search.java
License:Apache License
public static void main(final String[] args) {
    Utils.LOG.info("searcher started.");
    try {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        final Scanner scanner = new Scanner(System.in);
        while (scanner.hasNextLine()) {
            if (reader == null) {
                // Open a reader and searcher if index exists.
                if (IndexReader.indexExists(Config.INDEX_DIR)) {
                    reader = IndexReader.open(NIOFSDirectory.getDirectory(Config.INDEX_DIR), true);
                    searcher = new IndexSearcher(reader);
                }
            }
            final String line = scanner.nextLine();

            // Process search request if index exists.
            if (searcher == null) {
                System.out.println(Utils.error(503, "couchdb-lucene not available."));
                continue;
            }

            final JSONObject obj;
            try {
                obj = JSONObject.fromObject(line);
            } catch (final JSONException e) {
                System.out.println(Utils.error(400, "invalid JSON."));
                continue;
            }

            if (!obj.has("query")) {
                System.out.println(Utils.error(400, "No query found in request."));
                continue;
            }
            final JSONObject query = obj.getJSONObject("query");
            final boolean reopen = !"ok".equals(query.optString("stale", "not-ok"));

            // Refresh reader and searcher if necessary.
            if (reader != null && reopen) {
                final IndexReader newReader = reader.reopen();
                if (reader != newReader) {
                    Utils.LOG.info("Lucene index was updated, reopening searcher.");
                    final IndexReader oldReader = reader;
                    reader = newReader;
                    searcher = new IndexSearcher(reader);
                    oldReader.close();
                }
            }

            try {
                // A query.
                if (query.has("q")) {
                    final JSONArray path = obj.getJSONArray("path");
                    if (path.size() < 3) {
                        System.out.println(Utils.error(400, "No design document in path."));
                        continue;
                    }
                    if (path.size() < 4) {
                        System.out.println(Utils.error(400, "No view name in path."));
                    }
                    if (path.size() > 4) {
                        System.out.println(Utils.error(400, "Extra path info in request."));
                    }
                    assert path.size() == 4;

                    final SearchRequest request = new SearchRequest(obj);
                    final String result = request.execute(searcher);
                    System.out.println(result);
                    continue;
                }

                // info.
                if (query.keySet().isEmpty()) {
                    final JSONObject json = new JSONObject();
                    json.put("current", reader.isCurrent());
                    json.put("disk_size", size(reader.directory()));
                    json.put("doc_count", reader.numDocs());
                    json.put("doc_del_count", reader.numDeletedDocs());
                    final JSONArray fields = new JSONArray();
                    for (final Object field : reader.getFieldNames(FieldOption.INDEXED)) {
                        if (((String) field).startsWith("_"))
                            continue;
                        fields.add(field);
                    }
                    json.put("fields", fields);
                    json.put("last_modified", IndexReader.lastModified(Config.INDEX_DIR));
                    json.put("optimized", reader.isOptimized());

                    final JSONObject info = new JSONObject();
                    info.put("code", 200);
                    info.put("json", json);
                    final JSONObject headers = new JSONObject();
                    headers.put("Content-Type", "text/plain");
                    info.put("headers", headers);

                    System.out.println(info);
                }
            } catch (final Exception e) {
                System.out.println(Utils.error(400, e));
            }

            System.out.println(Utils.error(400, "Bad request."));
        }

        if (reader != null) {
            reader.close();
        }
    } catch (final Exception e) {
        System.out.println(Utils.error(500, e.getMessage()));
    }
    Utils.LOG.info("searcher stopped.");
}
From source file:com.jaeksoft.searchlib.index.IndexStatistics.java
License:Open Source License
protected IndexStatistics(IndexReader indexReader) {
    maxDoc = indexReader.maxDoc();
    numDocs = indexReader.numDocs();
    numDeletedDocs = indexReader.numDeletedDocs();
    hasDeletions = indexReader.hasDeletions();
    isOptimized = indexReader.isOptimized();
}
From source file:com.qwazr.search.index.IndexStatus.java
License:Apache License
public IndexStatus(IndexReader indexReader, IndexSettingsDefinition settings, Set<String> analyzers,
        Set<String> fields) {
    num_docs = (long) indexReader.numDocs();
    num_deleted_docs = (long) indexReader.numDeletedDocs();
    this.settings = settings;
    this.analyzers = analyzers;
    this.fields = fields;
}
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
/**
 * Sets up the indexer just for reading... if needed for writing only, call
 * setupForWrite. If both read and write are needed, call both.
 */
synchronized void setupForRead() {
    log.info("setting up index for read only access");
    long startTime = System.currentTimeMillis();
    //closeHandles();
    try {
        setupDirectory();

        String[] defaultSearchFields, defaultSearchFieldsOriginal;
        String[] defaultSearchFieldSubject = new String[] { "title" }; // for subject-only search
        String[] defaultSearchFieldCorrespondents;
        // body field should be there, as the content of the attachment lies in this field; should also include meta field?
        // why the search over en-names and en-names-original when body/body_original is included in the search fields?
        defaultSearchFields = new String[] { "body", "title", "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        // we want to leave title there because we want to always hit the title -- discussed with Peter June 27 2015
        defaultSearchFieldsOriginal = new String[] { "body_original", "title" };
        defaultSearchFieldCorrespondents = new String[] { "to_names", "from_names", "cc_names", "bcc_names",
                "to_emails", "from_emails", "cc_emails", "bcc_emails" };
        // names field added above after email discussion with Sit 6/11/2013. problem is that we're not using the
        // Lucene EnglishPossessiveFilter, so NER will extract the name Stanford University in a sentence like:
        // "This is Stanford University's website."
        // but when the user clicks on the name "Stanford University" in, say, monthly cards, we
        // will not match the message with this sentence because of the apostrophe.

        // for searching an attachment with fileName
        String[] metaSearchFields = new String[] { "fileName" };

        // Parse a simple query that searches for "text":
        if (parser == null) {
            //parser = new QueryParser(MUSE_LUCENE_VERSION, defaultSearchField, analyzer);
            parser = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFields, analyzer);
            parserOriginal = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldsOriginal, analyzer);
            parserSubject = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldSubject, analyzer);
            parserCorrespondents = new MultiFieldQueryParser(LUCENE_VERSION, defaultSearchFieldCorrespondents,
                    analyzer);
            parserMeta = new MultiFieldQueryParser(LUCENE_VERSION, metaSearchFields, new KeywordAnalyzer());
        }

        /*
         * A bunch of gotchas here.
         * It's a bad idea to store Lucene-internal docIds, as no assumptions about the internal docIds should
         * be made, not even that they are serial. When searching, Lucene may ignore logically deleted docs.
         * Lucene does not handle deleted docs, and having these docs in search may bring down the search
         * performance by 50%. Deleted docs are cleaned only during merging of indices.
         */
        int numContentDocs = 0, numContentDeletedDocs = 0, numAttachmentDocs = 0, numAttachmentDeletedDocs = 0;

        if (DirectoryReader.indexExists(directory)) {
            DirectoryReader ireader = DirectoryReader.open(directory);
            if (ireader.numDeletedDocs() > 0)
                log.warn("!!!!!!!\nIndex reader has " + ireader.numDocs() + " doc(s) of which "
                        + ireader.numDeletedDocs() + " are deleted)\n!!!!!!!!!!");
            isearcher = new IndexSearcher(ireader);
            contentDocIds = new LinkedHashMap<>();
            numContentDocs = ireader.numDocs();
            numContentDeletedDocs = ireader.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader);
            Set<String> fieldsToLoad = new HashSet<>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;
                if (doc == null || doc.get("docId") == null)
                    continue;
                contentDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + contentDocIds.size() + " content docs");
        }

        if (DirectoryReader.indexExists(directory_blob)) {
            IndexReader ireader_blob = DirectoryReader.open(directory_blob);
            isearcher_blob = new IndexSearcher(ireader_blob); // read-only=true
            blobDocIds = new LinkedHashMap<Integer, String>();
            numAttachmentDocs = ireader_blob.numDocs();
            numAttachmentDeletedDocs = ireader_blob.numDeletedDocs();

            Bits liveDocs = MultiFields.getLiveDocs(ireader_blob);
            Set<String> fieldsToLoad = new HashSet<String>();
            fieldsToLoad.add("docId");
            for (int i = 0; i < ireader_blob.maxDoc(); i++) {
                org.apache.lucene.document.Document doc = ireader_blob.document(i, fieldsToLoad);
                if (liveDocs != null && !liveDocs.get(i))
                    continue;
                if (doc == null || doc.get("docId") == null)
                    continue;
                blobDocIds.put(i, doc.get("docId"));
            }
            log.info("Loaded: " + blobDocIds.size() + " attachment docs");
        }

        log.warn("Number of content docs: " + numContentDocs + ", number deleted: " + numContentDeletedDocs);
        log.warn("Number of attachment docs: " + numAttachmentDocs + ", number deleted: "
                + numAttachmentDeletedDocs);

        if (dirNameToDocIdMap == null)
            dirNameToDocIdMap = new LinkedHashMap<String, Map<Integer, String>>();
    } catch (Exception e) {
        Util.print_exception(e, log);
    }
    log.info("Setting up index for read took " + (System.currentTimeMillis() - startTime) + " ms");
}
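The comment in the example above notes that deleted documents are only reclaimed when segments merge. A short, self-contained sketch of that lifecycle (assuming Lucene 8+, where ByteBuffersDirectory is available; the "id" field and index contents are made up for illustration): the delete shows up in numDeletedDocs() after a commit and drops back to zero once forceMerge(1) rewrites the segment.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;

public class DeletedDocsLifecycle {
    public static void main(String[] args) throws Exception {
        try (ByteBuffersDirectory dir = new ByteBuffersDirectory();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            for (int i = 0; i < 3; i++) {
                Document doc = new Document();
                doc.add(new StringField("id", Integer.toString(i), Field.Store.YES));
                writer.addDocument(doc);
            }
            writer.commit();

            writer.deleteDocuments(new Term("id", "1"));
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                // 1: the delete is recorded as a tombstone; the segment still holds the doc
                System.out.println("after delete: " + reader.numDeletedDocs());
            }

            writer.forceMerge(1); // merging rewrites segments and drops deleted docs
            writer.commit();
            try (DirectoryReader reader = DirectoryReader.open(dir)) {
                // 0: the merged segment no longer contains the deleted document
                System.out.println("after merge:  " + reader.numDeletedDocs());
            }
        }
    }
}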
From source file:org.apache.jackrabbit.oak.plugins.index.lucene.LuceneIndexTest.java
License:Apache License
public int getDeletedDocCount(NodeBuilder idx, IndexDefinition definition) throws IOException {
    IndexReader reader = DirectoryReader.open(newIndexDirectory(definition, idx));
    int numDeletes = reader.numDeletedDocs();
    reader.close();
    return numDeletes;
}
From source file:org.drftpd.vfs.index.lucene.LuceneEngine.java
License:Open Source License
/**
 * This method returns a Map containing information about the index engine.<br>
 * Right now this Map contains the info below:
 * <ul>
 * <li>Number of inodes (key => "inodes")</li>
 * <li>Storage backend (key => "backend")</li>
 * <li>Maximum search hits (key => "max hits")</li>
 * <li>The date of the last optimization (key => "last optimization")</li>
 * <li>The date of the last backup (key => "last backup")</li>
 * <li>The date of the last update of the search engine (key => "last search engine update")</li>
 * <li>Number of cached documents (key => "cached inodes")</li>
 * <li>Amount of used memory (key => "ram usage")</li>
 * <li>The size on disk of the index (key => "disk usage")</li>
 * </ul>
 */
public Map<String, String> getStatus() {
    Map<String, String> status = new LinkedHashMap<String, String>();

    DateFormat df = DateFormat.getDateTimeInstance(DateFormat.MEDIUM, DateFormat.LONG);
    String lastOp = df.format(new Date(_maintenanceThread.getLastOptimizationTime()));
    String lastBackup = df.format(new Date(_backupThread.getLastBackup()));

    status.put("backend", "Apache Lucene (http://lucene.apache.org)");
    try {
        status.put("inodes", String.valueOf(_iWriter.numDocs()));
    } catch (IOException e) {
        logger.error("IOException getting IndexWriter", e);
    }

    IndexReader iReader = null;
    try {
        iReader = IndexReader.open(_iWriter, true);
        status.put("deleted inodes", String.valueOf(iReader.numDeletedDocs()));
    } catch (CorruptIndexException e) {
        logger.error(EXCEPTION_OCCURED_WHILE_SEARCHING, e);
    } catch (IOException e) {
        logger.error(EXCEPTION_OCCURED_WHILE_SEARCHING, e);
    } finally {
        if (iReader != null) {
            try {
                iReader.close();
            } catch (IOException e) {
                logger.error("IOException closing IndexReader obtained from the IndexWriter", e);
            }
        }
    }

    status.put("cached inodes", String.valueOf(_iWriter.numRamDocs()));
    status.put("max hits", String.valueOf(_maxHitsNumber));
    status.put("last optimization", lastOp);
    status.put("last backup", lastBackup);
    status.put("ram usage", Bytes.formatBytes(_iWriter.ramSizeInBytes()));

    long size = 0L;
    String[] paths;
    try {
        paths = _storage.listAll();
        for (String path : paths) {
            size += new PhysicalFile(INDEX_DIR + "/" + path).length();
        }
        status.put("size", Bytes.formatBytes(size));
    } catch (IOException e) {
        logger.error("IOException getting size of index dir", e);
    }

    return status;
}
From source file:org.eclipse.che.api.search.server.impl.LuceneSearcher.java
License:Open Source License
private void printStatistic() throws IOException {
    if (LOG.isDebugEnabled()) {
        IndexSearcher luceneSearcher = null;
        try {
            searcherManager.maybeRefresh();
            luceneSearcher = searcherManager.acquire();
            IndexReader reader = luceneSearcher.getIndexReader();
            LOG.debug(
                    "IndexReader numDocs={} numDeletedDocs={} maxDoc={} hasDeletions={}. Writer numDocs={} numRamDocs={} hasPendingMerges={} hasUncommittedChanges={} hasDeletions={}",
                    reader.numDocs(), reader.numDeletedDocs(), reader.maxDoc(), reader.hasDeletions(),
                    luceneIndexWriter.numDocs(), luceneIndexWriter.numRamDocs(),
                    luceneIndexWriter.hasPendingMerges(), luceneIndexWriter.hasUncommittedChanges(),
                    luceneIndexWriter.hasDeletions());
        } finally {
            searcherManager.release(luceneSearcher);
        }
    }
}
From source file:org.eclipse.rdf4j.sail.lucene.LuceneIndex.java
License:Open Source License
private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;
            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);
            }

            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());
        } finally {
            ReaderMonitor toCloseCurrentMonitor = currentMonitor;
            currentMonitor = null;
            if (toCloseCurrentMonitor != null) {
                toCloseCurrentMonitor.closeWhenPossible();
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }
}
From source file:org.elasticsearch.index.engine.robin.RobinEngine.java
License:Apache License
@Override
public List<Segment> segments() {
    rwl.readLock().lock();
    try {
        IndexWriter indexWriter = this.indexWriter;
        if (indexWriter == null) {
            throw new EngineClosedException(shardId, failedEngine);
        }
        Map<String, Segment> segments = new HashMap<String, Segment>();

        // first, go over and compute the search ones...
        Searcher searcher = searcher();
        try {
            IndexReader[] readers = searcher.reader().getSequentialSubReaders();
            for (IndexReader reader : readers) {
                assert reader instanceof SegmentReader;
                SegmentInfo info = Lucene.getSegmentInfo((SegmentReader) reader);
                assert !segments.containsKey(info.name);
                Segment segment = new Segment(info.name);
                segment.search = true;
                segment.docCount = reader.numDocs();
                segment.delDocCount = reader.numDeletedDocs();
                try {
                    segment.sizeInBytes = info.sizeInBytes(true);
                } catch (IOException e) {
                    logger.trace("failed to get size for [{}]", e, info.name);
                }
                segments.put(info.name, segment);
            }
        } finally {
            searcher.release();
        }

        // now, correlate or add the committed ones...
        if (lastCommittedSegmentInfos != null) {
            SegmentInfos infos = lastCommittedSegmentInfos;
            for (SegmentInfo info : infos) {
                Segment segment = segments.get(info.name);
                if (segment == null) {
                    segment = new Segment(info.name);
                    segment.search = false;
                    segment.committed = true;
                    segment.docCount = info.docCount;
                    try {
                        segment.delDocCount = indexWriter.numDeletedDocs(info);
                    } catch (IOException e) {
                        logger.trace("failed to get deleted docs for committed segment", e);
                    }
                    try {
                        segment.sizeInBytes = info.sizeInBytes(true);
                    } catch (IOException e) {
                        logger.trace("failed to get size for [{}]", e, info.name);
                    }
                    segments.put(info.name, segment);
                } else {
                    segment.committed = true;
                }
            }
        }

        Segment[] segmentsArr = segments.values().toArray(new Segment[segments.values().size()]);
        Arrays.sort(segmentsArr, new Comparator<Segment>() {
            @Override
            public int compare(Segment o1, Segment o2) {
                return (int) (o1.generation() - o2.generation());
            }
        });
        return Arrays.asList(segmentsArr);
    } finally {
        rwl.readLock().unlock();
    }
}
From source file:org.elasticsearch.index.percolator.PercolatorQueryCacheTests.java
License:Apache License
public void testLoadQueries() throws Exception {
    Directory directory = newDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(NoMergePolicy.INSTANCE));

    boolean legacyFormat = randomBoolean();
    Version version = legacyFormat ? Version.V_2_0_0 : Version.CURRENT;
    IndexShard indexShard = mockIndexShard(version, legacyFormat);

    storeQuery("0", indexWriter, termQuery("field1", "value1"), true, legacyFormat);
    storeQuery("1", indexWriter, wildcardQuery("field1", "v*"), true, legacyFormat);
    storeQuery("2", indexWriter,
            boolQuery().must(termQuery("field1", "value1")).must(termQuery("field2", "value2")), true,
            legacyFormat);
    // dummy docs should be skipped during loading:
    Document doc = new Document();
    doc.add(new StringField("dummy", "value", Field.Store.YES));
    indexWriter.addDocument(doc);
    storeQuery("4", indexWriter, termQuery("field2", "value2"), true, legacyFormat);
    // only documents of the .percolator type should be loaded:
    storeQuery("5", indexWriter, termQuery("field2", "value2"), false, legacyFormat);
    storeQuery("6", indexWriter, termQuery("field3", "value3"), true, legacyFormat);
    indexWriter.forceMerge(1);

    // also include queries for percolator docs marked as deleted:
    indexWriter.deleteDocuments(new Term("id", "6"));
    indexWriter.close();

    ShardId shardId = new ShardId("_index", ClusterState.UNKNOWN_UUID, 0);
    IndexReader indexReader = ElasticsearchDirectoryReader.wrap(DirectoryReader.open(directory), shardId);
    assertThat(indexReader.leaves().size(), equalTo(1));
    assertThat(indexReader.numDeletedDocs(), equalTo(1));
    assertThat(indexReader.maxDoc(), equalTo(7));

    initialize("field1", "type=keyword", "field2", "type=keyword", "field3", "type=keyword");

    PercolatorQueryCache.QueriesLeaf leaf = cache.loadQueries(indexReader.leaves().get(0), indexShard);
    assertThat(leaf.queries.size(), equalTo(5));
    assertThat(leaf.getQuery(0), equalTo(new TermQuery(new Term("field1", "value1"))));
    assertThat(leaf.getQuery(1), equalTo(new WildcardQuery(new Term("field1", "v*"))));
    assertThat(leaf.getQuery(2), equalTo(new BooleanQuery.Builder()
            .add(new TermQuery(new Term("field1", "value1")), BooleanClause.Occur.MUST)
            .add(new TermQuery(new Term("field2", "value2")), BooleanClause.Occur.MUST).build()));
    assertThat(leaf.getQuery(4), equalTo(new TermQuery(new Term("field2", "value2"))));
    assertThat(leaf.getQuery(6), equalTo(new TermQuery(new Term("field3", "value3"))));

    indexReader.close();
    directory.close();
}