List of usage examples for org.apache.lucene.index.IndexReader.numDocs()
public abstract int numDocs();
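numDocs() returns the number of live (non-deleted) documents in the index, in contrast to maxDoc(), which also counts deleted slots that have not yet been merged away. A minimal sketch of the difference, assuming a Lucene 5+ style API and a placeholder index path:

    import java.io.IOException;
    import java.nio.file.Paths;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    public class NumDocsDemo {
        public static void main(String[] args) throws IOException {
            // "/path/to/index" is a placeholder for an existing Lucene index
            try (Directory dir = FSDirectory.open(Paths.get("/path/to/index"));
                 DirectoryReader reader = DirectoryReader.open(dir)) {
                System.out.println("live docs:    " + reader.numDocs());        // excludes deleted docs
                System.out.println("max doc:      " + reader.maxDoc());         // includes deleted slots
                System.out.println("deleted docs: " + reader.numDeletedDocs()); // maxDoc() - numDocs()
            }
        }
    }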
From source file:org.olat.search.service.update.IndexUpdater.java
License:Apache License
/**
 * Check the update- and delete-queue. Updates the existing index and writes new index files.
 */
private void doUpdate() {
    if (!updateQueue.isEmpty() || !deleteQueue.isEmpty()) {
        try {
            log.info("updateQueue.size=" + updateQueue.size() + " deleteQueue.size=" + deleteQueue.size());
            // 0. Make copies of the queues and clear them
            List<Document> updateCopy;
            synchronized (updateQueue) {
                updateCopy = new Vector<Document>(updateQueue);
                updateQueue.clear();
            }
            List<Document> deleteCopy;
            synchronized (deleteQueue) {
                deleteCopy = new Vector<Document>(deleteQueue);
                deleteQueue.clear();
            }
            // 1. Open index reader
            final File indexFile = new File(indexPath);
            final Directory directory = FSDirectory.open(indexFile);
            final IndexReader indexReader = IndexReader.open(directory);
            log.info("before delete: indexReader.numDocs()=" + indexReader.numDocs());
            // 2. Delete old documents
            // loop over all documents in updateQueue
            for (int i = 0; i < updateCopy.size(); i++) {
                final String resourceUrl = updateCopy.get(i).get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
                final Term term = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
                log.info("updateQueue: delete documents with resourceUrl=" + resourceUrl);
                indexReader.deleteDocuments(term);
            }
            // loop over all documents in deleteQueue
            for (int i = 0; i < deleteCopy.size(); i++) {
                final String resourceUrl = deleteCopy.get(i).get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
                final Term term = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
                log.info("deleteQueue: delete documents with resourceUrl='" + resourceUrl + "'");
                indexReader.deleteDocuments(term);
            }
            log.info("after delete: indexReader.numDocs()=" + indexReader.numDocs());
            // 3. Close the reader (the directory must stay open for the writer below)
            indexReader.close();
            // 4. Open writer
            final IndexWriter indexWriter = new IndexWriter(directory,
                    new StandardAnalyzer(Version.LUCENE_CURRENT), false,
                    IndexWriter.MaxFieldLength.UNLIMITED);
            indexWriter.setMergeFactor(INDEX_MERGE_FACTOR); // for better performance
            // 5. Add new documents
            for (int i = 0; i < updateCopy.size(); i++) {
                final Document document = updateCopy.get(i);
                log.info("addDocument:" + document);
                indexWriter.addDocument(document);
            }
            // 6. Optimize and close writer
            long startOptimizeTime = 0;
            if (log.isDebug()) {
                startOptimizeTime = System.currentTimeMillis();
            }
            indexWriter.optimize(); // TODO:chg: may take too long, or may no longer be needed
            if (log.isDebug()) {
                log.debug("Optimized in " + (System.currentTimeMillis() - startOptimizeTime) + "ms");
            }
            indexWriter.close();
            directory.close();
        } catch (final Exception ex) {
            log.warn("Exception during doUpdate. ", ex);
        }
    } else {
        log.debug("Queues are empty.");
    }
}
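The snippet above targets the Lucene 2.x/3.x API (IndexReader.open, IndexReader.deleteDocuments, IndexWriter.optimize), all of which are gone in modern Lucene. A hedged sketch of the same delete-then-add cycle against a Lucene 5+ style API, reusing indexPath, updateCopy, deleteCopy, log, and the field constant from above as assumptions; imports mirror the org.apache.lucene classes already used:

    // Sketch only: a single IndexWriter handles both deletes and adds
    try (Directory directory = FSDirectory.open(Paths.get(indexPath));
         IndexWriter writer = new IndexWriter(directory,
                 new IndexWriterConfig(new StandardAnalyzer()))) {
        for (Document document : updateCopy) {
            Term term = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME,
                    document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME));
            writer.updateDocument(term, document); // atomic delete-by-term followed by add
        }
        for (Document document : deleteCopy) {
            writer.deleteDocuments(new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME,
                    document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME)));
        }
        writer.commit();
        // a fresh near-real-time reader sees the committed changes
        try (DirectoryReader reader = DirectoryReader.open(writer)) {
            log.info("after update: numDocs=" + reader.numDocs());
        }
    }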
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
/**
 * Get all files in this index database.
 *
 * @throws IOException If an IO error occurs while reading from the database
 * @return set of files in this index database
 */
public Set<String> getFiles() throws IOException {
    IndexReader ireader = null;
    TermsEnum iter = null;
    Terms terms;
    Set<String> files = new HashSet<>();
    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        int numDocs = ireader.numDocs();
        if (numDocs > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            terms = uFields.terms(QueryBuilder.U);
            iter = terms.iterator(); // init uid iterator
        }
        while (iter != null && iter.term() != null) {
            String value = iter.term().utf8ToString();
            if (value.isEmpty()) {
                iter.next();
                continue;
            }
            files.add(Util.uid2url(value));
            BytesRef next = iter.next();
            if (next == null) {
                iter = null;
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }
    return files;
}
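MultiFields.getFields was removed in Lucene 8, so this lookup (and the identical one in listTokens() below) does not compile against current Lucene. A hedged Lucene 8+ equivalent for fetching the merged per-field terms:

    // assumption: Lucene 8+; MultiTerms replaces MultiFields for merged term access
    Terms terms = MultiTerms.getTerms(ireader, QueryBuilder.U);
    TermsEnum iter = (terms == null) ? null : terms.iterator();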
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
/**
 * Get the number of documents in this index database.
 *
 * @return number of documents
 * @throws IOException if I/O exception occurred
 */
public int getNumFiles() throws IOException {
    IndexReader ireader = null;
    int numDocs = 0;
    try {
        ireader = DirectoryReader.open(indexDirectory); // open existing index
        numDocs = ireader.numDocs();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }
    return numDocs;
}
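Since the method only needs the count, the same logic can be written more compactly with try-with-resources (Java 7+); this is a sketch, not the OpenGrok source:

    public int getNumFiles() throws IOException {
        // the reader is closed automatically, even if open() or numDocs() throws
        try (IndexReader ireader = DirectoryReader.open(indexDirectory)) {
            return ireader.numDocs(); // live documents only
        }
    }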
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
public void listTokens(int freq) throws IOException {
    IndexReader ireader = null;
    TermsEnum iter = null;
    Terms terms;
    try {
        ireader = DirectoryReader.open(indexDirectory);
        int numDocs = ireader.numDocs();
        if (numDocs > 0) {
            Fields uFields = MultiFields.getFields(ireader);
            terms = uFields.terms(QueryBuilder.DEFS);
            iter = terms.iterator(); // init uid iterator
        }
        while (iter != null && iter.term() != null) {
            if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) {
                LOGGER.warning(iter.term().utf8ToString());
            }
            BytesRef next = iter.next();
            if (next == null) {
                iter = null;
            }
        }
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                LOGGER.log(Level.WARNING, "An error occurred while closing index reader", e);
            }
        }
    }
}
From source file:org.opengrok.suggest.SuggesterSearcher.java
License:Open Source License
/**
 * @param reader reader of the index for which to provide suggestions
 * @param resultSize size of the results
 */
SuggesterSearcher(final IndexReader reader, final int resultSize) {
    super(reader);
    numDocs = reader.numDocs();
    this.resultSize = resultSize;
}
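Caching reader.numDocs() in the constructor is safe here because an IndexReader is a point-in-time snapshot of the index: the live-document count cannot change for the lifetime of that reader instance, so repeated calls would return the same value.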
From source file:org.opengrok.suggest.SuggesterUtils.java
License:Open Source License
private static double computeNormalizedDocumentFrequency(final IndexReader indexReader, final Term term)
        throws IOException {
    int documentFrequency = indexReader.docFreq(term);

    return ((double) documentFrequency) / indexReader.numDocs();
}
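The quotient is the df/N building block of inverse document frequency: a term occurring in every document yields 1.0, and rare terms approach 0. A hypothetical call, with the field name and term text invented for illustration:

    // e.g. docFreq = 250 in an index where numDocs() = 1000 gives 0.25
    double ndf = computeNormalizedDocumentFrequency(indexReader, new Term("full", "lucene"));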
From source file:org.openrdf.sail.lucene.LuceneIndex.java
License:BSD License
private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;
            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.isDeleted(i))
                    continue;
                doc = reader.document(i);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);
            }

            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count
                    + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());
        } finally {
            if (currentMonitor != null) {
                currentMonitor.closeWhenPossible();
                currentMonitor = null;
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }
}
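Note the loop bound: maxDoc() also counts deleted slots, so the isDeleted(i) guard is what makes count agree with reader.numDocs(). On an empty index the average-fields division is 0.0/0, which yields NaN in Java rather than throwing.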
From source file:org.openrdf.sail.lucene.LuceneIndexTest.java
License:BSD License
public void testAddStatement() throws IOException, ParseException {
    // add a statement to an index
    index.addStatement(statement11);

    // check that it arrived properly
    IndexReader reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());

    Term term = new Term(LuceneIndex.URI_FIELD_NAME, subject.toString());
    TermDocs docs = reader.termDocs(term);
    assertTrue(docs.next());

    int documentNr = docs.doc();
    Document document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));

    assertFalse(docs.next());
    docs.close();
    reader.close();

    // add another statement
    index.addStatement(statement12);

    // See if everything remains consistent. We must create a new IndexReader
    // in order to be able to see the updates
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs()); // #docs should *not* have increased

    docs = reader.termDocs(term);
    assertTrue(docs.next());

    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));

    assertFalse(docs.next());
    docs.close();

    // see if we can query for these literals
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_35, LuceneIndex.TEXT_FIELD_NAME, analyzer);

    Query query = parser.parse(object1.getLabel());
    System.out.println("query=" + query);
    TotalHitCountCollector results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    query = parser.parse(object2.getLabel());
    results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    searcher.close();
    reader.close();

    // remove the first statement
    index.removeStatement(statement11);

    // check that that statement is actually removed and that the other still
    // exists
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());

    docs = reader.termDocs(term);
    assertTrue(docs.next());

    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertNull(document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));

    assertFalse(docs.next());
    docs.close();
    reader.close();

    // remove the other statement
    index.removeStatement(statement12);

    // check that there are no documents left (i.e. the last Document was
    // removed completely, rather than its remaining triple removed)
    reader = IndexReader.open(directory);
    assertEquals(0, reader.numDocs());
    reader.close();
}
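This test is written against the Lucene 3.x API: IndexReader.termDocs and IndexSearcher.close were removed in Lucene 4, where postings are read through a TermsEnum and its postings enumerator instead. The numDocs() assertions themselves are unchanged in modern Lucene.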
From source file:org.openrdf.sail.lucene.LuceneIndexTest.java
License:BSD License
public void testAddMultiple() throws Exception {
    // add a statement to an index
    HashSet<Statement> added = new HashSet<Statement>();
    HashSet<Statement> removed = new HashSet<Statement>();
    added.add(statement11);
    added.add(statement12);
    added.add(statement21);
    added.add(statement22);
    index.addRemoveStatements(added, removed);

    // check that it arrived properly
    IndexReader reader = IndexReader.open(directory);
    assertEquals(2, reader.numDocs());
    reader.close();

    // check the documents
    Document document = index.getDocuments(subject).iterator().next();
    assertEquals(subject.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertStatement(statement11, document);
    assertStatement(statement12, document);

    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertStatement(statement21, document);
    assertStatement(statement22, document);

    // check if the text field stores all added string values
    Set<String> texts = new HashSet<String>();
    texts.add("cats");
    texts.add("dogs");
    // FIXME
    // assertTexts(texts, document);

    // add/remove one
    added.clear();
    removed.clear();
    added.add(statement23);
    removed.add(statement22);
    index.addRemoveStatements(added, removed);

    // check doc 2
    document = index.getDocuments(subject2).iterator().next();
    assertEquals(subject2.toString(), document.get(LuceneIndex.URI_FIELD_NAME));
    assertStatement(statement21, document);
    assertStatement(statement23, document);
    assertNoStatement(statement22, document);

    // check if the text field stores all added and no deleted string values
    texts.remove("dogs");
    texts.add("chicken");
    // FIXME
    // assertTexts(texts, document);

    // TODO: check deletion of the rest
}
From source file:org.openrdf.sail.lucene3.LuceneIndex.java
License:BSD License
private void logIndexStats() {
    try {
        IndexReader reader = null;
        try {
            reader = getIndexReader();

            Document doc;
            int totalFields = 0;
            Set<String> ids = new HashSet<String>();
            String[] idArray;
            int count = 0;
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (isDeleted(reader, i))
                    continue;
                doc = readDocument(reader, i, null);
                totalFields += doc.getFields().size();
                count++;
                idArray = doc.getValues("id");
                for (String id : idArray)
                    ids.add(id);
            }

            logger.info("Total documents in the index: " + reader.numDocs()
                    + ", number of deletable documents in the index: " + reader.numDeletedDocs()
                    + ", valid documents: " + count
                    + ", total fields in all documents: " + totalFields
                    + ", average number of fields per document: " + ((double) totalFields) / reader.numDocs());
            logger.info("Distinct ids in the index: " + ids.size());
        } finally {
            if (currentMonitor != null) {
                currentMonitor.closeWhenPossible();
                currentMonitor = null;
            }
        }
    } catch (IOException e) {
        logger.warn(e.getMessage(), e);
    }
}