List of usage examples for org.apache.lucene.index IndexReader numDocs
public abstract int numDocs();
From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java
License:BSD License
/**
 * Verifies the full lifecycle of single-statement indexing: adding two statements about
 * the same subject keeps them in one Lucene Document, querying finds their literals, and
 * removing them first shrinks and finally empties that Document.
 */
@Test
public void testAddStatement() throws IOException, ParseException {
    // add a statement to an index
    index.begin();
    index.addStatement(statement11);
    index.commit();

    // check that it arrived properly: exactly one document, keyed by the subject URI
    IndexReader reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());
    Term term = new Term(SearchFields.URI_FIELD_NAME, subject.toString());
    TermDocs docs = reader.termDocs(term);
    assertTrue(docs.next());
    int documentNr = docs.doc();
    Document document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertFalse(docs.next());
    docs.close();
    reader.close();

    // add another statement about the same subject
    index.begin();
    index.addStatement(statement12);
    index.commit();

    // See if everything remains consistent. We must create a new IndexReader
    // in order to be able to see the updates
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs()); // #docs should *not* have increased
    docs = reader.termDocs(term);
    assertTrue(docs.next());
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertEquals(object1.getLabel(), document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    assertFalse(docs.next());
    docs.close();

    // see if we can query for these literals
    IndexSearcher searcher = new IndexSearcher(reader);
    QueryParser parser = new QueryParser(Version.LUCENE_35, SearchFields.TEXT_FIELD_NAME, analyzer);
    Query query = parser.parse(object1.getLabel());
    System.out.println("query=" + query);
    TotalHitCountCollector results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    query = parser.parse(object2.getLabel());
    results = new TotalHitCountCollector();
    searcher.search(query, results);
    assertEquals(1, results.getTotalHits());

    searcher.close();
    reader.close();

    // remove the first statement
    index.begin();
    index.removeStatement(statement11);
    index.commit();

    // check that that statement is actually removed and that the other still
    // exists
    reader = IndexReader.open(directory);
    assertEquals(1, reader.numDocs());
    docs = reader.termDocs(term);
    assertTrue(docs.next());
    documentNr = docs.doc();
    document = reader.document(documentNr);
    assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME));
    assertNull(document.get(predicate1.toString()));
    assertEquals(object2.getLabel(), document.get(predicate2.toString()));
    assertFalse(docs.next());
    docs.close();
    reader.close();

    // remove the other statement
    index.begin();
    index.removeStatement(statement12);
    index.commit();

    // check that there are no documents left (i.e. the last Document was
    // removed completely, rather than its remaining triple removed)
    reader = IndexReader.open(directory);
    assertEquals(0, reader.numDocs());
    reader.close();
}
From source file:org.openrdf.sail.lucene3.LuceneIndexTest.java
License:BSD License
@Test public void testAddMultiple() throws Exception { // add a statement to an index HashSet<Statement> added = new HashSet<Statement>(); HashSet<Statement> removed = new HashSet<Statement>(); added.add(statement11);/* w ww . j a v a2 s . c o m*/ added.add(statement12); added.add(statement21); added.add(statement22); index.begin(); index.addRemoveStatements(added, removed); index.commit(); // check that it arrived properly IndexReader reader = IndexReader.open(directory); assertEquals(2, reader.numDocs()); reader.close(); // check the documents Document document = index.getDocuments(subject).iterator().next(); assertEquals(subject.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertStatement(statement11, document); assertStatement(statement12, document); document = index.getDocuments(subject2).iterator().next(); assertEquals(subject2.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertStatement(statement21, document); assertStatement(statement22, document); // check if the text field stores all added string values Set<String> texts = new HashSet<String>(); texts.add("cats"); texts.add("dogs"); // FIXME // assertTexts(texts, document); // add/remove one added.clear(); removed.clear(); added.add(statement23); removed.add(statement22); index.begin(); index.addRemoveStatements(added, removed); index.commit(); // check doc 2 document = index.getDocuments(subject2).iterator().next(); assertEquals(subject2.toString(), document.get(SearchFields.URI_FIELD_NAME)); assertStatement(statement21, document); assertStatement(statement23, document); assertNoStatement(statement22, document); // check if the text field stores all added and no deleted string values texts.remove("dogs"); texts.add("chicken"); // FIXME // assertTexts(texts, document); // TODO: check deletion of the rest }
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
/**
 * Update the content of this index database.
 *
 * <p>Walks every configured directory under the source root, (re)indexes changed files via
 * {@code indexDown}, and removes index entries (by their {@code u}id term) for files that no
 * longer exist. Guarded by {@code lock} so only one update runs at a time.
 *
 * @throws IOException if an error occurs
 * @throws HistoryException if an error occurs when accessing the history
 */
public void update() throws IOException, HistoryException {
    // Fail fast if another update is already in progress; mark ourselves running.
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    // Set up ctags for definition extraction; indexing still works without it,
    // but definition search will be unavailable.
    String ctgs = RuntimeEnvironment.getInstance().getCtags();
    if (ctgs != null) {
        ctags = new Ctags();
        ctags.setBinary(ctgs);
    }
    if (ctags == null) {
        log.severe("Unable to run ctags! searching definitions will not work!");
    }

    if (ctags != null) {
        String filename = RuntimeEnvironment.getInstance().getCTagsExtraOptionsFile();
        if (filename != null) {
            ctags.setCTagsExtraOptionsFile(filename);
        }
    }

    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(SearchEngine.LUCENE_VERSION, analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(RuntimeEnvironment.getInstance().getRamBufferSize());
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk

        // Default to the whole source root (or the project's path) when no
        // explicit directories were configured.
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = RuntimeEnvironment.getInstance().getSourceRootFile();
            } else {
                sourceRoot = new File(RuntimeEnvironment.getInstance().getSourceRootFile(), dir);
            }

            HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);

            String startuid = Util.path2uid(dir, "");
            IndexReader reader = DirectoryReader.open(indexDirectory); // open existing index
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                // NOTE(review): assumes uFields and the U field exist whenever the index
                // is non-empty — confirm; otherwise terms stays null below.
                Fields uFields = MultiFields.getFields(reader); //reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (numDocs > 0) {
                    // Position the uid iterator at (or just past) this directory's first uid.
                    uidIter = terms.iterator(uidIter);
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        log.log(Level.WARNING, "Couldn't find a start term for {0}, empty u field?", startuid);
                    }
                }

                // The code below traverses the tree to get total count.
                int file_cnt = 0;
                if (RuntimeEnvironment.getInstance().isPrintProgress()) {
                    log.log(Level.INFO, "Counting files in {0} ...", dir);
                    file_cnt = indexDown(sourceRoot, dir, true, 0, 0);
                    if (log.isLoggable(Level.INFO)) {
                        log.log(Level.INFO, "Need to process: {0} files for {1}",
                                new Object[] { file_cnt, dir });
                    }
                }

                // The real indexing pass; advances uidIter past files that still exist.
                indexDown(sourceRoot, dir, false, 0, file_cnt);

                // Any uids still remaining under startuid belong to deleted files: purge them.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile();
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }
            } finally {
                reader.close();
            }
        }
    } finally {
        // Always commit/close the writer and release resources, even on failure.
        if (writer != null) {
            try {
                writer.prepareCommit();
                writer.commit();
                writer.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing writer", e);
            }
        }
        if (ctags != null) {
            try {
                ctags.close();
            } catch (IOException e) {
                log.log(Level.WARNING, "An error occured while closing ctags process", e);
            }
        }
        synchronized (lock) {
            running = false;
        }
    }

    // On a successful, dirty run: optionally optimize, then touch the timestamp
    // file used to detect when the index was last updated.
    if (!isInterrupted() && isDirty()) {
        if (RuntimeEnvironment.getInstance().isOptimizeDatabase()) {
            optimize();
        }
        RuntimeEnvironment env = RuntimeEnvironment.getInstance();
        File timestamp = new File(env.getDataRootFile(), "timestamp");
        String purpose = "used for timestamping the index database.";
        if (timestamp.exists()) {
            if (!timestamp.setLastModified(System.currentTimeMillis())) {
                log.log(Level.WARNING, "Failed to set last modified time on ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        } else {
            if (!timestamp.createNewFile()) {
                log.log(Level.WARNING, "Failed to create file ''{0}'', {1}",
                        new Object[] { timestamp.getAbsolutePath(), purpose });
            }
        }
    }
}
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
/** * List all of the files in this index database * * @throws IOException If an IO error occurs while reading from the database *//*from w ww . jav a2 s.c o m*/ public void listFiles() throws IOException { IndexReader ireader = null; TermsEnum iter = null; Terms terms = null; try { ireader = DirectoryReader.open(indexDirectory); // open existing index int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.U); } iter = terms.iterator(iter); // init uid iterator while (iter != null && iter.term() != null) { log.fine(Util.uid2url(iter.term().utf8ToString())); BytesRef next = iter.next(); if (next == null) { iter = null; } } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing index reader", e); } } } }
From source file:org.opensolaris.opengrok.index.IndexDatabase.java
License:Open Source License
public void listTokens(int freq) throws IOException { IndexReader ireader = null; TermsEnum iter = null;//www.java 2 s.c o m Terms terms = null; try { ireader = DirectoryReader.open(indexDirectory); int numDocs = ireader.numDocs(); if (numDocs > 0) { Fields uFields = MultiFields.getFields(ireader);//reader.getTermVectors(0); terms = uFields.terms(QueryBuilder.DEFS); } iter = terms.iterator(iter); // init uid iterator while (iter != null && iter.term() != null) { //if (iter.term().field().startsWith("f")) { if (iter.docFreq() > 16 && iter.term().utf8ToString().length() > freq) { log.warning(iter.term().utf8ToString()); } BytesRef next = iter.next(); if (next == null) { iter = null; } /*} else { break; }*/ } } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { log.log(Level.WARNING, "An error occured while closing index reader", e); } } } }
From source file:org.oscm.search.IndexRequestMasterListenerIT.java
private void assertDocsInIndex(final Class<?> clazz, final String comment, final int expectedNumDocs, final int expectedNumIndexedAttributes, final List<String> expectedAttributes) throws Exception { Boolean evaluationTookPlace = runTX(new Callable<Boolean>() { @Override// w ww .jav a2s. c o m public Boolean call() throws Exception { boolean evaluatedIndex = false; Session session = dm.getSession(); if (session != null) { FullTextSession fullTextSession = Search.getFullTextSession(session); SearchFactory searchFactory = fullTextSession.getSearchFactory(); IndexReader reader = searchFactory.getIndexReaderAccessor().open(clazz); try { assertEquals(comment, expectedNumDocs, reader.numDocs()); if (expectedNumDocs > 0) { final FieldInfos indexedFieldNames = ReaderUtil.getMergedFieldInfos(reader); for (String expectedAttr : expectedAttributes) { assertNotNull("attribute " + expectedAttr + " does not exist in index: " + indexedFieldNames, indexedFieldNames.fieldInfo(expectedAttr)); } assertNotNull("attribute \"key\" does not exist in index: " + indexedFieldNames, indexedFieldNames.fieldInfo("key")); assertNotNull( "attribute \"_hibernate_class\" does not exist in index: " + indexedFieldNames, indexedFieldNames.fieldInfo("_hibernate_class")); assertEquals( "More or less attributes indexed than expected, attributes retrieved from index: " + indexedFieldNames, expectedNumIndexedAttributes + 2, indexedFieldNames.size()); evaluatedIndex = true; } } finally { searchFactory.getIndexReaderAccessor().close(reader); } } return Boolean.valueOf(evaluatedIndex); } }); if (expectedNumDocs > 0) { Assert.assertTrue("Index not found, no evaluation took place", evaluationTookPlace.booleanValue()); } }
From source file:org.punksearch.crawler.IndexOperator.java
License:Open Source License
public static void deleteByAge(String dirPath, float days) { try {/*from w w w .jav a 2 s . c o m*/ final Directory dir = LuceneUtils.dir(dirPath); boolean indexExists = IndexReader.indexExists(dir); if (!indexExists) { return; } final IndexWriter iw = createIndexWriter(dirPath); final IndexReader ir = IndexReader.open(dir); IndexSearcher is = new IndexSearcher(ir); long min = 0; long max = System.currentTimeMillis() - Math.round(days * 1000 * 3600 * 24); final TermRangeQuery oldDocsQuery = new TermRangeQuery(IndexFields.INDEXED, DateTools.timeToString(min, DateTools.Resolution.MILLISECOND), DateTools.timeToString(max, DateTools.Resolution.MILLISECOND), true, false); final int docsInReader = ir.numDocs(); final TopDocs topDocs = is.search(oldDocsQuery, Math.max(1, docsInReader)); log.info("Deleting by age from index directory. Items to delete: " + topDocs.totalHits); iw.deleteDocuments(oldDocsQuery); iw.close(); } catch (IOException ex) { log.error("Exception during deleting by age from index directory", ex); throw new RuntimeException(ex); } }
From source file:org.punksearch.web.statistics.FileTypeStatistics.java
License:Open Source License
private static Hits extractDocsForType(String type) { Filter filter = TypeFilters.get(type); try {/*from www. j a va2s . c o m*/ IndexSearcher indexSearcher = Core.getIndexReaderHolder().getCurrentSearcher(); IndexReader indexReader = indexSearcher.getIndexReader(); final TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), filter, indexReader.numDocs()); return new Hits(indexSearcher, topDocs); } catch (Exception e) { log.error("error extractDocsForType", e); return null; } }
From source file:org.punksearch.web.statistics.FileTypeStatistics.java
License:Open Source License
public static synchronized Long totalSize() { if (totalSizeCache == null || indexChangedAfter(totalSizeCacheTimestamp)) { long size = 0; try {//from w w w . j a va 2 s . com // Rough approximation to the root directories. // Obviously, non-latin1 directory names slip through the filter, we'll catch them later // Maybe we should use some ranges with UTF8-16 characters... TODO String approxQuery = "*:* -Path:{a TO Z*} -Path:{0 TO 9*}"; QueryParser parser = new QueryParser(LuceneVersion.VERSION, "Host", new SimpleAnalyzer(LuceneVersion.VERSION)); Query query = parser.parse(approxQuery); IndexSearcher indexSearcher = Core.getIndexReaderHolder().getCurrentSearcher(); IndexReader indexReader = indexSearcher.getIndexReader(); final TopDocs topDocs = indexSearcher.search(query, indexReader.numDocs()); Hits hits = new Hits(indexSearcher, topDocs); for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); String path = doc.get(IndexFields.PATH); if (!path.equals("/")) { continue; } size += Long.parseLong(doc.get(IndexFields.SIZE)); } } catch (Exception e) { log.error("", e); } totalSizeCache = size; totalSizeCacheTimestamp = System.currentTimeMillis(); } return totalSizeCache; }
From source file:org.roosster.store.EntryStore.java
License:Open Source License
/** *//*www . ja v a 2 s . c o m*/ public int getDocNum() throws IOException { if (!isInitialized()) throw new IllegalStateException("Database must be initialized before use!"); int numdocs = 0; IndexReader reader = null; try { reader = getReader(); numdocs = reader.numDocs(); } finally { if (reader != null) reader.close(); } return numdocs; }