List of usage examples for org.apache.lucene.index.IndexReader#maxDoc()
public abstract int maxDoc();
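In Lucene, maxDoc() returns one greater than the largest document number: valid document IDs run from 0 to maxDoc() - 1, and documents that have been deleted but not yet merged away are still counted (numDocs() excludes them). Below is a minimal sketch of the typical pattern, using the Lucene 4.x-era API that the examples on this page rely on; the index path /tmp/index is a placeholder.

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class MaxDocExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; point this at a real index directory.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File("/tmp/index")));
        try {
            // maxDoc() is an upper bound on document IDs: valid IDs are 0 .. maxDoc()-1.
            // It still counts deleted-but-unmerged documents; numDocs() does not.
            System.out.println("maxDoc:  " + reader.maxDoc());
            System.out.println("numDocs: " + reader.numDocs());
            System.out.println("deleted: " + (reader.maxDoc() - reader.numDocs()));

            // Canonical scan over all live documents.
            Bits liveDocs = MultiFields.getLiveDocs(reader); // null when there are no deletions
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs == null || liveDocs.get(i)) {
                    // reader.document(i) is safe here: the slot is live.
                }
            }
        } finally {
            reader.close();
        }
    }
}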
From source file:org.apache.jena.larq.TestLuceneNRT.java
License:Apache License
@Test
public void indexReaderOpenDirectoryNewInstance() throws Exception {
    IndexReader reader = IndexReader.open(directory);
    writer.addDocument(doc1);
    writer.commit();
    // The reader was opened before the commit, so it does not see the new document.
    assertFalse(reader.isCurrent());
    assertEquals(0, reader.maxDoc());
    // A freshly opened reader sees the committed document.
    reader = IndexReader.open(directory);
    assertTrue(reader.isCurrent());
    assertEquals(1, reader.maxDoc());
}
From source file:org.apache.jena.larq.TestLuceneNRT.java
License:Apache License
@Test
public void indexReaderOpenWriter() throws Exception {
    // Open a near-real-time reader from the writer.
    IndexReader reader = IndexReader.open(writer, true);
    assertNotNull(reader);
    assertSame(directory, reader.directory());
    assertEquals(1, reader.getRefCount());
    assertTrue(reader.isCurrent());
    assertEquals(0, reader.maxDoc());
    // Releasing the reader drops its reference count to zero.
    close(reader);
    assertEquals(0, reader.getRefCount());
}
From source file:org.apache.jena.larq.TestLuceneNRT.java
License:Apache License
@Test
public void indexReaderOpenWriterAddDocument() throws Exception {
    IndexReader reader = IndexReader.open(writer, true);
    writer.addDocument(doc1);
    // The writer already counts the uncommitted document; the reader does not.
    assertFalse(reader.isCurrent());
    assertEquals(1, writer.maxDoc());
    assertEquals(0, reader.maxDoc());
}
From source file:org.apache.jena.larq.TestLuceneNRT.java
License:Apache License
@Test
public void indexReaderOpenWriterAddDocumentAndCommit() throws Exception {
    IndexReader reader = IndexReader.open(writer, true);
    writer.addDocument(doc1);
    writer.commit();
    // Even after the commit, the existing reader still reflects its original point-in-time view.
    assertFalse(reader.isCurrent());
    assertEquals(1, writer.maxDoc());
    assertEquals(0, reader.maxDoc());
}
From source file:org.apache.jena.larq.TestLuceneNRT.java
License:Apache License
@Test
public void indexReaderOpenWriterReopen() throws Exception {
    IndexReader reader = IndexReader.open(writer, true);
    writer.addDocument(doc1);
    writer.commit();
    assertFalse(reader.isCurrent());
    assertEquals(0, reader.maxDoc());
    // Reopening the reader picks up the committed changes.
    reader = TestLARQUtils.openIfChanged(reader);
    assertTrue(reader.isCurrent());
    assertEquals(1, reader.maxDoc());
}
From source file:org.apache.jena.larq.TestLuceneNRT.java
License:Apache License
@Test
public void indexReaderOpenWriterNewInstance() throws Exception {
    IndexReader reader = IndexReader.open(writer, true);
    writer.addDocument(doc1);
    writer.commit();
    assertFalse(reader.isCurrent());
    assertEquals(0, reader.maxDoc());
    // A brand-new near-real-time reader sees the committed document.
    reader = IndexReader.open(writer, true);
    assertTrue(reader.isCurrent());
    assertEquals(1, reader.maxDoc());
}
From source file:org.apache.jena.larq.TestLuceneNRT.java
License:Apache License
@Test
public void indexReaderOpenWriterDeleteDocumentAndCommit() throws Exception {
    Term term = new Term("foo", "bar1");
    IndexReader reader = IndexReader.open(writer, false);
    writer.addDocument(doc1);
    writer.addDocument(doc2);
    writer.commit();
    assertEquals(2, writer.maxDoc());
    assertFalse(reader.isCurrent());
    assertEquals(0, reader.maxDoc());
    assertEquals(0, count(reader, term));
    reader = TestLARQUtils.openIfChanged(reader);
    assertTrue(reader.isCurrent());
    assertEquals(1, count(reader, term));
    writer.deleteDocuments(term);
    // maxDoc still counts the deleted document until the deletion is merged away.
    assertEquals(2, writer.maxDoc());
    assertEquals(1, count(reader, term));
    reader = TestLARQUtils.openIfChanged(reader);
    assertEquals(1, count(reader, term));
    writer.commit();
    reader = TestLARQUtils.openIfChanged(reader);
    assertEquals(2, writer.maxDoc());
    assertEquals(0, count(reader, term));
    reader = TestLARQUtils.openIfChanged(reader);
    assertEquals(0, count(reader, term));
    // forceMergeDeletes reclaims the deleted slot, so maxDoc finally drops to 1.
    writer.forceMergeDeletes();
    assertEquals(1, writer.maxDoc());
    assertEquals(0, count(reader, term));
}
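Note how maxDoc() behaves across the deletion lifecycle above: writer.maxDoc() stays at 2 through deleteDocuments() and even after commit(), because the deleted document still occupies a slot, while reopened readers stop matching the term as soon as the deletion is committed. Only forceMergeDeletes() reclaims the slot and brings maxDoc() down to 1.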
From source file:org.apache.mahout.utils.vectors.lucene.ClusterLabels.java
License:Apache License
/**
 * Get the list of labels, sorted by best score.
 */
protected List<TermInfoClusterInOut> getClusterLabels(Integer integer,
        Collection<WeightedPropertyVectorWritable> wpvws) throws IOException {
    if (wpvws.size() < minNumIds) {
        log.info("Skipping small cluster {} with size: {}", integer, wpvws.size());
        return null;
    }
    log.info("Processing Cluster {} with {} documents", integer, wpvws.size());
    Directory dir = FSDirectory.open(new File(this.indexDir));
    IndexReader reader = DirectoryReader.open(dir);
    log.info("# of documents in the index {}", reader.numDocs());
    Collection<String> idSet = Sets.newHashSet();
    for (WeightedPropertyVectorWritable wpvw : wpvws) {
        Vector vector = wpvw.getVector();
        if (vector instanceof NamedVector) {
            idSet.add(((NamedVector) vector).getName());
        }
    }
    int numDocs = reader.numDocs();
    OpenBitSet clusterDocBitset = getClusterDocBitset(reader, idSet, this.idField);
    log.info("Populating term infos from the index");
    /*
     * This code mirrors CachedTermInfo, with one major change: how the document frequency
     * is obtained. The document frequency for a term should count only in-cluster
     * documents, but the frequency reported by the TermsEnum reflects the entire index.
     * To get the in-cluster frequency, we collect the documents containing each term into
     * a bitset and intersect it with the cluster's document bitset; the cardinality of the
     * intersection is the in-cluster document frequency.
     */
    Terms t = MultiFields.getTerms(reader, contentField);
    TermsEnum te = t.iterator(null);
    Map<String, TermEntry> termEntryMap = new LinkedHashMap<String, TermEntry>();
    Bits liveDocs = MultiFields.getLiveDocs(reader); // WARNING: returns null if there are no deletions
    int count = 0;
    BytesRef term;
    while ((term = te.next()) != null) {
        // Document IDs range from 0 to maxDoc() - 1, so maxDoc() is the required bitset size.
        OpenBitSet termBitset = new OpenBitSet(reader.maxDoc());
        DocsEnum docsEnum = MultiFields.getTermDocsEnum(reader, null, contentField, term);
        int docID;
        while ((docID = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
            // Record only live documents (liveDocs is null when the index has no deletions).
            if (liveDocs == null || liveDocs.get(docID)) {
                termBitset.set(docID);
            }
        }
        // AND the term's bitset with the cluster doc bitset to get the term's in-cluster
        // frequency. This modifies termBitset, but that's fine as we don't use it elsewhere.
        termBitset.and(clusterDocBitset);
        int inclusterDF = (int) termBitset.cardinality();
        TermEntry entry = new TermEntry(term.utf8ToString(), count++, inclusterDF);
        termEntryMap.put(entry.getTerm(), entry);
    }
    List<TermInfoClusterInOut> clusteredTermInfo = Lists.newLinkedList();
    int clusterSize = wpvws.size();
    for (TermEntry termEntry : termEntryMap.values()) {
        int corpusDF = reader.docFreq(new Term(this.contentField, termEntry.getTerm()));
        int outDF = corpusDF - termEntry.getDocFreq();
        int inDF = termEntry.getDocFreq();
        double logLikelihoodRatio = scoreDocumentFrequencies(inDF, outDF, clusterSize, numDocs);
        TermInfoClusterInOut termInfoCluster =
                new TermInfoClusterInOut(termEntry.getTerm(), inDF, outDF, logLikelihoodRatio);
        clusteredTermInfo.add(termInfoCluster);
    }
    Collections.sort(clusteredTermInfo);
    // Cleanup
    Closeables.close(reader, true);
    termEntryMap.clear();
    return clusteredTermInfo.subList(0, Math.min(clusteredTermInfo.size(), maxLabels));
}
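The reader.maxDoc() call here illustrates the classic sizing idiom: since document IDs range from 0 to maxDoc() - 1, maxDoc() is exactly the capacity needed for a per-document bitset such as OpenBitSet, including slots still held by deleted documents.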
From source file:org.apache.maven.index.AbstractRepoNexusIndexerTest.java
License:Apache License
public void testPackaging() throws Exception {
    IndexReader reader = context.acquireIndexSearcher().getIndexReader();
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    // Scan every document slot up to maxDoc(), skipping deleted documents.
    for (int i = 0; i < reader.maxDoc(); i++) {
        if (liveDocs == null || liveDocs.get(i)) {
            Document document = reader.document(i);
            String uinfo = document.get(ArtifactInfo.UINFO);
            if (uinfo != null) {
                String info = document.get(ArtifactInfo.INFO);
                assertFalse("Bad:" + info, info.startsWith("null"));
            }
        }
    }
    // {
    //     Query query = new TermQuery(new Term(MAVEN.PACKAGING, "jar"));
    //     FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query));
    //     assertEquals(response.getResults().toString(), 22, response.getTotalHits());
    // }
    {
        Query query = nexusIndexer.constructQuery(MAVEN.PACKAGING, "tar.gz", SearchType.EXACT);
        FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query));
        assertEquals(response.getResults().toString(), 1, response.getTotalHits());
        ArtifactInfo ai = response.getResults().iterator().next();
        assertEquals("tar.gz", ai.getPackaging());
        assertEquals("tar.gz", ai.getFileExtension());
    }
    {
        Query query = nexusIndexer.constructQuery(MAVEN.PACKAGING, "zip", SearchType.EXACT);
        FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query));
        assertEquals(response.getResults().toString(), 1, response.getTotalHits());
        ArtifactInfo ai = response.getResults().iterator().next();
        assertEquals("zip", ai.getPackaging());
        assertEquals("zip", ai.getFileExtension());
    }
}
From source file:org.apache.maven.index.context.DefaultIndexingContext.java
License:Apache License
public synchronized void merge(Directory directory, DocumentFilter filter) throws IOException {
    final IndexSearcher s = acquireIndexSearcher();
    try {
        final IndexWriter w = getIndexWriter();
        final IndexReader directoryReader = DirectoryReader.open(directory);
        TopScoreDocCollector collector = null;
        try {
            // Iterate over every document slot; maxDoc() includes deleted documents,
            // so check liveDocs before reading each one.
            int numDocs = directoryReader.maxDoc();
            Bits liveDocs = MultiFields.getLiveDocs(directoryReader);
            for (int i = 0; i < numDocs; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue;
                }
                Document d = directoryReader.document(i);
                if (filter != null && !filter.accept(d)) {
                    continue;
                }
                String uinfo = d.get(ArtifactInfo.UINFO);
                if (uinfo != null) {
                    // Add the document only if it is not already present in this index.
                    collector = TopScoreDocCollector.create(1);
                    s.search(new TermQuery(new Term(ArtifactInfo.UINFO, uinfo)), collector);
                    if (collector.getTotalHits() == 0) {
                        w.addDocument(IndexUtils.updateDocument(d, this, false));
                    }
                } else {
                    String deleted = d.get(ArtifactInfo.DELETED);
                    if (deleted != null) {
                        // Deleting the document would lose the history that it was deleted,
                        // so incremental updates wouldn't work. Therefore, put the delete
                        // marker document in as well.
                        w.deleteDocuments(new Term(ArtifactInfo.UINFO, deleted));
                        w.addDocument(d);
                    }
                }
            }
        } finally {
            directoryReader.close();
            commit();
        }
        rebuildGroups();
        Date mergedTimestamp = IndexUtils.getTimestamp(directory);
        if (getTimestamp() != null && mergedTimestamp != null && mergedTimestamp.after(getTimestamp())) {
            // We have both timestamps; keep the newer one.
            updateTimestamp(true, mergedTimestamp);
        } else {
            updateTimestamp(true);
        }
        optimize();
    } finally {
        releaseIndexSearcher(s);
    }
}