List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.

From source file: org.sindice.siren.demo.entity.EntityCentricIndexing.java
License:Apache License
public Document getDocument(final int docId) throws CorruptIndexException, IOException { final IndexReader reader = IndexReader.open(dir, true); try {//www. jav a2s.c o m return reader.document(docId); } finally { reader.close(); } }
From source file:org.sindice.siren.search.TestSirenFuzzyQuery.java
License:Open Source License
/** Test the {@link TopTermsBoostOnlySirenBooleanQueryRewrite} rewrite method. */ @Test//from w w w.java2 s . com public void testBoostOnlyRewrite() throws Exception { final Directory directory = new RAMDirectory(); final IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, new TupleAnalyzer(TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT), new AnyURIAnalyzer(TEST_VERSION_CURRENT))); final IndexWriter writer = new IndexWriter(directory, conf); this.addDoc("Lucene", writer); this.addDoc("Lucene", writer); this.addDoc("Lucenne", writer); final IndexReader reader = IndexReader.open(directory); final IndexSearcher searcher = new IndexSearcher(directory); writer.close(); final SirenFuzzyQuery query = new SirenFuzzyQuery(new Term("field", "Lucene")); query.setRewriteMethod(new SirenMultiTermQuery.TopTermsBoostOnlySirenBooleanQueryRewrite(50)); final ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs; assertEquals(3, hits.length); // normally, 'Lucenne' would be the first result as IDF will skew the score. assertEquals(this.getTriple("Lucene"), reader.document(hits[0].doc).get("field")); assertEquals(this.getTriple("Lucene"), reader.document(hits[1].doc).get("field")); assertEquals(this.getTriple("Lucenne"), reader.document(hits[2].doc).get("field")); searcher.close(); reader.close(); directory.close(); }
From source file:org.sonatype.nexus.index.AbstractRepoNexusIndexerTest.java
License:Open Source License
public void testPackaging() throws Exception { IndexReader reader = context.getIndexReader(); for (int i = 0; i < reader.numDocs(); i++) { if (!reader.isDeleted(i)) { Document document = reader.document(i); String uinfo = document.get(ArtifactInfo.UINFO); if (uinfo != null) { String info = document.get(ArtifactInfo.INFO); assertFalse(info.startsWith("null")); }/*from w w w . j a va 2 s . c o m*/ } } // { // Query query = new TermQuery( new Term( ArtifactInfo.PACKAGING, "jar" ) ); // FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query)); // assertEquals(response.getResults().toString(), 22, response.getTotalHits()); // } { Query query = new TermQuery(new Term(ArtifactInfo.PACKAGING, "tar.gz")); FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query)); assertEquals(response.getResults().toString(), 1, response.getTotalHits()); ArtifactInfo ai = response.getResults().iterator().next(); assertEquals("tar.gz", ai.packaging); assertEquals("tar.gz", ai.fextension); } { Query query = new TermQuery(new Term(ArtifactInfo.PACKAGING, "zip")); FlatSearchResponse response = nexusIndexer.searchFlat(new FlatSearchRequest(query)); assertEquals(response.getResults().toString(), 1, response.getTotalHits()); ArtifactInfo ai = response.getResults().iterator().next(); assertEquals("zip", ai.packaging); assertEquals("zip", ai.fextension); } }
From source file:org.sonatype.nexus.index.context.DefaultIndexingContext.java
License:Open Source License
/**
 * Merges the documents of another index {@code directory} into this context.
 *
 * <p>Artifact documents (those with a UINFO field) are copied only when no document
 * with the same UINFO already exists here; "deleted" marker documents are replayed
 * by removing the matching artifact and storing the marker itself. Afterwards the
 * group metadata is rebuilt, the context timestamp is advanced to the newer of the
 * two indexes, and the index is optimized.
 *
 * @param directory the source index to merge from
 * @param filter    optional filter; documents it rejects are skipped (may be null)
 * @throws IOException on a low-level index I/O failure
 */
public void merge(Directory directory, DocumentFilter filter) throws IOException {
    // All reader/writer juggling happens under the context-wide index lock.
    synchronized (indexLock) {
        closeReaders();
        IndexWriter w = getIndexWriter();
        IndexSearcher s = getIndexSearcher();
        IndexReader r = IndexReader.open(directory);
        try {
            // Iterate over the full doc-id space; ids of deleted docs are skipped below.
            int numDocs = r.maxDoc();
            for (int i = 0; i < numDocs; i++) {
                if (r.isDeleted(i)) {
                    continue;
                }
                Document d = r.document(i);
                if (filter != null && !filter.accept(d)) {
                    continue;
                }
                String uinfo = d.get(ArtifactInfo.UINFO);
                if (uinfo != null) {
                    // Artifact record: copy it only if this context does not already
                    // contain a document with the same unique info.
                    Hits hits = s.search(new TermQuery(new Term(ArtifactInfo.UINFO, uinfo)));
                    if (hits.length() == 0) {
                        w.addDocument(IndexUtils.updateDocument(d, this, false));
                    }
                } else {
                    String deleted = d.get(ArtifactInfo.DELETED);
                    if (deleted != null) {
                        // Deleting the document loses history that it was delete,
                        // so incrementals wont work. Therefore, put the delete
                        // document in as well
                        w.deleteDocuments(new Term(ArtifactInfo.UINFO, deleted));
                        w.addDocument(d);
                    }
                }
            }
        } finally {
            // Close the source reader and drop our own (now stale) readers.
            r.close();
            closeReaders();
        }
        // Group lists only track artifact documents, so recompute them after a merge.
        rebuildGroups();
        Date mergedTimestamp = IndexUtils.getTimestamp(directory);
        if (getTimestamp() != null && mergedTimestamp != null && mergedTimestamp.after(getTimestamp())) {
            // we have both, keep the newest
            updateTimestamp(true, mergedTimestamp);
        } else {
            updateTimestamp(true);
        }
        optimize();
    }
}
From source file:org.sonatype.nexus.index.context.IndexUtils.java
License:Open Source License
/**
 * Used to rebuild group information, for example on contexts which were merged,
 * since merge() of contexts only merges the Documents with UINFO record (Artifacts).
 *
 * <p>Scans every live artifact document, collects its root group and full groupId,
 * stores both sets back into the context, then optimizes and flushes the writer.
 *
 * @param context the indexing context whose group metadata is rebuilt
 * @throws IOException on a low-level index I/O failure
 */
public static void rebuildGroups(IndexingContext context) throws IOException {
    IndexReader indexReader = context.getIndexReader();
    Set<String> rootGroups = new LinkedHashSet<String>();
    Set<String> allGroups = new LinkedHashSet<String>();
    int maxDoc = indexReader.maxDoc();
    for (int docId = 0; docId < maxDoc; docId++) {
        // Skip doc ids whose documents were deleted.
        if (indexReader.isDeleted(docId)) {
            continue;
        }
        Document doc = indexReader.document(docId);
        // Only artifact documents (those carrying a UINFO field) contribute groups.
        if (doc.get(ArtifactInfo.UINFO) == null) {
            continue;
        }
        ArtifactInfo info = IndexUtils.constructArtifactInfo(doc, context);
        rootGroups.add(info.getRootGroup());
        allGroups.add(info.groupId);
    }
    setRootGroups(context, rootGroups);
    setAllGroups(context, allGroups);
    context.getIndexWriter().optimize();
    context.getIndexWriter().flush();
}
From source file:org.sonatype.nexus.index.DefaultScannerListener.java
License:Open Source License
private void initialize(IndexingContext ctx) throws IOException, CorruptIndexException { IndexReader r = ctx.getIndexReader(); for (int i = 0; i < r.numDocs(); i++) { if (!r.isDeleted(i)) { Document d = r.document(i); String uinfo = d.get(ArtifactInfo.UINFO); if (uinfo != null) { uinfos.add(uinfo);//from www . j a v a 2 s . c o m // add all existing groupIds to the lists, as they will // not be "discovered" and would be missing from the new list.. String groupId = uinfo.substring(0, uinfo.indexOf('|')); int n = groupId.indexOf('.'); groups.add(n == -1 ? groupId : groupId.substring(0, n)); allGroups.add(groupId); } } } }
From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationIT.java
License:Open Source License
protected void ensureUniqueness() throws IOException { final IndexingContext context = indexerManager.getRepositoryIndexContext("central"); final HashSet<String> uinfos = new HashSet<String>(); final ArrayList<String> duplicates = new ArrayList<String>(); final IndexSearcher indexSearcher = context.acquireIndexSearcher(); try {/*from w w w.j a va2s .co m*/ final IndexReader r = indexSearcher.getIndexReader(); for (int i = 0; i < r.maxDoc(); i++) { if (!r.isDeleted(i)) { final Document d = r.document(i); String uinfo = d.get(ArtifactInfo.UINFO); if (uinfo != null && !uinfos.add(uinfo)) { duplicates.add(uinfo); } } } } finally { context.releaseIndexSearcher(indexSearcher); } // remote proxy contains only one artifact: log4j-1.2.13: so we expect out index to have no // dupes and only one artifact if (!duplicates.isEmpty() || uinfos.size() > 1) { Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + duplicates + ", uinfos=" + uinfos); } }
From source file:org.sonatype.nexus.index.Nexus5393IndexEntryDuplicationLocalTest.java
License:Open Source License
protected void ensureUniqueness() throws IOException { final IndexingContext context = indexerManager.getRepositoryIndexContext("releases"); final HashSet<String> uinfos = new HashSet<String>(); final ArrayList<String> duplicates = new ArrayList<String>(); final IndexSearcher indexSearcher = context.acquireIndexSearcher(); try {//from ww w . ja v a2 s. co m final IndexReader r = indexSearcher.getIndexReader(); for (int i = 0; i < r.maxDoc(); i++) { if (!r.isDeleted(i)) { final Document d = r.document(i); String uinfo = d.get(ArtifactInfo.UINFO); if (uinfo != null && !uinfos.add(uinfo)) { duplicates.add(uinfo); } } } } finally { context.releaseIndexSearcher(indexSearcher); } // remote proxy contains only one artifact: log4j-1.2.13: so we expect out index to have no // dupes and only one artifact if (!duplicates.isEmpty() || uinfos.size() > 1) { Assert.fail("UINFOs are duplicated or we scanned some unexpected ones, duplicates=" + duplicates + ", uinfos=" + uinfos); } }
From source file:org.sonatype.nexus.index.Nexus737NexusIndexerTest.java
License:Open Source License
public void testValidateUINFOs() throws Exception { IndexReader reader = context.getIndexReader(); int foundCount = 0; for (int i = 0; i < reader.numDocs(); i++) { Document document = reader.document(i); String uinfo = document.get(ArtifactInfo.UINFO); if ("org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|NA".equals(uinfo) || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|zip".equals(uinfo) || "org.sonatype.nexus|nexus-webapp|1.0.0-SNAPSHOT|bundle|tar.gz".equals(uinfo)) { foundCount++;//from w w w . j a va 2 s.c om } } assertEquals(foundCount, 3); }
From source file:org.sonatype.nexus.index.NexusScanningListener.java
License:Open Source License
/**
 * Used in {@code update} mode, deletes documents from index that are not found during scanning (means
 * they were deleted from the storage being scanned).
 *
 * @param contextPath optional path prefix; when non-null, only artifacts whose GAV
 *                    path starts with it are considered for removal
 * @return the number of index entries removed
 * @throws IOException on a low-level index I/O failure
 */
private int removeDeletedArtifacts(final String contextPath) throws IOException {
    int deleted = 0;
    final IndexReader r = contextIndexSearcher.getIndexReader();
    // Walk the full doc-id space, skipping already-deleted documents.
    for (int i = 0; i < r.maxDoc(); i++) {
        if (!r.isDeleted(i)) {
            final Document d = r.document(i);
            final String uinfo = d.get(ArtifactInfo.UINFO);
            // Indexed but not seen by the scanner => gone from storage; remove it.
            if (uinfo != null && !processedUinfos.contains(uinfo)) {
                // file is not present in storage but is on index, delete it from index
                // UINFO layout: groupId|artifactId|version[|classifier[|packaging]].
                final String[] ra = ArtifactInfo.FS_PATTERN.split(uinfo);
                final ArtifactInfo ai = new ArtifactInfo();
                ai.repository = context.getRepositoryId();
                ai.groupId = ra[0];
                ai.artifactId = ra[1];
                ai.version = ra[2];
                if (ra.length > 3) {
                    ai.classifier = ArtifactInfo.renvl(ra[3]);
                }
                if (ra.length > 4) {
                    ai.packaging = ArtifactInfo.renvl(ra[4]);
                }
                // minimal ArtifactContext for removal
                final ArtifactContext ac = new ArtifactContext(null, null, null, ai, ai.calculateGav());
                // Honor the optional path scope before actually removing.
                if (contextPath == null
                    || context.getGavCalculator().gavToPath(ac.getGav()).startsWith(contextPath)) {
                    if (IndexOp.DELETED == remove(ac)) {
                        deleted++;
                    }
                }
            }
        }
    }
    return deleted;
}