List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the n-th Document in this index.
From source file:org.musicbrainz.search.index.ReleaseGroupIndexTest.java
License:Open Source License
/** * Checks record with multiple releases/*from ww w .j a v a 2 s . co m*/ * * @throws Exception */ @Test public void testIndexReleaseGroupWithMultipleReleases() throws Exception { addReleaseGroupThree(); RAMDirectory ramDir = new RAMDirectory(); createIndex(ramDir); IndexReader ir = DirectoryReader.open(ramDir); assertEquals(2, ir.numDocs()); { Document doc = ir.document(1); assertEquals(2, doc.getFields(ReleaseGroupIndexField.RELEASE.getName()).length); String val1 = doc.getFields(ReleaseGroupIndexField.RELEASE.getName())[0].stringValue(); String val2 = doc.getFields(ReleaseGroupIndexField.RELEASE.getName())[1].stringValue(); assertTrue("Crocodiles (Bonus disc)".equals(val1) || "Crocodiles (Bonus disc)".equals(val2)); assertTrue("Crocodiles (Special disc)".equals(val1) || "Crocodiles (Special disc)".equals(val2)); } ir.close(); }
From source file:org.musicbrainz.search.index.ReleaseGroupIndexTest.java
License:Open Source License
@Test public void testIndexReleaseGroupMultipleArtists() throws Exception { addReleaseGroupFour();//from www . j a v a 2 s . c o m RAMDirectory ramDir = new RAMDirectory(); createIndex(ramDir); IndexReader ir = DirectoryReader.open(ramDir); assertEquals(2, ir.numDocs()); { Document doc = ir.document(1); Fields fields = MultiFields.getFields(ir); Terms terms = fields.terms(ReleaseGroupIndexField.ARTIST_NAME.getName()); TermsEnum tr = terms.iterator(null); tr.next(); assertEquals(1, tr.docFreq()); assertEquals("cincinnati", tr.term().utf8ToString()); tr.next(); assertEquals("erich", tr.term().utf8ToString()); tr.next(); assertEquals("kunzel", tr.term().utf8ToString()); tr.next(); assertEquals("kunzstel", tr.term().utf8ToString()); tr.next(); assertEquals("kunzstelein", tr.term().utf8ToString()); tr.next(); assertEquals("orchestra", tr.term().utf8ToString()); tr.next(); assertEquals("pops", tr.term().utf8ToString()); tr.next(); assertEquals("the", tr.term().utf8ToString()); terms = fields.terms(ReleaseGroupIndexField.ARTIST_ID.getName()); tr = terms.iterator(null); tr.next(); assertEquals(1, tr.docFreq()); assertEquals("99845d0c-f239-4051-a6b1-4b5e9f7ede0b", tr.term().utf8ToString()); tr.next(); assertEquals("d8fbd94c-cd06-4e8b-a559-761ad969d07e", tr.term().utf8ToString()); tr.next(); terms = fields.terms(ReleaseGroupIndexField.ARTIST_NAMECREDIT.getName()); tr = terms.iterator(null); tr.next(); assertEquals(1, tr.docFreq()); assertEquals("cincinnati", tr.term().utf8ToString()); tr.next(); assertEquals("erich", tr.term().utf8ToString()); tr.next(); assertEquals("kunzel", tr.term().utf8ToString()); tr.next(); assertEquals("pops", tr.term().utf8ToString()); assertEquals("Epics", doc.getFields(ReleaseGroupIndexField.RELEASEGROUP.getName())[0].stringValue()); assertEquals("efd2ace2-b3b9-305f-8a53-9803595c0e37", doc.getFields(ReleaseGroupIndexField.RELEASEGROUP_ID.getName())[0].stringValue()); ArtistCredit ac = ArtistCreditHelper 
.unserialize(doc.get(ReleaseGroupIndexField.ARTIST_CREDIT.getName())); assertNotNull(ac); assertEquals("Erich Kunzel", ac.getNameCredit().get(0).getArtist().getName()); assertEquals("Cincinnati Pops", ac.getNameCredit().get(1).getName()); assertEquals("The Cincinnati Pops Orchestra", ac.getNameCredit().get(1).getArtist().getName()); assertEquals(2, ac.getNameCredit().get(0).getArtist().getAliasList().getAlias().size()); assertNull(ac.getNameCredit().get(1).getArtist().getAliasList()); assertEquals("Erich Kunzstel", ac.getNameCredit().get(0).getArtist().getAliasList().getAlias().get(0).getContent()); assertEquals("Erich Kunzstelein", ac.getNameCredit().get(0).getArtist().getAliasList().getAlias().get(1).getContent()); assertEquals("Kunzstel, Erich", ac.getNameCredit().get(0).getArtist().getAliasList().getAlias().get(0).getSortName()); assertEquals("Kunzstelein, Erich", ac.getNameCredit().get(0).getArtist().getAliasList().getAlias().get(1).getSortName()); assertEquals("en", ac.getNameCredit().get(0).getArtist().getAliasList().getAlias().get(0).getLocale()); assertEquals("de", ac.getNameCredit().get(0).getArtist().getAliasList().getAlias().get(1).getLocale()); } ir.close(); }
From source file:org.musicbrainz.search.index.ReleaseGroupIndexTest.java
License:Open Source License
/**
 * Checks that the document at position 1 has exactly one RELEASEGROUP value and
 * exactly one TAG value, and that the tag is "punk".
 *
 * @throws Exception if the index cannot be built or read
 */
@Test
public void testIndexReleaseGroupWithTag() throws Exception {

    addReleaseGroupFour();
    RAMDirectory ramDir = new RAMDirectory();
    createIndex(ramDir);

    IndexReader ir = DirectoryReader.open(ramDir);
    try {
        assertEquals(2, ir.numDocs());

        Document doc = ir.document(1);
        assertEquals(1, doc.getFields(ReleaseGroupIndexField.RELEASEGROUP.getName()).length);
        assertEquals(1, doc.getFields(ReleaseGroupIndexField.TAG.getName()).length);
        assertEquals("punk", doc.getField(ReleaseGroupIndexField.TAG.getName()).stringValue());
    } finally {
        // Close the reader even when an assertion above fails.
        ir.close();
    }
}
From source file:org.musicbrainz.search.index.ReleaseGroupIndexTest.java
License:Open Source License
/**
 * Checks that the document at position 1 has exactly one RELEASESTATUS value and
 * that it is "Official".
 *
 * @throws Exception if the index cannot be built or read
 */
@Test
public void testIndexReleaseGroupWithStatus() throws Exception {

    addReleaseGroupFour();
    RAMDirectory ramDir = new RAMDirectory();
    createIndex(ramDir);

    IndexReader ir = DirectoryReader.open(ramDir);
    try {
        assertEquals(2, ir.numDocs());

        Document doc = ir.document(1);
        assertEquals(1, doc.getFields(ReleaseGroupIndexField.RELEASESTATUS.getName()).length);
        assertEquals("Official",
                doc.getField(ReleaseGroupIndexField.RELEASESTATUS.getName()).stringValue());
    } finally {
        // Close the reader even when an assertion above fails.
        ir.close();
    }
}
From source file:org.musicbrainz.search.index.ReleaseGroupIndexTest.java
License:Open Source License
@Test public void testIndexReleaseGroupWithSecondaryTypes() throws Exception { addReleaseGroupFour();/* w w w . j a v a 2s. c o m*/ RAMDirectory ramDir = new RAMDirectory(); createIndex(ramDir); IndexReader ir = DirectoryReader.open(ramDir); assertEquals(2, ir.numDocs()); { Document doc = ir.document(1); assertEquals(1, doc.getFields(ReleaseGroupIndexField.PRIMARY_TYPE.getName()).length); assertEquals("Album", doc.getFields(ReleaseGroupIndexField.PRIMARY_TYPE.getName())[0].stringValue()); //NOte old type field maps secondary type to compilation assertEquals(1, doc.getFields(ReleaseGroupIndexField.TYPE.getName()).length); assertEquals("Compilation", doc.getFields(ReleaseGroupIndexField.TYPE.getName())[0].stringValue()); assertEquals(1, doc.getFields(ReleaseGroupIndexField.RELEASEGROUP.getName()).length); assertEquals(2, doc.getFields(ReleaseGroupIndexField.SECONDARY_TYPE.getName()).length); assertEquals("Compilation", doc.getFields(ReleaseGroupIndexField.SECONDARY_TYPE.getName())[0].stringValue()); assertEquals("Interview", doc.getFields(ReleaseGroupIndexField.SECONDARY_TYPE.getName())[1].stringValue()); } ir.close(); }
From source file:org.neo4j.index.lucene.repair.IndexHandler.java
License:Open Source License
/**
 * Rewrites the index document of a node so that it no longer contains a given field.
 *
 * <p>The document whose {@code _id_} term equals {@code nodeId} is looked up, every
 * field whose name differs from {@code fieldName} is copied into a new document, the
 * original is deleted through the reader, and the copy is then added through a fresh
 * {@link IndexWriter}. The delete and the re-add are two separate commits.
 *
 * <p>NOTE(review): the copy is built from the fields returned by
 * {@code reader.document(docId)}; presumably only stored fields survive the rewrite --
 * confirm against the Lucene version in use.
 *
 * @param nodeId    id of the node whose document is rewritten
 * @param fieldName name of the field to drop from the document
 * @throws IllegalStateException if the lookup yields no document or more than one
 * @throws Exception             if reading from or writing to the index fails
 */
public void deleteFieldFromNodeDocument(long nodeId, String fieldName) throws Exception {
    Document newDoc = new Document();

    IndexReader reader = IndexReader.open(dir, false);
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Ask for two hits so that a duplicate id can be detected.
        TopDocs searchResult = searcher.search(new TermQuery(new Term("_id_", Long.toString(nodeId))), 2);
        if (searchResult.totalHits > 1) {
            throw new IllegalStateException(
                    "There should be only one hit for node id " + nodeId + ", i got at least 2");
        }
        // Previously a missing document surfaced as an ArrayIndexOutOfBoundsException below.
        if (searchResult.scoreDocs.length == 0) {
            throw new IllegalStateException(
                    "There should be one hit for node id " + nodeId + ", i got none");
        }

        int docId = searchResult.scoreDocs[0].doc;
        Document original = reader.document(docId);
        for (Fieldable f : original.getFields()) {
            if (!f.name().equals(fieldName)) {
                newDoc.add(f);
            }
        }

        reader.deleteDocument(docId);
        reader.commit(null);
    } finally {
        // Close the reader on the failure paths as well.
        reader.close();
    }

    IndexWriter writer = new IndexWriter(dir,
            new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35)));
    try {
        writer.addDocument(newDoc);
        writer.commit();
    } finally {
        writer.close();
    }
}
From source file:org.netbeans.modules.jackpot30.ide.usages.hints.RemotelyUnused.java
License:Open Source License
@TriggerTreeKind({ Kind.VARIABLE, Kind.METHOD }) public static ErrorDescription hint(HintContext ctx) throws URISyntaxException, IOException { Element toSearch = ctx.getInfo().getTrees().getElement(ctx.getPath()); if (toSearch == null) return null; if (!toSearch.getKind().isField() && toSearch.getKind() != ElementKind.METHOD && toSearch.getKind() != ElementKind.CONSTRUCTOR) return null; if (toSearch.getKind() == ElementKind.METHOD && ctx.getInfo().getElementUtilities().overridesMethod((ExecutableElement) toSearch)) return null; final String serialized = JavaUtils.serialize(ElementHandle.create(toSearch)); for (RemoteIndex idx : RemoteIndex.loadIndices()) { String result = LocalCache.runOverLocalCache(idx, new Task<IndexReader, String>() { @Override//from ww w . jav a 2 s . c o m public String run(IndexReader reader, AtomicBoolean cancel) throws IOException { Query query = new TermQuery(new Term("usagesSignature", serialized)); Searcher s = new IndexSearcher(reader); BitSet matchingDocuments = new BitSet(reader.maxDoc()); Collector c = new BitSetCollector(matchingDocuments); s.search(query, c); for (int docNum = matchingDocuments.nextSetBit(0); docNum >= 0; docNum = matchingDocuments .nextSetBit(docNum + 1)) { if (cancel.get()) return VAL_UNKNOWN; final Document doc = reader.document(docNum); return doc.get("usagesUsages"); } return VAL_UNKNOWN; } }, null, new AtomicBoolean()/*XXX*/); if (result == null) { URI resolved = new URI(idx.remote.toExternalForm() + "/usages/search?path=" + WebUtilities.escapeForQuery(idx.remoteSegment) + "&signatures=" + WebUtilities.escapeForQuery(serialized)); String response = WebUtilities.requestStringResponse(resolved, new AtomicBoolean()); if (response != null) { result = response.trim().isEmpty() ? 
VAL_UNUSED : VAL_USED; } else { result = VAL_UNKNOWN; } final String resultFin = result; LocalCache.saveToLocalCache(idx, new Task<IndexWriter, Void>() { @Override public Void run(IndexWriter p, AtomicBoolean cancel) throws IOException { Document doc = new Document(); doc.add(new Field("usagesSignature", serialized, Store.NO, Index.NOT_ANALYZED)); doc.add(new Field("usagesUsages", resultFin, Store.YES, Index.NO)); p.addDocument(doc); return null; } }); } if (!VAL_UNUSED.equals(result)) return null; } return ErrorDescriptionFactory.forName(ctx, ctx.getPath(), Bundle.ERR_NoUsages()); }
From source file:org.netbeans.modules.jackpot30.impl.duplicates.indexing.RemoteDuplicatesIndex.java
License:Open Source License
private static Map<String, Collection<? extends String>> containsHash(IndexReader reader, Iterable<? extends String> hashes, AtomicBoolean cancel) throws IOException { Map<String, Collection<? extends String>> result = new LinkedHashMap<String, Collection<? extends String>>(); for (String hash : hashes) { if (cancel.get()) return Collections.emptyMap(); Collection<String> found = new LinkedList<String>(); Query query = new TermQuery(new Term("hash", hash)); Searcher s = new IndexSearcher(reader); BitSet matchingDocuments = new BitSet(reader.maxDoc()); Collector c = new BitSetCollector(matchingDocuments); s.search(query, c);//from www . j ava 2 s . c o m boolean wasFound = false; for (int docNum = matchingDocuments.nextSetBit(0); docNum >= 0; docNum = matchingDocuments .nextSetBit(docNum + 1)) { if (cancel.get()) return Collections.emptyMap(); final Document doc = reader.document(docNum); found.addAll(Arrays.asList(doc.getValues("path"))); wasFound = true; } if (wasFound) { result.put(hash, found); } } return result; }
From source file:org.netbeans.modules.jackpot30.indexer.usages.IndexerImplTest.java
License:Open Source License
/**
 * Indexes {@code a/A.java} and {@code b/B.java} with an {@code IndexAccessor} created
 * for the {@code a} sub-directory, then checks that no stored {@code file} value
 * contains "B" and that at least one contains "A.java".
 *
 * @throws IOException if creating the files or accessing the index fails
 */
public void testSubdirIndexing() throws IOException {
    final FileObject root = FileUtil.toFileObject(getWorkDir());
    FileObject aFile = FileUtil.createData(root, "a/A.java");
    copyToFile(aFile, "public class A {}");
    FileObject bFile = FileUtil.createData(root, "b/B.java");
    copyToFile(bFile, "public class B {}");

    Directory store = new RAMDirectory();
    IndexWriter iw = new IndexWriter(store, new KeywordAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        // NOTE(review): presumably the accessor limits indexing to the "a" folder -- confirm.
        IndexAccessor.current = new IndexAccessor(iw, root.getFileObject("a"));

        doIndex(root, aFile, bFile);
    } finally {
        // Close the writer even when indexing fails.
        iw.close();
    }

    boolean foundA = false;
    IndexReader ir = IndexReader.open(store);
    try {
        int maxDocs = ir.maxDoc();

        for (int i = 0; i < maxDocs; i++) {
            Fieldable f = ir.document(i).getFieldable("file");

            if (f != null) {
                assertFalse(f.stringValue(), f.stringValue().contains("B"));
                if (f.stringValue().contains("A.java")) {
                    foundA = true;
                }
            }
        }
    } finally {
        // The reader was previously never closed.
        ir.close();
    }

    assertTrue(foundA);
}
From source file:org.ohdsi.usagi.tests.TestLucene.java
License:Apache License
public static void main(String[] args) throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); //Analyzer analyzer = new UsagiAnalyzer(); FieldType textVectorField = new FieldType(); textVectorField.setIndexed(true);//w ww . j a va2s .co m textVectorField.setTokenized(true); textVectorField.setStoreTermVectors(true); textVectorField.setStoreTermVectorPositions(false); textVectorField.setStoreTermVectorPayloads(false); textVectorField.setStoreTermVectorOffsets(false); textVectorField.setStored(true); textVectorField.freeze(); File indexFolder = new File(folder); if (indexFolder.exists()) DirectoryUtilities.deleteDir(indexFolder); Directory dir = FSDirectory.open(indexFolder); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new Field("F", "word1 word2 w3 word4", textVectorField)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("F", "word1 word2 w3", textVectorField)); writer.addDocument(doc); writer.close(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(folder))); for (int i = 0; i < reader.numDocs(); i++) { TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null); BytesRef text; while ((text = termsEnum.next()) != null) { System.out.print(text.utf8ToString() + ","); } System.out.println(); } IndexSearcher searcher = new IndexSearcher(reader); // MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); // mlt.setMinTermFreq(0); // mlt.setMinDocFreq(0); // mlt.setMaxDocFreq(9999); // mlt.setMinWordLen(0); // mlt.setMaxWordLen(9999); // mlt.setMaxDocFreqPct(100); // mlt.setMaxNumTokensParsed(9999); // mlt.setMaxQueryTerms(9999); // mlt.setStopWords(null); // mlt.setFieldNames(new String[] { "F" }); // mlt.setAnalyzer(new UsagiAnalyzer()); // Query query = mlt.like("F", new 
StringReader("Systolic blood pressure")); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer); Query query = parser.parse("word1"); Explanation explanation = searcher.explain(query, 0); print(explanation); System.out.println(); explanation = searcher.explain(query, 1); print(explanation); System.out.println(); TopDocs topDocs = searcher.search(query, 99); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F")); } }