List of usage examples for org.apache.lucene.index.IndexReader.close()
@Override public final synchronized void close() throws IOException
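All of the examples below follow the same contract: close() releases the files and buffers held by the reader, and any further use of a closed reader throws AlreadyClosedException. Because IndexReader implements java.io.Closeable, try-with-resources is the most robust way to guarantee the call. A minimal sketch, assuming Lucene 5.x or later (where FSDirectory.open takes a java.nio.file.Path) and a hypothetical index location:

import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class ReaderCloseExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/example-index" is a placeholder path, not taken from the examples below.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexReader reader = DirectoryReader.open(dir)) {
            System.out.println("numDocs = " + reader.numDocs());
        } // reader.close() and dir.close() run here, even if an exception was thrown
    }
}

The older Lucene 3.x examples below predate this pattern and call close() explicitly, ideally in a finally block.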
From source file:de.schlund.pfixcore.lucefix.PfixReadjustment.java
License:Open Source License
/**
 * Checks the list of include parts for changes and updates the search index.
 */
public void readjust() {
    Collection<Tripel> partsKnownByPustefix = getUsedTripels();
    IndexReader reader = null;
    PfixQueueManager queue;
    boolean jobDone;
    long startLoop, stopLoop, startCollect, stopCollect, startIndexLoop, stopIndexLoop, startAddLoop, stopAddLoop;
    long collectTime = 0;
    int knownDocsSize, newDocs, deleteDocs, numDocs;

    startLoop = stopLoop = startCollect = stopCollect = startIndexLoop = stopIndexLoop = startAddLoop = stopAddLoop = 0;
    newDocs = knownDocsSize = deleteDocs = numDocs = 0;
    startLoop = System.currentTimeMillis();

    Set<Tripel> tripelsToIndex = new TreeSet<Tripel>();
    queue = PfixQueueManager.getInstance(null);
    try {
        jobDone = false;
        startCollect = System.currentTimeMillis();
        partsKnownByPustefix = getUsedTripels();
        stopCollect = System.currentTimeMillis();
        collectTime = stopCollect - startCollect;
        knownDocsSize = partsKnownByPustefix.size();

        try {
            reader = IndexReader.open(LUCENE_DATA);
        } catch (IOException ioe) {
            LOG.warn("broken or nonexistent database -> will queue ALL known parts");
            for (Iterator<Tripel> iter = partsKnownByPustefix.iterator(); iter.hasNext();) {
                Tripel element = iter.next();
                element.setType(Tripel.Type.INSERT);
                newDocs++;
                if (!tripelsToIndex.add(element)) {
                    LOG.debug("duplicated insert");
                }
            }
            jobDone = true;
        }

        if (!jobDone) {
            numDocs = reader.numDocs();
            startIndexLoop = System.currentTimeMillis();
            docloop: for (int i = 0; i < numDocs; i++) {
                Document currentdoc;
                try {
                    currentdoc = reader.document(i);
                } catch (RuntimeException e) {
                    // this happens if we try to access a deleted document -> continue
                    continue docloop;
                }

                // check if the part is still needed
                String path = currentdoc.get(PreDoc.PATH);
                Tripel pfixTripel = new Tripel(path, null);

                if (partsKnownByPustefix.contains(pfixTripel)) {
                    // check the timestamp
                    File f = new File(GlobalConfig.getDocroot(), currentdoc.get(PreDoc.FILENAME));
                    if (f.lastModified() != DateField.stringToTime(currentdoc.get(PreDoc.LASTTOUCH))) {
                        // timestamp differs -> reindex
                        pfixTripel.setType(Tripel.Type.INSERT);
                        LOG.debug("TS differs: " + pfixTripel);
                        newDocs++;
                        if (!tripelsToIndex.add(pfixTripel)) {
                            LOG.debug("duplicated insert " + pfixTripel);
                        }
                    }
                    partsKnownByPustefix.remove(pfixTripel);
                } else {
                    // part is no longer needed
                    Tripel newTripel = new Tripel(currentdoc.get(PreDoc.PATH), Tripel.Type.DELETE);
                    deleteDocs++;
                    queue.queue(newTripel);
                }
            }
            stopIndexLoop = System.currentTimeMillis();

            // now partsKnownByPustefix only contains parts which are NOT indexed...
            startAddLoop = System.currentTimeMillis();
            for (Iterator<Tripel> iter = partsKnownByPustefix.iterator(); iter.hasNext();) {
                Tripel element = iter.next();
                element.setType(Tripel.Type.INSERT);
                // LOG.debug("adding " + element + " to queue (INDEX)");
                newDocs++;
                if (!tripelsToIndex.add(element)) {
                    LOG.debug("duplicated insert " + element);
                }
                // queue.queue(element);
            }
            stopAddLoop = System.currentTimeMillis();
        }
    } catch (IOException ioe) {
        LOG.error("error reading index", ioe);
    }

    // it is a TreeSet, so it is already sorted
    for (Tripel tripel : tripelsToIndex) {
        queue.queue(tripel);
    }

    stopLoop = System.currentTimeMillis();
    long needed = stopLoop - startLoop;
    if (newDocs != 0 || deleteDocs != 0) {
        LOG.debug(needed + "ms (getUsedTripels(): " + collectTime + "ms (" + knownDocsSize + "u) indexloop: "
                + (stopIndexLoop - startIndexLoop) + "|" + (stopAddLoop - startAddLoop) + "ms (" + numDocs
                + "u), added " + newDocs + "+" + deleteDocs + " queueitems");
    }

    try {
        if (reader != null) {
            reader.close();
            reader = null;
        }
    } catch (IOException e) {
        LOG.error("error while closing reader", e);
    }
}
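Note the design choice here: the reader is closed at the end of the method rather than in a finally block, so a RuntimeException thrown while queueing or logging would leak the reader. The JAX-RS examples further down show the more defensive try/finally arrangement.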
From source file:de.tudarmstadt.ukp.dkpro.core.decompounding.web1t.LuceneIndexerTest.java
License:Apache License
@Test
public void testSearch() throws Exception {
    // Check that the fields and all documents exist
    IndexReader ir0 = IndexReader.open(FSDirectory.open(targetIndex0));
    IndexReader ir1 = IndexReader.open(FSDirectory.open(targetIndex1));
    Assert.assertEquals("Number of documents", 3, ir0.numDocs() + ir1.numDocs());

    Document doc = ir0.document(0);
    Assert.assertNotNull("Field: gram", doc.getField("gram"));
    Assert.assertNotNull("Field: freq", doc.getField("freq"));

    ir0.close();
    ir1.close();

    // Search on the index
    Finder f = new Finder(index, jWeb1T);
    Assert.assertEquals(f.find("relax").size(), 3);
    Assert.assertEquals(f.find("couch").size(), 1);
    Assert.assertEquals(f.find("relax couch").size(), 1);
    Assert.assertEquals(f.find("couchdb").size(), 1);
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.clustering.debatefiltering.LuceneSearcher.java
License:Apache License
public List<String> retrieveTopNDocs(String textQuery, int topN) throws Exception {
    // Now search the index:
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    Directory directory = FSDirectory.open(luceneIndexDir);
    IndexReader reader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(reader);

    // Parse a simple query
    QueryParser parser = new QueryParser(Version.LUCENE_44, LuceneIndexer.FIELD_TEXT_CONTENT, analyzer);
    Query query = parser.parse(textQuery);
    ScoreDoc[] hits = indexSearcher.search(query, null, topN).scoreDocs;

    List<String> result = new ArrayList<>();

    // Iterate through the results:
    for (int i = 0; i < hits.length; i++) {
        Document hitDoc = indexSearcher.doc(hits[i].doc);
        result.add(hitDoc.getField(LuceneIndexer.FIELD_FILE).stringValue());
        // System.out.println(hitDoc.toString());
        // assertEquals("This is the text to be indexed.", hitDoc.get("fieldname"));
    }

    reader.close();
    directory.close();
    return result;
}
From source file:de.tudarmstadt.ukp.teaching.uima.nounDecompounding.web1t.LuceneIndexerTest.java
License:Open Source License
@Test
public void testSearch() throws Exception {
    // Check that the fields and all documents exist
    IndexReader ir = IndexReader.open(FSDirectory.open(targetIndex));
    Assert.assertEquals("Number of documents", 2, ir.numDocs());

    Document doc = ir.document(0);
    Assert.assertNotNull("Field: gram", doc.getField("gram"));
    Assert.assertNotNull("Field: freq", doc.getField("freq"));

    ir.close();

    // Search on the index
    IndexSearcher searcher = new IndexSearcher(FSDirectory.open(targetIndex));
    QueryParser p = new QueryParser(Version.LUCENE_30, "token", new StandardAnalyzer(Version.LUCENE_30));

    Query q = p.parse("gram:relax");
    Assert.assertEquals("Hit count 'relax'", 2, searcher.search(q, 100).totalHits);

    q = p.parse("gram:couch");
    Assert.assertEquals("Hit count 'couch'", 1, searcher.search(q, 100).totalHits);

    q = p.parse("gram:relax AND gram:couch");
    Assert.assertEquals("Hit count 'relax AND couch'", 1, searcher.search(q, 100).totalHits);

    q = p.parse("gram:couchdb");
    Assert.assertEquals("Hit count 'couchdb'", 1, searcher.search(q, 100).totalHits);

    searcher.close();
}
From source file:de.tudarmstadt.ukp.teaching.uima.nounDecompounding.web1t.LuceneIndexerTest.java
License:Open Source License
@Test
public void testData() throws Exception {
    IndexReader ir = IndexReader.open(FSDirectory.open(targetIndex));
    IndexSearcher searcher = new IndexSearcher(FSDirectory.open(targetIndex));
    QueryParser p = new QueryParser(Version.LUCENE_30, "gram", new StandardAnalyzer(Version.LUCENE_30));

    // Check that all data is set correctly
    Query q = p.parse("gram:couch");
    Document doc = ir.document(searcher.search(q, 100).scoreDocs[0].doc);
    Assert.assertEquals(Integer.valueOf(100), Integer.valueOf(doc.get("freq")));
    Assert.assertEquals("relax on the couch", doc.get("gram"));

    ir.close();
    searcher.close();
}
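A version note on these two tests: they compile only against Lucene 3.x. IndexSearcher.close() was deprecated late in the 3.x line and removed in 4.0, since the searcher no longer owns the underlying reader; from 4.0 on, only the IndexReader itself needs to be closed.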
From source file:de.u808.simpleinquest.indexer.impl.IndexUpdater.java
License:Apache License
private void deleteDocuments(List<File> files) throws CorruptIndexException, IOException {
    if (!files.isEmpty() && IndexReader.indexExists(indexDirectory)) {
        this.setStatusMessage("Removing deleted files from the index");
        IndexReader indexReader = IndexReader.open(indexDirectory);
        for (File file : files) {
            Term uidTerm = new Term(Indexer.PATH_FIELD_NAME, file.getPath());
            indexReader.deleteDocuments(uidTerm);
        }
        indexReader.close();
        this.setStatusMessage("All deleted files removed from index");
    } else {
        log.info("Nothing to delete or index does not exist");
    }
}
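This is also 3.x-era code: IndexReader.open, IndexReader.indexExists, and reader-side deleteDocuments were all removed in Lucene 4.0, where deletion became the IndexWriter's job. A rough modern equivalent, as a sketch only (Lucene 5.x+ APIs; the "path" field name mirrors Indexer.PATH_FIELD_NAME above, and the analyzer choice is an assumption):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;

public class DeleteByPathSketch {
    // Removes every document whose "path" field matches one of the given paths.
    static void deleteDocuments(Directory indexDirectory, Iterable<String> paths) throws java.io.IOException {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(indexDirectory, config)) {
            for (String path : paths) {
                writer.deleteDocuments(new Term("path", path));
            }
            writer.commit(); // make the deletions visible to newly opened readers
        }
    }
}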
From source file:demo.jaxrs.search.server.Catalog.java
License:Apache License
@GET
@Produces(MediaType.APPLICATION_JSON)
public JsonArray getBooks() throws IOException {
    final IndexReader reader = DirectoryReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);
    final JsonArrayBuilder builder = Json.createArrayBuilder();

    try {
        final Query query = new MatchAllDocsQuery();

        for (final ScoreDoc scoreDoc : searcher.search(query, 1000).scoreDocs) {
            final DocumentStoredFieldVisitor fieldVisitor =
                    new DocumentStoredFieldVisitor(LuceneDocumentMetadata.SOURCE_FIELD);

            reader.document(scoreDoc.doc, fieldVisitor);
            builder.add(fieldVisitor.getDocument()
                    .getField(LuceneDocumentMetadata.SOURCE_FIELD).stringValue());
        }

        return builder.build();
    } finally {
        reader.close();
    }
}
From source file:demo.jaxrs.search.server.Catalog.java
License:Apache License
@GET
@Produces(MediaType.APPLICATION_JSON)
@CrossOriginResourceSharing(allowAllOrigins = true)
@Path("/search")
public Response findBook(@Context SearchContext searchContext, @Context final UriInfo uri) throws IOException {
    final IndexReader reader = DirectoryReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);
    final JsonArrayBuilder builder = Json.createArrayBuilder();

    try {
        visitor.reset();
        visitor.visit(searchContext.getCondition(SearchBean.class));

        final Query query = visitor.getQuery();
        if (query != null) {
            final TopDocs topDocs = searcher.search(query, 1000);
            for (final ScoreDoc scoreDoc : topDocs.scoreDocs) {
                final Document document = reader.document(scoreDoc.doc);
                final String source = document.getField(LuceneDocumentMetadata.SOURCE_FIELD).stringValue();

                builder.add(Json.createObjectBuilder()
                        .add("source", source)
                        .add("score", scoreDoc.score)
                        .add("url", uri.getBaseUriBuilder().path(Catalog.class).path(source).build().toString()));
            }
        }

        return Response.ok(builder.build()).build();
    } finally {
        reader.close();
    }
}
From source file:demo.jaxrs.search.server.Catalog.java
License:Apache License
private boolean exists(final String source) throws IOException {
    final IndexReader reader = DirectoryReader.open(directory);
    final IndexSearcher searcher = new IndexSearcher(reader);

    try {
        return searcher.search(new TermQuery(
                new Term(LuceneDocumentMetadata.SOURCE_FIELD, source)), 1).totalHits > 0;
    } finally {
        reader.close();
    }
}
From source file:demo.jaxrs.search.server.Indexer.java
License:Apache License
public boolean exists(final String source) throws IOException {
    final IndexReader reader = getIndexReader();
    final IndexSearcher searcher = new IndexSearcher(reader);

    try {
        return searcher.search(new TermQuery(
                new Term(LuceneDocumentMetadata.SOURCE_FIELD, source)), 1).totalHits > 0;
    } finally {
        reader.close();
    }
}
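One pattern shared by the last two exists() methods is worth flagging: they open a fresh IndexReader on every call and close it in finally. That is correct but costly, since opening a reader is an expensive operation. For per-request lookups, Lucene's SearcherManager keeps a shared searcher warm and manages the reader's lifecycle through acquire/release. A minimal sketch, assuming Lucene 5.x/6.x (where TopDocs.totalHits is an int) and a plain "source" field standing in for LuceneDocumentMetadata.SOURCE_FIELD:

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;

public class ExistsWithSearcherManager {
    private final SearcherManager manager;

    public ExistsWithSearcherManager(Directory directory) throws IOException {
        // A null SearcherFactory means plain IndexSearcher instances are created.
        this.manager = new SearcherManager(directory, null);
    }

    public boolean exists(String source) throws IOException {
        IndexSearcher searcher = manager.acquire();
        try {
            return searcher.search(new TermQuery(new Term("source", source)), 1).totalHits > 0;
        } finally {
            manager.release(searcher); // release instead of closing the reader directly
        }
    }
}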