List of usage examples for org.apache.lucene.index.IndexReader.close()
@Override public final synchronized void close() throws IOException
From source file:org.apache.cocoon.transformation.LuceneIndexTransformer.java
License:Apache License
public void startElement(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { if (processing == STATE_GROUND) { if (LUCENE_URI.equals(namespaceURI) && LUCENE_QUERY_ELEMENT.equals(localName)) { String sCreate = atts.getValue(LUCENE_QUERY_CREATE_ATTRIBUTE); createIndex = BooleanUtils.toBoolean(sCreate); String analyzerClassname = atts.getValue(LUCENE_QUERY_ANALYZER_ATTRIBUTE); String indexDirectory = atts.getValue(LUCENE_QUERY_DIRECTORY_ATTRIBUTE); String mergeFactor = atts.getValue(LUCENE_QUERY_MERGE_FACTOR_ATTRIBUTE); queryConfiguration = new IndexerConfiguration( analyzerClassname != null ? analyzerClassname : setupConfiguration.analyzerClassname, indexDirectory != null ? indexDirectory : setupConfiguration.indexDirectory, mergeFactor != null ? Integer.parseInt(mergeFactor) : setupConfiguration.mergeFactor); if (!createIndex) { // Not asked to create the index - but check if this is necessary anyway: try { IndexReader reader = openReader(); reader.close(); } catch (IOException ioe) { // couldn't open the index - so recreate it createIndex = true;// w ww. jav a2 s . c om } } // propagate the lucene:index to the next stage in the pipeline super.startElement(namespaceURI, localName, qName, atts); processing = STATE_QUERY; } else { super.startElement(namespaceURI, localName, qName, atts); } } else if (processing == STATE_QUERY) { // processing a lucene:index - expecting a lucene:document if (LUCENE_URI.equals(namespaceURI) && LUCENE_DOCUMENT_ELEMENT.equals(localName)) { this.bodyDocumentURL = atts.getValue(LUCENE_DOCUMENT_URL_ATTRIBUTE); if (this.bodyDocumentURL == null) { throw new SAXException("<lucene:document> must have @url attribute"); } // Remember the time the document indexing began this.documentStartTime = System.currentTimeMillis(); // remember these attributes so they can be passed on to the next stage in the pipeline, // when this document element is ended. 
this.documentAttributes = new AttributesImpl(atts); this.bodyText = new StringBuffer(); this.bodyDocument = new Document(); this.elementStack.clear(); processing = STATE_DOCUMENT; } else { throw new SAXException("<lucene:index> element can contain only <lucene:document> elements!"); } } else if (processing == STATE_DOCUMENT) { elementStack.push(new IndexHelperField(localName, new AttributesImpl(atts))); } }
From source file:org.apache.cocoon.transformation.LuceneIndexTransformer.java
License:Apache License
private void reindexDocument() throws IOException { if (this.createIndex) { // The index is being created, so there's no need to delete the doc from an existing index. // This means we can keep a single IndexWriter open throughout the process. if (this.writer == null) openWriter();//from w w w . jav a 2 s . c om this.writer.addDocument(this.bodyDocument); } else { // This is an incremental reindex, so the document should be removed from the index before adding it try { IndexReader reader = openReader(); reader.delete(new Term(LuceneXMLIndexer.UID_FIELD, uid(this.bodyDocumentURL))); reader.close(); } catch (IOException e) { /* ignore */ } openWriter(); this.writer.addDocument(this.bodyDocument); this.writer.close(); this.writer = null; } this.bodyDocument = null; }
From source file:org.apache.cxf.jaxrs.ext.search.tika.TikaLuceneContentExtractorTest.java
License:Apache License
/**
 * Parses {@code expression}, converts it to a Lucene query and runs it against
 * {@code directory}, returning at most 1000 hits.
 *
 * @param expression search expression handed to {@code parser}
 * @param fieldTypes primitive field type map passed to the query visitor
 * @return the matching score docs; asserted to be non-null
 * @throws IOException if the index cannot be opened, searched or closed
 */
private ScoreDoc[] getHits(final String expression, final Map<String, Class<?>> fieldTypes)
        throws IOException {
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    try {
        LuceneQueryVisitor<SearchBean> queryVisitor =
                new LuceneQueryVisitor<SearchBean>("ct", "contents");
        queryVisitor.setPrimitiveFieldTypeMap(fieldTypes);
        queryVisitor.visit(parser.parse(expression));

        ScoreDoc[] matches = indexSearcher.search(queryVisitor.getQuery(), null, 1000).scoreDocs;
        assertNotNull(matches);
        return matches;
    } finally {
        indexReader.close();
    }
}
From source file:org.apache.cxf.systest.jaxrs.extraction.BookCatalog.java
License:Apache License
/**
 * Searches the book index using the condition carried by the request's search context.
 *
 * @param searchContext injected search context; its {@code SearchBean} condition is fed to
 *                      the shared {@code visitor} to build the Lucene query
 * @return up to 1000 matching score docs, as a fixed-size list view over the result array
 * @throws IOException if the index cannot be opened, searched or closed
 */
@GET
@Produces(MediaType.APPLICATION_JSON)
public Collection<ScoreDoc> findBook(@Context SearchContext searchContext) throws IOException {
    IndexReader indexReader = DirectoryReader.open(directory);
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    try {
        visitor.visit(searchContext.getCondition(SearchBean.class));
        ScoreDoc[] matches = indexSearcher.search(visitor.getQuery(), null, 1000).scoreDocs;
        return Arrays.asList(matches);
    } finally {
        indexReader.close();
    }
}
From source file:org.apache.fuzzydb.demo.SearchFiles.java
License:Open Source License
@SuppressWarnings("deprecation") public static Map<String, Float> generateConfidence(final String indexFolder, String query, final String field, final int noOfPages, final String[] ignoreWordList) throws Exception { Map<String, Float> docNameScore = new HashMap<String, Float>(); IndexReader reader = null; Analyzer analyzer = null;//from w ww .j a v a2 s . c o m Searcher searcher = null; try { reader = IndexReader.open(FSDirectory.open(new File(indexFolder)), true); // only searching, so read-only=true searcher = new IndexSearcher(reader); analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer); if (query != null) { query = query.trim(); } else { System.out.println("Wrong Query generated"); return null; } parser.setAllowLeadingWildcard(true); BooleanQuery searchQuery = new BooleanQuery(); Query matchQuery = parser.parse(query); searchQuery.add(matchQuery, BooleanClause.Occur.MUST); if (ignoreWordList != null) { for (String ignoreWord : ignoreWordList) { TermQuery notMatchClause = new TermQuery(new Term(field, ignoreWord.toLowerCase())); searchQuery.add(notMatchClause, BooleanClause.Occur.MUST_NOT); } } ScoreDoc[] scoreDocs = doPagingSearch(searcher, searchQuery, noOfPages); if (scoreDocs != null && scoreDocs.length > 0) { for (int i = 0; i < scoreDocs.length; i++) { Document document = searcher.doc(scoreDocs[i].doc); docNameScore.put(document.get("rowId"), calculateConfidenceScore(scoreDocs[i].score)); } } } finally { if (reader != null) { reader.close(); } if (analyzer != null) { analyzer.close(); } if (searcher != null) { searcher.close(); } } return docNameScore; }
From source file:org.apache.hadoop.contrib.index.mapred.TestDistributionPolicy.java
License:Apache License
private void verify(Shard[] shards) throws IOException { // verify the index IndexReader[] readers = new IndexReader[shards.length]; for (int i = 0; i < shards.length; i++) { Directory dir = new FileSystemDirectory(fs, new Path(shards[i].getDirectory()), false, conf); readers[i] = IndexReader.open(dir); }/*from www . j a v a 2s . c o m*/ IndexReader reader = new MultiReader(readers); IndexSearcher searcher = new IndexSearcher(reader); Hits hits = searcher.search(new TermQuery(new Term("content", "apache"))); assertEquals(0, hits.length()); hits = searcher.search(new TermQuery(new Term("content", "hadoop"))); assertEquals(numDocsPerRun / 2, hits.length()); int[] counts = new int[numDocsPerRun]; for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); counts[Integer.parseInt(doc.get("id"))]++; } for (int i = 0; i < numDocsPerRun; i++) { if (i % 2 == 0) { assertEquals(0, counts[i]); } else { assertEquals(1, counts[i]); } } searcher.close(); reader.close(); }
From source file:org.apache.hadoop.contrib.index.mapred.TestIndexUpdater.java
License:Apache License
/**
 * Runs the index updater once and checks its output.
 *
 * <p>Checks, in order: that one done-file exists per shard and is named with the
 * {@code IndexUpdateReducer.DONE} prefix; that the combined index holds the expected number of
 * documents for the terms "apache", "dot" and "org"; and that, after opening and closing a
 * writer with {@code KeepOnlyLastCommitDeletionPolicy}, each shard directory holds exactly one
 * {@code .cfs} file.
 *
 * @param numRuns number of updater runs performed so far, including this one
 * @param shards  shards being updated and verified
 * @throws IOException if the file system or an index cannot be accessed
 */
private void run(int numRuns, Shard[] shards) throws IOException {
    IIndexUpdater indexUpdater = new IndexUpdater();
    indexUpdater.run(conf, new Path[] { inputPath }, outputPath, numMapTasks, shards);

    // verify the done files
    Path[] donePaths = new Path[shards.length];
    int doneCount = 0;
    FileStatus[] outputEntries = fs.listStatus(outputPath);
    for (FileStatus outputEntry : outputEntries) {
        FileStatus[] children = fs.listStatus(outputEntry.getPath());
        for (FileStatus child : children) {
            donePaths[doneCount++] = child.getPath();
        }
    }
    assertEquals(shards.length, doneCount);
    for (int k = 0; k < doneCount; k++) {
        String doneName = donePaths[k].getName();
        assertTrue(doneName.startsWith(IndexUpdateReducer.DONE.toString()));
    }

    // verify the index
    IndexReader[] shardReaders = new IndexReader[shards.length];
    for (int k = 0; k < shards.length; k++) {
        Path shardPath = new Path(shards[k].getDirectory());
        Directory shardDir = new FileSystemDirectory(fs, shardPath, false, conf);
        shardReaders[k] = IndexReader.open(shardDir);
    }

    IndexReader combined = new MultiReader(shardReaders);
    IndexSearcher indexSearcher = new IndexSearcher(combined);

    Hits found = indexSearcher.search(new TermQuery(new Term("content", "apache")));
    assertEquals(numRuns * numDocsPerRun, found.length());

    int[] perIdCounts = new int[numDocsPerRun];
    for (int k = 0; k < found.length(); k++) {
        int id = Integer.parseInt(found.doc(k).get("id"));
        perIdCounts[id]++;
    }
    for (int k = 0; k < numDocsPerRun; k++) {
        assertEquals(numRuns, perIdCounts[k]);
    }

    // max field length is 2, so "dot" is also indexed but not "org"
    found = indexSearcher.search(new TermQuery(new Term("content", "dot")));
    assertEquals(numRuns, found.length());

    found = indexSearcher.search(new TermQuery(new Term("content", "org")));
    assertEquals(0, found.length());

    indexSearcher.close();
    combined.close();

    // open and close an index writer with KeepOnlyLastCommitDeletionPolicy
    // to remove earlier checkpoints
    for (int k = 0; k < shards.length; k++) {
        Path shardPath = new Path(shards[k].getDirectory());
        Directory shardDir = new FileSystemDirectory(fs, shardPath, false, conf);
        IndexWriter checkpointCleaner =
                new IndexWriter(shardDir, false, null, new KeepOnlyLastCommitDeletionPolicy());
        checkpointCleaner.close();
    }

    // verify the number of segments, must be done after an writer with
    // KeepOnlyLastCommitDeletionPolicy so that earlier checkpoints are removed
    PathFilter compoundFileFilter = new PathFilter() {
        public boolean accept(Path path) {
            return path.getName().endsWith(".cfs");
        }
    };
    for (int k = 0; k < shards.length; k++) {
        FileStatus[] compoundFiles = fs.listStatus(new Path(shards[k].getDirectory()), compoundFileFilter);
        assertEquals(1, compoundFiles.length);
    }
}
From source file:org.apache.jackrabbit.core.query.lucene.MultiIndex.java
License:Apache License
/** * Creates a new MultiIndex./* w ww . j av a 2 s.c o m*/ * * @param indexDir the base file system * @param handler the search handler * @param excludedIDs Set<NodeId> that contains uuids that should not * be indexed nor further traversed. * @param mapping the namespace mapping to use * @throws IOException if an error occurs */ MultiIndex(File indexDir, SearchIndex handler, Set excludedIDs, NamespaceMappings mapping) throws IOException { this.indexDir = indexDir; this.handler = handler; this.cache = new DocNumberCache(handler.getCacheSize()); this.redoLog = new RedoLog(new File(indexDir, REDO_LOG)); this.excludedIDs = new HashSet(excludedIDs); this.nsMappings = mapping; if (indexNames.exists(indexDir)) { indexNames.read(indexDir); } if (deletable.exists(indexDir)) { deletable.read(indexDir); } // try to remove deletable files if there are any attemptDelete(); // initialize IndexMerger merger = new IndexMerger(this); merger.setMaxMergeDocs(handler.getMaxMergeDocs()); merger.setMergeFactor(handler.getMergeFactor()); merger.setMinMergeDocs(handler.getMinMergeDocs()); IndexingQueueStore store; try { LocalFileSystem fs = new LocalFileSystem(); fs.setRoot(indexDir); fs.init(); store = new IndexingQueueStore(fs, INDEXING_QUEUE_FILE); } catch (FileSystemException e) { IOException ex = new IOException(); ex.initCause(e); throw ex; } // initialize indexing queue this.indexingQueue = new IndexingQueue(store); // open persistent indexes for (int i = 0; i < indexNames.size(); i++) { File sub = new File(indexDir, indexNames.getName(i)); // only open if it still exists // it is possible that indexNames still contains a name for // an index that has been deleted, but indexNames has not been // written to disk. 
if (!sub.exists()) { log.debug("index does not exist anymore: " + sub.getAbsolutePath()); // move on to next index continue; } PersistentIndex index = new PersistentIndex(indexNames.getName(i), sub, handler.getTextAnalyzer(), handler.getSimilarity(), cache, indexingQueue); index.setMaxMergeDocs(handler.getMaxMergeDocs()); index.setMergeFactor(handler.getMergeFactor()); index.setMinMergeDocs(handler.getMinMergeDocs()); index.setMaxFieldLength(handler.getMaxFieldLength()); index.setUseCompoundFile(handler.getUseCompoundFile()); indexes.add(index); merger.indexAdded(index.getName(), index.getNumDocuments()); } // init volatile index resetVolatileIndex(); // set index format version IndexReader reader = getIndexReader(); try { version = IndexFormatVersion.getVersion(reader); } finally { reader.close(); } indexingQueue.initialize(this); redoLogApplied = redoLog.hasEntries(); // run recovery Recovery.run(this, redoLog); // now that we are ready, start index merger merger.start(); if (redoLogApplied) { // wait for the index merge to finish pending jobs try { merger.waitUntilIdle(); } catch (InterruptedException e) { // move on } flush(); } flushTask = new Timer.Task() { public void run() { // check if there are any indexing jobs finished checkIndexingQueue(); // check if volatile index should be flushed checkFlush(); } }; if (indexNames.size() > 0) { scheduleFlushTask(); } }
From source file:org.apache.jackrabbit.core.query.lucene.MultiIndex.java
License:Apache License
/**
 * Returns the number of documents in this index.
 *
 * <p>When no index names are registered, the count comes from the volatile index alone;
 * otherwise it is read from a reader obtained via {@code getIndexReader()}, which is closed
 * afterwards.
 *
 * @return the number of documents in this index.
 * @throws IOException if an error occurs while reading from the index.
 */
int numDocs() throws IOException {
    if (indexNames.size() != 0) {
        IndexReader combined = getIndexReader();
        try {
            return combined.numDocs();
        } finally {
            combined.close();
        }
    }
    return volatileIndex.getNumDocuments();
}
From source file:org.apache.jackrabbit.core.query.lucene.IndexMigration.java
License:Apache License
/** * Checks if the given <code>index</code> needs to be migrated. * * @param index the index to check and migration if needed. * @param indexDir the directory where the index is stored. * @throws IOException if an error occurs while migrating the index. *///from w w w . ja v a 2 s. c om public static void migrate(PersistentIndex index, File indexDir) throws IOException { log.debug("Checking {} ...", indexDir.getAbsolutePath()); ReadOnlyIndexReader reader = index.getReadOnlyIndexReader(); try { if (IndexFormatVersion.getVersion(reader).getVersion() >= IndexFormatVersion.V3.getVersion()) { // index was created with Jackrabbit 1.5 or higher // no need for migration log.debug("IndexFormatVersion >= V3, no migration needed"); return; } // assert: there is at least one node in the index, otherwise the // index format version would be at least V3 TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, "")); try { Term t = terms.term(); if (t.text().indexOf('\uFFFF') == -1) { log.debug("Index already migrated"); return; } } finally { terms.close(); } } finally { reader.release(); } // if we get here then the index must be migrated log.debug("Index requires migration {}", indexDir.getAbsolutePath()); // make sure readers are closed, otherwise the directory // cannot be deleted index.releaseWriterAndReaders(); File migrationDir = new File(indexDir.getAbsoluteFile().getParentFile(), indexDir.getName() + "_v2.3"); if (migrationDir.exists()) { FileUtil.delete(migrationDir); } if (!migrationDir.mkdirs()) { throw new IOException("failed to create directory " + migrationDir.getAbsolutePath()); } FSDirectory fsDir = FSDirectory.getDirectory(migrationDir, NoLockFactory.getNoLockFactory()); try { IndexWriter writer = new IndexWriter(fsDir, new JackrabbitAnalyzer()); try { IndexReader r = new MigrationIndexReader(IndexReader.open(index.getDirectory())); try { writer.addIndexes(new IndexReader[] { r }); writer.close(); } finally { r.close(); } } finally { writer.close(); } } 
finally { fsDir.close(); } FileUtil.delete(indexDir); if (!migrationDir.renameTo(indexDir)) { throw new IOException("failed to move migrated directory " + migrationDir.getAbsolutePath()); } log.info("Migrated " + indexDir.getAbsolutePath()); }