List of usage examples for the org.apache.lucene.index.MultiReader constructor
public MultiReader(IndexReader... subReaders) throws IOException
Construct a MultiReader aggregating the named set of (sub)readers.
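Before the per-project examples below, here is a minimal sketch of this constructor in use. The index paths "index1" and "index2" are hypothetical placeholders, and the snippet assumes the pre-4.0 IndexReader.open/IndexSearcher API that the examples on this page also use (classes from org.apache.lucene.store, org.apache.lucene.index, and org.apache.lucene.search):

    // Open two existing on-disk indexes (hypothetical paths).
    Directory dir1 = FSDirectory.open(new File("index1"));
    Directory dir2 = FSDirectory.open(new File("index2"));
    IndexReader r1 = IndexReader.open(dir1);
    IndexReader r2 = IndexReader.open(dir2);

    // Aggregate both sub-readers into one logical reader and search across them.
    MultiReader multi = new MultiReader(r1, r2);
    IndexSearcher searcher = new IndexSearcher(multi);
    // ... run queries against searcher ...
    searcher.close();
    multi.close(); // per this constructor's contract, closing the MultiReader also closes r1 and r2

The examples below follow the same pattern and differ mainly in how they obtain the sub-readers: local directories, HDFS shards, or searchers acquired from indexing contexts.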
From source file:net.mojodna.searchable.AbstractMultiSearcher.java
License:Apache License
/**
 * Search the index with the specified query. Overrides AbstractSearcher's
 * default behavior.
 *
 * @param query Query to use.
 * @param filter Filter to use.
 * @param offset Offset to begin result set at.
 * @param count Number of results to return.
 * @param sort Sort to use.
 * @return ResultSet containing results.
 * @throws SearchException
 */
@Override
public ResultSet doSearch(final String query, final Filter filter, final Integer offset, final Integer count,
        final Sort sort) throws SearchException {
    MultiReader mr = null;
    MultiSearcher ms = null;
    try {
        // load readers and searchers
        final Map<Class, IndexReader> readerMap = new HashMap<Class, IndexReader>();
        final IndexReader[] readers = new IndexReader[indexPaths.length];
        final Searchable[] searchers = new Searchable[indexPaths.length];
        int i = 0;
        for (final String path : indexPaths) {
            readers[i] = IndexReader.open(path);
            searchers[i] = new IndexSearcher(readers[i]);
            readerMap.put(classes[i], readers[i]);
            i++;
        }
        mr = new MultiReader(readers);
        ms = new MultiSearcher(searchers);

        final String[] defaultFields;
        if (null != classes) {
            final Collection<Object> fields = new HashSet<Object>();
            for (final Class clazz : classes) {
                if (AnnotationUtils.isAnnotationPresent(clazz, DefaultFields.class)) {
                    // load fields specified in @DefaultFields annotation
                    fields.addAll(Arrays.asList(SearchableBeanUtils.getDefaultFieldNames(clazz)));
                } else {
                    // load fields present in the index corresponding to this class
                    fields.addAll(Arrays.asList(getFieldsPresent(readerMap.get(clazz))));
                }
            }
            defaultFields = SearchableUtils.toStringArray(fields);
        } else {
            // load all fields available from all indexes
            defaultFields = getFieldsPresent(mr);
        }

        // prepare the query using available default fields
        final Query q = prepareQuery(query, defaultFields);

        // use the overloaded doSearch method with the MultiSearcher
        // constructed previously
        return doSearch(q, filter, ms, offset, count, sort);
    } catch (final IOException e) {
        throw new SearchException(e);
    } finally {
        try {
            // attempt to close readers and searchers
            if (null != mr)
                mr.close();
            if (null != ms)
                ms.close();
        } catch (final IOException e) {
            throw new SearchException(e);
        }
    }
}
From source file:net.sf.lucis.core.impl.ManagedMultiSearcherProvider.java
License:Apache License
public LucisSearcher get() {
    try {
        List<IndexReader> readers = Lists.newArrayListWithCapacity(providers.size());
        for (DirectoryProvider p : providers) {
            readers.add(p.getManagedReader());
        }
        if (readers.isEmpty()) {
            final IndexReader reader = IndexReader.open(EmptyDirectory.get());
            readers.add(reader);
        }
        return new DefaultLucisSearcher(new MultiReader(readers.toArray(new IndexReader[readers.size()])));
    } catch (Exception e) {
        throw new IndexNotAvailableException(e);
    }
}
From source file:net.sf.lucis.core.impl.MultiSearcherProvider.java
License:Apache License
public LucisSearcher get() {
    try {
        List<IndexReader> readers = Lists.newArrayListWithCapacity(providers.size());
        for (DirectoryProvider p : providers) {
            Directory d = p.getDirectory();
            if (d != null && IndexReader.indexExists(d)) {
                readers.add(IndexReader.open(d));
            }
        }
        if (readers.isEmpty()) {
            final IndexReader reader = IndexReader.open(EmptyDirectory.get());
            readers.add(reader);
        }
        return new DefaultLucisSearcher(new MultiReader(readers.toArray(new IndexReader[readers.size()])));
    } catch (Exception e) {
        throw new IndexNotAvailableException(e);
    }
}
From source file:net.sourceforge.docfetcher.model.RootScope.java
License:Open Source License
/**
 * Returns all documents under the given <tt>Scope</tt>s.
 */
public static ResultDocument[] listDocuments(Scope... scopes) {
    // Get the root elements of the given scopes
    Set<RootScope> rootScopeSet = new HashSet<RootScope>();
    for (Scope scope : scopes)
        rootScopeSet.add(scope.getRootScope());
    RootScope[] rootScopes = rootScopeSet.toArray(new RootScope[rootScopeSet.size()]);
    try {
        // Get all documents under the root elements
        IndexReader[] readers = new IndexReader[rootScopes.length];
        for (int i = 0; i < rootScopes.length; i++) {
            Directory dir = new SimpleFSDirectory(rootScopes[i].getIndexDir());
            readers[i] = IndexReader.open(dir);
        }
        MultiReader multiReader = new MultiReader(readers);
        ResultDocument[] rootScopeDocs = new ResultDocument[multiReader.numDocs()];
        for (int i = 0; i < multiReader.numDocs(); i++)
            rootScopeDocs[i] = new ResultDocument(multiReader.document(i), 0, null);
        multiReader.close();

        /*
         * From the documents of the previous step, filter out those that
         * aren't inside the given scopes, and return the remaining
         * documents.
         */
        Set<ResultDocument> scopeDocs = new HashSet<ResultDocument>();
        for (ResultDocument rootScopeDoc : rootScopeDocs)
            for (Scope scope : scopes)
                if (scope.contains(rootScopeDoc.file)) {
                    scopeDocs.add(rootScopeDoc);
                    break;
                }
        return scopeDocs.toArray(new ResultDocument[scopeDocs.size()]);
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
    return new ResultDocument[0];
}
From source file:org.apache.hadoop.contrib.index.mapred.TestDistributionPolicy.java
License:Apache License
private void verify(Shard[] shards) throws IOException {
    // verify the index
    IndexReader[] readers = new IndexReader[shards.length];
    for (int i = 0; i < shards.length; i++) {
        Directory dir = new FileSystemDirectory(fs, new Path(shards[i].getDirectory()), false, conf);
        readers[i] = IndexReader.open(dir);
    }
    IndexReader reader = new MultiReader(readers);
    IndexSearcher searcher = new IndexSearcher(reader);

    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    assertEquals(0, hits.length());

    hits = searcher.search(new TermQuery(new Term("content", "hadoop")));
    assertEquals(numDocsPerRun / 2, hits.length());

    int[] counts = new int[numDocsPerRun];
    for (int i = 0; i < hits.length(); i++) {
        Document doc = hits.doc(i);
        counts[Integer.parseInt(doc.get("id"))]++;
    }
    for (int i = 0; i < numDocsPerRun; i++) {
        if (i % 2 == 0) {
            assertEquals(0, counts[i]);
        } else {
            assertEquals(1, counts[i]);
        }
    }

    searcher.close();
    reader.close();
}
From source file:org.apache.hadoop.contrib.index.mapred.TestIndexUpdater.java
License:Apache License
private void run(int numRuns, Shard[] shards) throws IOException {
    IIndexUpdater updater = new IndexUpdater();
    updater.run(conf, new Path[] { inputPath }, outputPath, numMapTasks, shards);

    // verify the done files
    Path[] doneFileNames = new Path[shards.length];
    int count = 0;
    FileStatus[] fileStatus = fs.listStatus(outputPath);
    for (int i = 0; i < fileStatus.length; i++) {
        FileStatus[] doneFiles = fs.listStatus(fileStatus[i].getPath());
        for (int j = 0; j < doneFiles.length; j++) {
            doneFileNames[count++] = doneFiles[j].getPath();
        }
    }
    assertEquals(shards.length, count);
    for (int i = 0; i < count; i++) {
        assertTrue(doneFileNames[i].getName().startsWith(IndexUpdateReducer.DONE.toString()));
    }

    // verify the index
    IndexReader[] readers = new IndexReader[shards.length];
    for (int i = 0; i < shards.length; i++) {
        Directory dir = new FileSystemDirectory(fs, new Path(shards[i].getDirectory()), false, conf);
        readers[i] = IndexReader.open(dir);
    }
    IndexReader reader = new MultiReader(readers);
    IndexSearcher searcher = new IndexSearcher(reader);

    Hits hits = searcher.search(new TermQuery(new Term("content", "apache")));
    assertEquals(numRuns * numDocsPerRun, hits.length());

    int[] counts = new int[numDocsPerRun];
    for (int i = 0; i < hits.length(); i++) {
        Document doc = hits.doc(i);
        counts[Integer.parseInt(doc.get("id"))]++;
    }
    for (int i = 0; i < numDocsPerRun; i++) {
        assertEquals(numRuns, counts[i]);
    }

    // max field length is 2, so "dot" is also indexed but not "org"
    hits = searcher.search(new TermQuery(new Term("content", "dot")));
    assertEquals(numRuns, hits.length());
    hits = searcher.search(new TermQuery(new Term("content", "org")));
    assertEquals(0, hits.length());

    searcher.close();
    reader.close();

    // open and close an index writer with KeepOnlyLastCommitDeletionPolicy
    // to remove earlier checkpoints
    for (int i = 0; i < shards.length; i++) {
        Directory dir = new FileSystemDirectory(fs, new Path(shards[i].getDirectory()), false, conf);
        IndexWriter writer = new IndexWriter(dir, false, null, new KeepOnlyLastCommitDeletionPolicy());
        writer.close();
    }

    // verify the number of segments; must be done after a writer with
    // KeepOnlyLastCommitDeletionPolicy so that earlier checkpoints are removed
    for (int i = 0; i < shards.length; i++) {
        PathFilter cfsFilter = new PathFilter() {
            public boolean accept(Path path) {
                return path.getName().endsWith(".cfs");
            }
        };
        FileStatus[] cfsFiles = fs.listStatus(new Path(shards[i].getDirectory()), cfsFilter);
        assertEquals(1, cfsFiles.length);
    }
}
From source file:org.apache.luke.client.LukeInspector.java
License:Apache License
/**
 * open Lucene index and re-init all the sub-widgets
 * @param name
 * @param force
 * @param dirImpl
 * @param ro
 * @param ramdir
 * @param keepCommits
 * @param point
 * @param tiiDivisor
 */
public void openIndex(String name, boolean force, String dirImpl, boolean ro, boolean ramdir,
        boolean keepCommits, IndexCommit point, int tiiDivisor) {
    pName = name;
    File baseFileDir = new File(name);
    ArrayList<Directory> dirs = new ArrayList<Directory>();
    Throwable lastException = null;
    try {
        Directory d = openDirectory(dirImpl, pName, false);
        if (IndexWriter.isLocked(d)) {
            if (!ro) {
                if (force) {
                    IndexWriter.unlock(d);
                } else {
                    //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                    d.close();
                    d = null;
                    return;
                }
            }
        }
        boolean existsSingle = false;
        // IR.indexExists doesn't report the cause of error
        try {
            new SegmentInfos().read(d);
            existsSingle = true;
        } catch (Throwable e) {
            e.printStackTrace();
            lastException = e;
        }
        if (!existsSingle) { // try multi
            File[] files = baseFileDir.listFiles();
            for (File f : files) {
                if (f.isFile()) {
                    continue;
                }
                Directory d1 = openDirectory(dirImpl, f.toString(), false);
                if (IndexWriter.isLocked(d1)) {
                    if (!ro) {
                        if (force) {
                            IndexWriter.unlock(d1);
                        } else {
                            //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                            d1.close();
                            d1 = null;
                            return;
                        }
                    }
                }
                existsSingle = false;
                try {
                    new SegmentInfos().read(d1);
                    existsSingle = true;
                } catch (Throwable e) {
                    lastException = e;
                    e.printStackTrace();
                }
                if (!existsSingle) {
                    d1.close();
                    continue;
                }
                dirs.add(d1);
            }
        } else {
            dirs.add(d);
        }

        if (dirs.size() == 0) {
            if (lastException != null) {
                //errorMsg("Invalid directory at the location, check console for more information. Last exception:\n" + lastException.toString());
            } else {
                //errorMsg("No valid directory at the location, try another location.\nCheck console for other possible causes.");
            }
            return;
        }

        if (ramdir) {
            //showStatus("Loading index into RAMDirectory ...");
            Directory dir1 = new RAMDirectory();
            IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
            IndexWriter iw1 = new IndexWriter(dir1, cfg);
            iw1.addIndexes((Directory[]) dirs.toArray(new Directory[dirs.size()]));
            iw1.close();
            //showStatus("RAMDirectory loading done!");
            if (dir != null)
                dir.close();
            dirs.clear();
            dirs.add(dir1);
        }

        IndexDeletionPolicy policy;
        if (keepCommits) {
            policy = new KeepAllIndexDeletionPolicy();
        } else {
            policy = new KeepLastIndexDeletionPolicy();
        }

        ArrayList<DirectoryReader> readers = new ArrayList<DirectoryReader>();
        for (Directory dd : dirs) {
            DirectoryReader reader;
            if (tiiDivisor > 1) {
                reader = DirectoryReader.open(dd, tiiDivisor);
            } else {
                reader = DirectoryReader.open(dd);
            }
            readers.add(reader);
        }

        if (readers.size() == 1) {
            ir = readers.get(0);
            dir = ((DirectoryReader) ir).directory();
        } else {
            ir = new MultiReader((IndexReader[]) readers.toArray(new IndexReader[readers.size()]));
        }
        is = new IndexSearcher(ir);
        // XXX
        //slowAccess = false;
        //initOverview();
        //initPlugins();
        //showStatus("Index successfully open.");
    } catch (Exception e) {
        e.printStackTrace();
        //errorMsg(e.getMessage());
        return;
    }
}
From source file:org.apache.maven.index.context.NexusIndexMultiReader.java
License:Apache License
public synchronized IndexReader acquire() throws IOException {
    if (searchers != null) {
        release();
        throw new IllegalStateException("acquire() called 2nd time without release() in between!");
    }
    this.searchers = new ArrayList<IndexSearcher>();
    final ArrayList<IndexReader> contextReaders = new ArrayList<IndexReader>(contexts.size());
    for (IndexingContext ctx : contexts) {
        final IndexSearcher indexSearcher = ctx.acquireIndexSearcher();
        searchers.add(indexSearcher);
        contextReaders.add(indexSearcher.getIndexReader());
    }
    return new MultiReader(contextReaders.toArray(new IndexReader[contextReaders.size()]));
}
From source file:org.apache.nutch.searcher.IndexSearcher.java
License:Apache License
/** Construct given a number of indexes. */
public IndexSearcher(Path[] indexDirs, Configuration conf, File blacklistFile) throws IOException {
    IndexReader[] readers = new IndexReader[indexDirs.length];
    this.conf = conf;
    this.fs = FileSystem.get(conf);
    for (int i = 0; i < indexDirs.length; i++) {
        readers[i] = IndexReader.open(getDirectory(indexDirs[i]));
    }
    init(new MultiReader(readers), conf, blacklistFile);
}
From source file:org.apache.nutch.searcher.LuceneSearchBean.java
License:Apache License
private IndexReader getIndexReader(Path pindexesDir) throws IOException {
    /*
    FileSystem fs = FileSystem.get( conf );
    Path dir = new Path( conf.get( "searcher.dir", "crawl") ).makeQualified( fs );
    LOG.info( "Looking for Nutch indexes in: " + dir );
    if ( ! fs.exists( dir ) ) {
        LOG.warn( "Directory does not exist: " + dir );
        LOG.warn( "No Nutch indexes will be found and all queries will return no results." );
        return false;
    }
    Path pindexesDir = new Path( dir, "pindexes" ).makeQualified(fs);
    */
    LOG.info("Looking for NutchWax parallel indexes in: " + pindexesDir);

    if (!fs.exists(pindexesDir)) {
        LOG.warn("Parallel indexes directory does not exist: " + pindexesDir);
        return null;
    }
    if (!fs.getFileStatus(pindexesDir).isDir()) {
        LOG.warn("Parallel indexes directory is not a directory: " + pindexesDir);
        return null;
    }

    FileStatus[] fstats = fs.listStatus(pindexesDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
    Path[] indexDirs = HadoopFSUtil.getPaths(fstats);
    if (indexDirs.length < 1) {
        LOG.info("No sub-dirs found in parallel indexes directory: " + pindexesDir);
        return null;
    }

    List<IndexReader> readers = new ArrayList<IndexReader>(indexDirs.length);
    for (Path indexDir : indexDirs) {
        fstats = fs.listStatus(indexDir, HadoopFSUtil.getPassDirectoriesFilter(fs));
        Path parallelDirs[] = HadoopFSUtil.getPaths(fstats);
        if (parallelDirs.length < 1) {
            LOG.info("No sub-directories, skipping: " + indexDir);
            continue;
        }

        ArchiveParallelReader reader = new ArchiveParallelReader();

        // Sort the parallelDirs so that we add them in order. Order
        // matters to the ParallelReader.
        Arrays.sort(parallelDirs);

        for (Path p : parallelDirs) {
            LOG.info("Adding reader for: " + p);
            reader.add(IndexReader.open(new FsDirectory(fs, p, false, conf)));
        }

        readers.add(reader);
    }

    if (readers.size() == 0) {
        LOG.warn("No parallel indexes in: " + pindexesDir);
        return null;
    }

    MultiReader reader = new MultiReader(readers.toArray(new IndexReader[0]));

    return reader;
}