List of usage examples for the org.apache.lucene.index.MultiReader constructor
public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException
Construct a MultiReader aggregating the named set of (sub)readers.
From source file:org.opensolaris.opengrok.search.SearchEngine.java
License:Open Source License
/** * * @param paging whether to use paging (if yes, first X pages will load * faster)// ww w. ja va 2 s .c om * @param root list of projects to search * @throws IOException */ private void searchMultiDatabase(List<Project> root, boolean paging) throws IOException { IndexReader[] subreaders = new IndexReader[root.size()]; File droot = new File(RuntimeEnvironment.getInstance().getDataRootFile(), IndexDatabase.INDEX_DIR); int ii = 0; for (Project project : root) { IndexReader ireader = (DirectoryReader.open(FSDirectory.open(new File(droot, project.getPath())))); subreaders[ii++] = ireader; } MultiReader searchables = new MultiReader(subreaders, true); if (Runtime.getRuntime().availableProcessors() > 1) { int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors()); //TODO there might be a better way for counting this - or we should honor the command line option here too! ExecutorService executor = Executors.newFixedThreadPool(noThreads); searcher = new IndexSearcher(searchables, executor); } else { searcher = new IndexSearcher(searchables); } collector = TopScoreDocCollector.create(hitsPerPage * cachePages, docsScoredInOrder); searcher.search(query, collector); totalHits = collector.getTotalHits(); if (!paging && totalHits > 0) { collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder); searcher.search(query, collector); } hits = collector.topDocs().scoreDocs; for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); docs.add(d); } }
From source file:org.opensolaris.opengrok.web.SearchHelper.java
License:Open Source License
/** * Create the searcher to use wrt. to currently set parameters and the given * projects. Does not produce any {@link #redirect} link. It also does * nothing if {@link #redirect} or {@link #errorMsg} have a * none-{@code null} value. <p> Parameters which should be populated/set at * this time: <ul> <li>{@link #builder}</li> <li>{@link #dataRoot}</li> * <li>{@link #order} (falls back to relevance if unset)</li> * <li>{@link #parallel} (default: false)</li> </ul> Populates/sets: <ul> * <li>{@link #query}</li> <li>{@link #searcher}</li> <li>{@link #sort}</li> * <li>{@link #projects}</li> <li>{@link #errorMsg} if an error occurs</li> * </ul>/*from w w w . j a va 2 s . c o m*/ * * @param projects project to use query. If empty, a none-project opengrok * setup is assumed (i.e. DATA_ROOT/index will be used instead of possible * multiple DATA_ROOT/$project/index). * @return this instance */ public SearchHelper prepareExec(SortedSet<String> projects) { if (redirect != null || errorMsg != null) { return this; } // the Query created by the QueryBuilder try { indexDir = new File(dataRoot, IndexDatabase.INDEX_DIR); query = builder.build(); if (projects == null) { errorMsg = "No project selected!"; return this; } this.projects = projects; if (projects.isEmpty()) { //no project setup FSDirectory dir = FSDirectory.open(indexDir); searcher = new IndexSearcher(DirectoryReader.open(dir)); } else if (projects.size() == 1) { // just 1 project selected FSDirectory dir = FSDirectory.open(new File(indexDir, projects.first())); searcher = new IndexSearcher(DirectoryReader.open(dir)); } else { //more projects IndexReader[] subreaders = new IndexReader[projects.size()]; int ii = 0; //TODO might need to rewrite to Project instead of // String , need changes in projects.jspf too for (String proj : projects) { FSDirectory dir = FSDirectory.open(new File(indexDir, proj)); subreaders[ii++] = DirectoryReader.open(dir); } MultiReader searchables = new MultiReader(subreaders, true); if (parallel) 
{ int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors()); //TODO there might be a better way for counting this executor = Executors.newFixedThreadPool(noThreads); } searcher = parallel ? new IndexSearcher(searchables, executor) : new IndexSearcher(searchables); } // TODO check if below is somehow reusing sessions so we don't // requery again and again, I guess 2min timeout sessions could be // usefull, since you click on the next page within 2mins, if not, // then wait ;) switch (order) { case LASTMODIFIED: sort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true)); break; case BY_PATH: sort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING)); break; default: sort = Sort.RELEVANCE; break; } checker = new DirectSpellChecker(); } catch (ParseException e) { errorMsg = PARSE_ERROR_MSG + e.getMessage(); } catch (FileNotFoundException e) { // errorMsg = "Index database(s) not found: " + e.getMessage(); errorMsg = "Index database(s) not found."; } catch (Exception e) { errorMsg = e.getMessage(); } return this; }
From source file:retrievability.SampledQuery.java
void initSearch() throws Exception { File indexDir = new File( clusteredRetrieval ? prop.getProperty("index.cluster") : prop.getProperty("index")); System.out.println(indexDir); reader = DirectoryReader.open(FSDirectory.open(indexDir)); int nDocs = reader.numDocs(); if (!combinedIndex) { // Retrieve from two separate indices reader.close();// w w w . ja v a2 s .co m // Open the two indices separately... File pureIndexDir = new File(prop.getProperty("splitindex.pure")); IndexReader pureReader = DirectoryReader.open(FSDirectory.open(pureIndexDir)); File mixedIndexDir = new File(prop.getProperty("splitindex.mixed")); IndexReader mixedReader = DirectoryReader.open(FSDirectory.open(mixedIndexDir)); reader = new MultiReader(pureReader, mixedReader); } searcher = new IndexSearcher[sims.length]; for (int i = 0; i < sims.length; i++) { searcher[i] = new IndexSearcher(reader); searcher[i].setSimilarity(sims[i]); } retrScoresForAllSims = new RetrievabilityScore[sims.length][nDocs]; }
From source file:searcher.CollStat.java
void initIndexes() throws Exception { int numIndexes = Integer.parseInt(prop.getProperty("retriever.numindexes", "1")); String indexType = prop.getProperty("index.type", "single"); readers = new IndexReader[numIndexes]; System.out.println("#readers = " + readers.length); if (numIndexes > 1) { for (int i = 0; i < numIndexes; i++) { String indexDirPath = prop.getProperty("subindex." + i); System.out.println("Initializing index " + i + " from " + indexDirPath); indexDir = new File(indexDirPath); readers[i] = DirectoryReader.open(FSDirectory.open(indexDir)); System.out.println("#docs in index " + i + ": " + readers[i].numDocs()); }/*from w w w .java 2 s .c o m*/ if (!indexType.equals("single")) { // baaler camilla System.out.println("Initializing multi-reader"); multiReader = new MultiReader(readers, true); System.out.println("#docs in index: " + multiReader.numDocs()); } } else { String indexDirPath = prop.getProperty("index"); indexDir = new File(indexDirPath); readers[0] = DirectoryReader.open(FSDirectory.open(indexDir)); } }
From source file:ubic.basecode.ontology.search.OntologyIndexer.java
License:Apache License
/** * Find the search index (will not create it) * /*from ww w .j a v a 2s. co m*/ * @param name * @param analyzer * @return Index, or null if there is no index. */ @SuppressWarnings("resource") private static SearchIndex getSubjectIndex(String name, Analyzer analyzer) { log.debug("Loading index: " + name); File indexdir = getIndexPath(name); File indexdirstd = getIndexPath(name + ".std"); try { // we do not put this in the try-with-open because we want these to *stay* open FSDirectory directory = FSDirectory.open(indexdir); FSDirectory directorystd = FSDirectory.open(indexdirstd); if (!IndexReader.indexExists(directory)) { return null; } if (!IndexReader.indexExists(directorystd)) { return null; } IndexReader reader = IndexReader.open(directory); IndexReader readerstd = IndexReader.open(directorystd); MultiReader r = new MultiReader(reader, readerstd); return new SearchIndex(r, analyzer); } catch (IOException e) { log.warn("Index for " + name + " could not be read: " + e.getMessage()); return null; } }
From source file:ubic.basecode.ontology.search.OntologyIndexer.java
License:Apache License
/** * Create an on-disk index from an existing OntModel. Any existing index will be deleted/overwritten. * /*from w w w. jav a2s . c o m*/ * @see {@link http://jena.apache.org/documentation/larq/} * @param datafile or uri * @param name used to refer to this index later * @param model * @return */ @SuppressWarnings("resource") private static synchronized SearchIndex index(String name, OntModel model) { File indexdir = getIndexPath(name); try { StopWatch timer = new StopWatch(); timer.start(); FSDirectory dir = FSDirectory.open(indexdir); log.info("Indexing " + name + " to: " + indexdir); /* * adjust the analyzer ... */ Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_36); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer); IndexWriter indexWriter = new IndexWriter(dir, config); indexWriter.deleteAll(); // start with clean slate. assert 0 == indexWriter.numDocs(); IndexBuilderSubject larqSubjectBuilder = new IndexBuilderSubject(indexWriter); StmtIterator listStatements = model.listStatements(new IndexerSelector()); larqSubjectBuilder.indexStatements(listStatements); indexWriter.commit(); log.info(indexWriter.numDocs() + " Statements indexed..."); indexWriter.close(); Directory dirstd = indexStd(name, model); MultiReader r = new MultiReader(IndexReader.open(dir), IndexReader.open(dirstd)); // workaround to get the EnglishAnalyzer. SearchIndex index = new SearchIndex(r, new EnglishAnalyzer(Version.LUCENE_36)); // larqSubjectBuilder.getIndex(); // always returns a StandardAnalyazer assert index.getLuceneQueryParser().getAnalyzer() instanceof EnglishAnalyzer; log.info("Done indexing of " + name + " in " + String.format("%.2f", timer.getTime() / 1000.0) + "s"); return index; } catch (IOException e) { throw new RuntimeException("Indexing failure for " + name, e); } }