Example usage for org.apache.lucene.index MultiReader MultiReader

List of usage examples for org.apache.lucene.index MultiReader MultiReader

Introduction

In this page you can find the example usage for org.apache.lucene.index MultiReader MultiReader.

Prototype

public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) throws IOException 

Source Link

Document

Construct a MultiReader aggregating the named set of (sub)readers.

Usage

From source file:org.opensolaris.opengrok.search.SearchEngine.java

License:Open Source License

/**
 *
 * @param paging whether to use paging (if yes, first X pages will load
 * faster)
 * @param root list of projects to search
 * @throws IOException
 */
private void searchMultiDatabase(List<Project> root, boolean paging) throws IOException {
    IndexReader[] subreaders = new IndexReader[root.size()];
    File droot = new File(RuntimeEnvironment.getInstance().getDataRootFile(), IndexDatabase.INDEX_DIR);
    int ii = 0;
    try {
        // Open one reader per project index directory.
        for (Project project : root) {
            subreaders[ii++] = DirectoryReader.open(FSDirectory.open(new File(droot, project.getPath())));
        }
    } catch (IOException e) {
        // Don't leak readers that were already opened before the failure.
        for (IndexReader r : subreaders) {
            if (r != null) {
                try {
                    r.close();
                } catch (IOException ignored) {
                    // best effort cleanup; the original failure is rethrown
                }
            }
        }
        throw e;
    }
    // closeSubReaders=true: closing the MultiReader closes all sub-readers.
    MultiReader searchables = new MultiReader(subreaders, true);
    if (Runtime.getRuntime().availableProcessors() > 1) {
        //TODO there might be a better way for counting this - or we should honor the command line option here too!
        int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors());
        ExecutorService executor = Executors.newFixedThreadPool(noThreads);
        searcher = new IndexSearcher(searchables, executor);
    } else {
        searcher = new IndexSearcher(searchables);
    }
    // First pass: collect only enough hits for the cached pages.
    collector = TopScoreDocCollector.create(hitsPerPage * cachePages, docsScoredInOrder);
    searcher.search(query, collector);
    totalHits = collector.getTotalHits();
    if (!paging && totalHits > 0) {
        // Not paging: re-run the search to collect every hit.
        collector = TopScoreDocCollector.create(totalHits, docsScoredInOrder);
        searcher.search(query, collector);
    }
    hits = collector.topDocs().scoreDocs;
    for (ScoreDoc hit : hits) {
        int docId = hit.doc;
        Document d = searcher.doc(docId);
        docs.add(d);
    }
}

From source file:org.opensolaris.opengrok.web.SearchHelper.java

License:Open Source License

/**
 * Create the searcher to use wrt. to currently set parameters and the given
 * projects. Does not produce any {@link #redirect} link. It also does
 * nothing if {@link #redirect} or {@link #errorMsg} have a
 * non-{@code null} value. <p> Parameters which should be populated/set at
 * this time: <ul> <li>{@link #builder}</li> <li>{@link #dataRoot}</li>
 * <li>{@link #order} (falls back to relevance if unset)</li>
 * <li>{@link #parallel} (default: false)</li> </ul> Populates/sets: <ul>
 * <li>{@link #query}</li> <li>{@link #searcher}</li> <li>{@link #sort}</li>
 * <li>{@link #projects}</li> <li>{@link #errorMsg} if an error occurs</li>
 * </ul>
 *
 * @param projects project to use query. If empty, a non-project opengrok
 * setup is assumed (i.e. DATA_ROOT/index will be used instead of possible
 * multiple DATA_ROOT/$project/index).
 * @return this instance
 */
public SearchHelper prepareExec(SortedSet<String> projects) {
    if (redirect != null || errorMsg != null) {
        return this;
    }
    try {
        indexDir = new File(dataRoot, IndexDatabase.INDEX_DIR);
        // the Query created by the QueryBuilder
        query = builder.build();
        if (projects == null) {
            errorMsg = "No project selected!";
            return this;
        }
        this.projects = projects;
        if (projects.isEmpty()) {
            // no project setup: search the single top-level index
            FSDirectory dir = FSDirectory.open(indexDir);
            searcher = new IndexSearcher(DirectoryReader.open(dir));
        } else if (projects.size() == 1) {
            // just 1 project selected
            FSDirectory dir = FSDirectory.open(new File(indexDir, projects.first()));
            searcher = new IndexSearcher(DirectoryReader.open(dir));
        } else {
            // more projects: aggregate one reader per project index
            IndexReader[] subreaders = new IndexReader[projects.size()];
            int ii = 0;
            //TODO might need to rewrite to Project instead of
            // String , need changes in projects.jspf too
            for (String proj : projects) {
                FSDirectory dir = FSDirectory.open(new File(indexDir, proj));
                subreaders[ii++] = DirectoryReader.open(dir);
            }
            // closeSubReaders=true: closing the MultiReader closes all sub-readers.
            MultiReader searchables = new MultiReader(subreaders, true);
            if (parallel) {
                //TODO there might be a better way for counting this
                int noThreads = 2 + (2 * Runtime.getRuntime().availableProcessors());
                executor = Executors.newFixedThreadPool(noThreads);
            }
            searcher = parallel ? new IndexSearcher(searchables, executor) : new IndexSearcher(searchables);
        }
        // TODO check if below is somehow reusing sessions so we don't
        // requery again and again, I guess 2min timeout sessions could be
        // useful, since you click on the next page within 2mins, if not,
        // then wait ;)
        switch (order) {
        case LASTMODIFIED:
            sort = new Sort(new SortField(QueryBuilder.DATE, SortField.Type.STRING, true));
            break;
        case BY_PATH:
            sort = new Sort(new SortField(QueryBuilder.FULLPATH, SortField.Type.STRING));
            break;
        default:
            sort = Sort.RELEVANCE;
            break;
        }
        checker = new DirectSpellChecker();
    } catch (ParseException e) {
        errorMsg = PARSE_ERROR_MSG + e.getMessage();
    } catch (FileNotFoundException e) {
        errorMsg = "Index database(s) not found.";
    } catch (Exception e) {
        // getMessage() can be null (e.g. for NullPointerException); fall back
        // to toString() so errorMsg is never silently null and callers that
        // gate on "errorMsg != null" still detect the failure.
        errorMsg = e.getMessage() != null ? e.getMessage() : e.toString();
    }
    return this;
}

From source file:retrievability.SampledQuery.java

/**
 * Open the index reader(s) and build one {@code IndexSearcher} per
 * configured similarity model, each sharing the same reader.
 *
 * @throws Exception if an index directory cannot be opened
 */
void initSearch() throws Exception {

    String indexKey = clusteredRetrieval ? prop.getProperty("index.cluster") : prop.getProperty("index");
    File indexDir = new File(indexKey);
    System.out.println(indexDir);

    reader = DirectoryReader.open(FSDirectory.open(indexDir));
    int nDocs = reader.numDocs();

    if (!combinedIndex) { // Retrieve from two separate indices
        reader.close();

        // Open the two split indices and aggregate them into one view.
        File pureIndexDir = new File(prop.getProperty("splitindex.pure"));
        IndexReader pureReader = DirectoryReader.open(FSDirectory.open(pureIndexDir));
        File mixedIndexDir = new File(prop.getProperty("splitindex.mixed"));
        IndexReader mixedReader = DirectoryReader.open(FSDirectory.open(mixedIndexDir));
        // NOTE(review): nDocs still reflects the combined index counted above,
        // not this MultiReader -- confirm that is intended.
        reader = new MultiReader(pureReader, mixedReader);
    }

    searcher = new IndexSearcher[sims.length];
    for (int i = 0; i < sims.length; i++) {
        IndexSearcher s = new IndexSearcher(reader);
        s.setSimilarity(sims[i]);
        searcher[i] = s;
    }

    retrScoresForAllSims = new RetrievabilityScore[sims.length][nDocs];
}

From source file:searcher.CollStat.java

/**
 * Open the configured index reader(s). With several sub-indices configured
 * and an index type other than "single", additionally aggregate them all
 * into a {@code MultiReader}.
 *
 * @throws Exception if a configured index directory cannot be opened
 */
void initIndexes() throws Exception {
    int numIndexes = Integer.parseInt(prop.getProperty("retriever.numindexes", "1"));
    String indexType = prop.getProperty("index.type", "single");

    readers = new IndexReader[numIndexes];
    System.out.println("#readers = " + readers.length);

    if (numIndexes <= 1) {
        // Single index configured under the "index" property.
        String indexDirPath = prop.getProperty("index");
        indexDir = new File(indexDirPath);
        readers[0] = DirectoryReader.open(FSDirectory.open(indexDir));
        return;
    }

    for (int i = 0; i < numIndexes; i++) {
        String path = prop.getProperty("subindex." + i);
        System.out.println("Initializing index " + i + " from " + path);
        indexDir = new File(path);
        readers[i] = DirectoryReader.open(FSDirectory.open(indexDir));
        System.out.println("#docs in index " + i + ": " + readers[i].numDocs());
    }

    if (!indexType.equals("single")) {
        // Aggregate every sub-index into one logical index.
        System.out.println("Initializing multi-reader");
        multiReader = new MultiReader(readers, true);
        System.out.println("#docs in index: " + multiReader.numDocs());
    }
}

From source file:ubic.basecode.ontology.search.OntologyIndexer.java

License:Apache License

/**
 * Find the search index (will not create it)
 *
 * @param  name
 * @param  analyzer
 * @return          Index, or null if there is no index.
 */
@SuppressWarnings("resource")
private static SearchIndex getSubjectIndex(String name, Analyzer analyzer) {
    log.debug("Loading index: " + name);
    File indexdir = getIndexPath(name);
    File indexdirstd = getIndexPath(name + ".std");
    try {
        // Not try-with-resources: on success these must *stay* open, since the
        // returned SearchIndex keeps reading from them.
        FSDirectory directory = FSDirectory.open(indexdir);
        FSDirectory directorystd = FSDirectory.open(indexdirstd);

        if (!IndexReader.indexExists(directory) || !IndexReader.indexExists(directorystd)) {
            // No usable index: close both directories before bailing out so we
            // do not leak their file handles on the null-return path.
            directory.close();
            directorystd.close();
            return null;
        }

        IndexReader reader = IndexReader.open(directory);
        IndexReader readerstd = IndexReader.open(directorystd);
        MultiReader r = new MultiReader(reader, readerstd);
        return new SearchIndex(r, analyzer);

    } catch (IOException e) {
        log.warn("Index for " + name + " could not be read: " + e.getMessage());
        return null;
    }
}

From source file:ubic.basecode.ontology.search.OntologyIndexer.java

License:Apache License

/**
 * Create an on-disk index from an existing OntModel. Any existing index will be deleted/overwritten.
 *
 * @see             <a href="http://jena.apache.org/documentation/larq/">Jena LARQ documentation</a>
 * @param  datafile or uri
 * @param  name     used to refer to this index later
 * @param  model
 * @return
 */
@SuppressWarnings("resource")
private static synchronized SearchIndex index(String name, OntModel model) {

    File indexdir = getIndexPath(name);

    try {
        StopWatch timer = new StopWatch();
        timer.start();
        FSDirectory dir = FSDirectory.open(indexdir);
        log.info("Indexing " + name + " to: " + indexdir);

        /*
         * adjust the analyzer ...
         */
        Analyzer analyzer = new EnglishAnalyzer(Version.LUCENE_36);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        IndexWriter indexWriter = new IndexWriter(dir, config);
        try {
            indexWriter.deleteAll(); // start with clean slate.
            assert 0 == indexWriter.numDocs();

            IndexBuilderSubject larqSubjectBuilder = new IndexBuilderSubject(indexWriter);
            StmtIterator listStatements = model.listStatements(new IndexerSelector());
            larqSubjectBuilder.indexStatements(listStatements);
            indexWriter.commit();
            log.info(indexWriter.numDocs() + " Statements indexed...");
        } finally {
            // Always close the writer, even when indexing throws, so the index
            // write lock is released and file handles are not leaked.
            indexWriter.close();
        }

        Directory dirstd = indexStd(name, model);

        MultiReader r = new MultiReader(IndexReader.open(dir), IndexReader.open(dirstd));

        // workaround to get the EnglishAnalyzer;
        // larqSubjectBuilder.getIndex() always returns a StandardAnalyzer.
        SearchIndex index = new SearchIndex(r, new EnglishAnalyzer(Version.LUCENE_36));
        assert index.getLuceneQueryParser().getAnalyzer() instanceof EnglishAnalyzer;

        log.info("Done indexing of " + name + " in " + String.format("%.2f", timer.getTime() / 1000.0) + "s");

        return index;
    } catch (IOException e) {
        throw new RuntimeException("Indexing failure for " + name, e);
    }
}