List of usage examples for org.apache.lucene.index DirectoryReader open
public static DirectoryReader open(final IndexCommit commit, Map<String, String> readerAttributes) throws IOException

Note: the signature above is only one overload of DirectoryReader.open. The examples below exercise several overloads, most commonly the near-real-time form open(IndexWriter writer, boolean applyAllDeletes) from the Lucene 4.x API, rather than the IndexCommit overload shown here.
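Before the per-project listings, here is a minimal, self-contained sketch of the pattern most of the examples below rely on: opening a near-real-time (NRT) reader straight from a live IndexWriter via open(IndexWriter, boolean applyAllDeletes). It uses the Lucene 4.x API to match the examples; the in-memory directory, class name, and field name are illustrative only.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NrtOpenExample {
    public static void main(String[] args) throws IOException {
        Directory dir = new RAMDirectory(); // in-memory index, for illustration only
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
                new StandardAnalyzer(Version.LUCENE_47));
        IndexWriter writer = new IndexWriter(dir, config);

        Document doc = new Document();
        doc.add(new TextField("content", "hello world", Field.Store.YES));
        writer.addDocument(doc);

        // Open a near-real-time reader directly from the writer: the document
        // above is visible even though nothing has been committed yet.
        // 'true' means buffered deletes are applied to the returned reader.
        DirectoryReader reader = DirectoryReader.open(writer, true);
        System.out.println("numDocs = " + reader.numDocs()); // prints 1

        reader.close();
        writer.close();
    }
}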
From source file:collene.TestShakespeare.java
License:Apache License
@Test
public void rest() throws IOException, ParseException {
    File shakespeareDir = new File("src/test/resources/shakespeare");
    File[] files = shakespeareDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return !pathname.isHidden();
        }
    });
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    long startIndexTime = System.currentTimeMillis();
    final int flushLines = 200;
    int totalLines = 0;
    Collection<Document> documents = new ArrayList<Document>();
    for (File f : files) {
        String play = f.getName();
        int lineNumber = 1;
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        String line = reader.readLine();
        while (line != null) {
            // index it.
            Document doc = new Document();
            doc.add(new NumericDocValuesField("line", lineNumber));
            doc.add(new Field("play", play, TextField.TYPE_STORED));
            doc.add(new Field("content", line, TextField.TYPE_STORED));
            documents.add(doc);
            totalLines += 1;
            if (totalLines % flushLines == 0) {
                writer.addDocuments(documents);
                documents.clear();
            }
            lineNumber += 1;
            line = reader.readLine();
        }
        reader.close();
    }
    if (documents.size() > 0) {
        writer.addDocuments(documents);
    }
    long endIndexTime = System.currentTimeMillis();
    System.out.println(
            String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime));
    //System.out.println(String.format("%s committed", directory.getClass().getSimpleName()));
    // writer.forceMerge(1);
    // System.out.println(String.format("%s merged", directory.getClass().getSimpleName()));

    // let's search!
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer);
    String[] queryTerms = new String[] { "trumpet" };
    for (String term : queryTerms) {
        long searchStart = System.currentTimeMillis();
        Query query = parser.parse(term);
        TopDocs docs = searcher.search(query, 10);
        long searchEnd = System.currentTimeMillis();
        System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                docs.totalHits, searchEnd - searchStart));
        for (ScoreDoc doc : docs.scoreDocs) {
            System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex));
        }
    }
    writer.close(true);
    //System.out.println(String.format("%s closed", directory.getClass().getSimpleName()));

    System.out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        System.out.println(s);
    }
    directory.close();
}
From source file:com.bah.lucene.BaseDirectoryTestSuite.java
License:Apache License
@Test
public void testCreateIndex() throws IOException {
    long s = System.nanoTime();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    FSDirectory control = FSDirectory.open(fileControl);
    Directory dir = getControlDir(control, directory);
    // The serial merge scheduler can be useful for debugging.
    // conf.setMergeScheduler(new SerialMergeScheduler());
    IndexWriter writer = new IndexWriter(dir, conf);
    int numDocs = 10000;
    DirectoryReader reader = null;
    for (int i = 0; i < 100; i++) {
        if (reader == null) {
            reader = DirectoryReader.open(writer, true);
        } else {
            DirectoryReader old = reader;
            reader = DirectoryReader.openIfChanged(old, writer, true);
            if (reader == null) {
                reader = old;
            } else {
                old.close();
            }
        }
        assertEquals(i * numDocs, reader.numDocs());
        IndexSearcher searcher = new IndexSearcher(reader);
        NumericRangeQuery<Integer> query = NumericRangeQuery.newIntRange("id", 42, 42, true, true);
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(i, topDocs.totalHits);
        addDocuments(writer, numDocs);
    }
    writer.close(false);
    reader.close();
    long e = System.nanoTime();
    System.out.println("Total time [" + (e - s) / 1000000.0 + " ms]");
}
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception {
    // if (args.length != 3) {
    //     System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //     System.exit(1);
    // }
    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(
            MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND);
    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);
    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();
        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();
        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);
        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);

        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory,
                authors);
    }

    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }
    // taxonomyWriter.close();
    //
    // indexWriter.commit();
    // indexWriter.close();

    String query = "story";

    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));

    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));

    System.out.println("Found:");
    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf("        - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }

    taxonomyReader.close();
    indexReader.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();
}
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
/**
 * Constructor for LuceneIndex
 *
 * @param dataDirectory Path to the directory to create an index directory within.
 * @throws IndexException
 */
public LuceneIndex(Path dataDirectory) throws IndexException {
    //TODO: Check to make sure directory is read/writable
    path = dataDirectory.resolve(INDEXDIR);
    try {
        dir = FSDirectory.open(path);
        analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
@Override
public void indexPage(String collection, String url, Instant timestamp, String body) throws IndexException {
    HtmlTextExtractor extractor = new HtmlTextExtractor(body, url);
    Document doc = new IndexDocumentAdapter().setCollection(collection).setUrl(url).setTimestamp(timestamp)
            .setTitle(extractor.getTitle()).setContent(extractor.getContent()).getAsDocument();
    try {
        writer.addDocument(doc);
        writer.commit();
        reader.close();
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
From source file:com.epam.wilma.message.search.lucene.search.helper.IndexReaderFactory.java
License:Open Source License
/**
 * Creates a new {@link IndexReader} instance using a previously opened {@link IndexWriter},
 * to be able to handle real-time changes.
 * @param applyAllDeletes If true, all buffered deletes will be applied (made visible) in the returned reader.
 *     If false, the deletes are not applied but remain buffered (in IndexWriter) so that they will be applied in the future.
 * @return the new {@link IndexReader} instance
 * @throws IOException when {@link IndexReader} creation fails
 */
public IndexReader create(final boolean applyAllDeletes) throws IOException {
    return DirectoryReader.open(indexWriter, applyAllDeletes);
}
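The effect of the applyAllDeletes flag that this factory forwards can be seen in a small, hedged sketch (plain Lucene 4.x calls; the index contents, class name, and the docWithId helper are hypothetical and not part of the factory above):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class ApplyAllDeletesDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47)));
        writer.addDocument(docWithId("1"));
        writer.addDocument(docWithId("2"));
        writer.deleteDocuments(new Term("id", "1")); // delete is buffered, not committed

        DirectoryReader applied = DirectoryReader.open(writer, true);
        System.out.println(applied.numDocs());  // 1: the buffered delete is applied

        DirectoryReader buffered = DirectoryReader.open(writer, false);
        System.out.println(buffered.numDocs()); // 1 or 2: the delete may remain buffered

        applied.close();
        buffered.close();
        writer.close();
    }

    // Hypothetical helper for this sketch only.
    private static Document docWithId(String id) {
        Document doc = new Document();
        doc.add(new StringField("id", id, Field.Store.YES));
        return doc;
    }
}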
From source file:com.gitblit.service.LuceneService.java
License:Apache License
/**
 * Gets an index searcher for the repository.
 *
 * @param repository
 * @return
 * @throws IOException
 */
private IndexSearcher getIndexSearcher(String repository) throws IOException {
    IndexSearcher searcher = searchers.get(repository);
    if (searcher == null) {
        IndexWriter writer = getIndexWriter(repository);
        searcher = new IndexSearcher(DirectoryReader.open(writer, true));
        searchers.put(repository, searcher);
    }
    return searcher;
}
From source file:com.gitblit.tickets.TicketIndexer.java
License:Apache License
private IndexSearcher getSearcher() throws IOException {
    if (searcher == null) {
        searcher = new IndexSearcher(DirectoryReader.open(getWriter(), true));
    }
    return searcher;
}
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
/**
 * Creates and returns a new SearcherManager from the given {@link IndexWriter}.
 *
 * @param writer
 *            the IndexWriter to open the IndexReader from.
 * @param applyAllDeletes
 *            If <code>true</code>, all buffered deletes will be applied (made
 *            visible) in the {@link IndexSearcher} / {@link DirectoryReader}.
 *            If <code>false</code>, the deletes may or may not be applied, but
 *            remain buffered (in IndexWriter) so that they will be applied in
 *            the future. Applying deletes can be costly, so if your app can
 *            tolerate deleted documents being returned you might gain some
 *            performance by passing <code>false</code>. See
 *            {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter, boolean)}.
 * @param searcherFactory
 *            An optional {@link SearcherFactory}. Pass <code>null</code> if you
 *            don't require the searcher to be warmed before going live or other
 *            custom behavior.
 *
 * @throws IOException if there is a low-level I/O error
 */
public WrappableSearcherManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory)
        throws IOException {
    if (searcherFactory == null) {
        searcherFactory = new SearcherFactory();
    }
    this.searcherFactory = searcherFactory;
    current = getSearcher(searcherFactory, DirectoryReader.open(writer, applyAllDeletes));
}
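WrappableSearcherManager follows the lifecycle of Lucene's stock SearcherManager, so the usual acquire/release/maybeRefresh discipline applies. Below is a sketch using the stock org.apache.lucene.search.SearcherManager from Lucene 4.x, whose constructor likewise takes a writer, the applyAllDeletes flag, and an optional SearcherFactory; the writer parameter and class name are assumptions for illustration:

import java.io.IOException;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TopDocs;

public class SearcherManagerSketch {
    static void searchOnce(IndexWriter writer) throws IOException {
        // applyAllDeletes=true; null means no custom SearcherFactory (no warming).
        SearcherManager manager = new SearcherManager(writer, true, null);

        IndexSearcher searcher = manager.acquire(); // ref-counted checkout
        try {
            TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10);
            System.out.println("hits: " + hits.totalHits);
        } finally {
            manager.release(searcher); // never close an acquired searcher directly
        }

        // After further writes, make them visible to subsequent acquire() calls:
        manager.maybeRefresh();
        manager.close();
    }
}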
From source file:com.o19s.solr.swan.highlight.TermVectorFun.java
License:Apache License
@Test
public void testBlah() throws IOException {
    RAMDirectory ramDir = new RAMDirectory();
    // Index some made up content
    IndexWriterConfig iwf = new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
    IndexWriter writer = new IndexWriter(ramDir, iwf);
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setTokenized(true);
    ft.setStored(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze();
    for (int i = 0; i < DOCS.length; i++) {
        Document doc = new Document();
        StringField id = new StringField("id", "doc_" + i, StringField.Store.YES);
        doc.add(id);
        // Store both position and offset information
        Field text = new Field("content", DOCS[i], ft);
        // Field.Index.ANALYZED,
        // Field.TermVector.WITH_POSITIONS_OFFSETS);
        doc.add(text);
        writer.addDocument(doc);
    }
    //writer.close();

    // Get a searcher
    AtomicReader dr = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
    IndexSearcher searcher = new IndexSearcher(dr);

    // Do a search using SpanQuery
    SpanTermQuery fleeceQ = new SpanTermQuery(new Term("content", "fleece"));
    TopDocs results = searcher.search(fleeceQ, 10);
    for (int i = 0; i < results.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = results.scoreDocs[i];
        System.out.println("Score Doc: " + scoreDoc);
    }
    IndexReader reader = searcher.getIndexReader();
    Bits acceptDocs = null;
    Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
    Spans spans = fleeceQ.getSpans(dr.getContext(), acceptDocs, termContexts);
    while (spans.next()) {
        System.out.println("Doc: " + spans.doc() + " Start: " + spans.start() + " End: " + spans.end());
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("content");
        reader.document(spans.doc(), visitor);
        Terms terms = reader.getTermVector(spans.doc(), "content");
        TermsEnum tenum = terms.iterator(null);
        // AttributeSource as = tenum.attributes();
        while (tenum.next() != null) {
            System.out.println(tenum.term().utf8ToString());
        }
        for (long pos = 0L; pos < spans.end(); pos++) {
            // tenum.next();
            // if (tenum.ord() < pos) continue;
            // System.out.println(tenum.term());
            // }
            reader.document(spans.doc(), visitor);
            // String[] values = visitor.getDocument().getValues("content");
            // List<String> a = new ArrayList<String>();
            // // build up the window
            // tvm.start = spans.start() - window;
            // tvm.end = spans.end() + window;
            // reader.getTermFreqVector(spans.doc(), "content", tvm);
            // for (WindowEntry entry : tvm.entries.values()) {
            //     System.out.println("Entry: " + entry);
            // }
            // // clear out the entries for the next round
            // tvm.entries.clear();
        }
    }
}