Example usage for org.apache.lucene.index DirectoryReader open

List of usage examples for org.apache.lucene.index DirectoryReader open

Introduction

On this page you can find example usage for org.apache.lucene.index.DirectoryReader.open.

Prototype

public static DirectoryReader open(final IndexCommit commit, Map<String, String> readerAttributes)
        throws IOException 


Document

Expert: returns an IndexReader reading the index in the given IndexCommit.
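
The prototype above is the expert overload that opens a reader on a specific IndexCommit; most of the usage examples below call the overloads that take a Directory or a live IndexWriter instead. The following is a minimal sketch of the common variants, assuming Lucene 5.x or later APIs and a hypothetical index path; the 4.x examples below use the older two-argument open(writer, applyAllDeletes) form.

// Minimal sketch, not taken from the examples below; assumes Lucene 5.x+.
// Needs org.apache.lucene.index.*, org.apache.lucene.store.*, java.nio.file.Paths,
// and org.apache.lucene.analysis.standard.StandardAnalyzer.
Directory dir = FSDirectory.open(Paths.get("/tmp/example-index")); // hypothetical path
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
writer.commit(); // ensure at least one commit point exists

// 1. Near-real-time reader from a live IndexWriter (sees changes not yet committed).
DirectoryReader nrt = DirectoryReader.open(writer);

// 2. Reader on the latest commit point in the Directory.
DirectoryReader fromDir = DirectoryReader.open(dir);

// 3. Reader on a specific IndexCommit, as in the prototype above.
IndexCommit commit = DirectoryReader.listCommits(dir).get(0);
DirectoryReader atCommit = DirectoryReader.open(commit);

// Readers and the writer should be closed when no longer needed.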

Usage

From source file: collene.TestShakespeare.java

License: Apache License

@Test
public void rest() throws IOException, ParseException {
    File shakespeareDir = new File("src/test/resources/shakespeare");
    File[] files = shakespeareDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return !pathname.isHidden();
        }
    });

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    long startIndexTime = System.currentTimeMillis();
    final int flushLines = 200;
    int totalLines = 0;
    Collection<Document> documents = new ArrayList<Document>();
    for (File f : files) {
        String play = f.getName();
        int lineNumber = 1;
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        String line = reader.readLine();
        while (line != null) {
            // index it.

            Document doc = new Document();
            doc.add(new NumericDocValuesField("line", lineNumber));
            doc.add(new Field("play", play, TextField.TYPE_STORED));
            doc.add(new Field("content", line, TextField.TYPE_STORED));
            documents.add(doc);

            totalLines += 1;
            if (totalLines % flushLines == 0) {
                writer.addDocuments(documents);
                documents.clear();
            }

            lineNumber += 1;
            line = reader.readLine();
        }
        reader.close();
    }

    if (documents.size() > 0) {
        writer.addDocuments(documents);
    }
    long endIndexTime = System.currentTimeMillis();

    System.out.println(
            String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime));

    //System.out.println(String.format("%s committed", directory.getClass().getSimpleName()));
    //        writer.forceMerge(1);
    //        System.out.println(String.format("%s merged", directory.getClass().getSimpleName()));

    // let's search!
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer);

    String[] queryTerms = new String[] { "trumpet" };

    for (String term : queryTerms) {
        long searchStart = System.currentTimeMillis();
        Query query = parser.parse(term);
        TopDocs docs = searcher.search(query, 10);
        long searchEnd = System.currentTimeMillis();
        System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                docs.totalHits, searchEnd - searchStart));
        for (ScoreDoc doc : docs.scoreDocs) {
            System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex));
        }
    }

    writer.close(true);
    //System.out.println(String.format("%s closed", directory.getClass().getSimpleName()));

    System.out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        System.out.println(s);
    }

    directory.close();
}

From source file: com.bah.lucene.BaseDirectoryTestSuite.java

License: Apache License

@Test
public void testCreateIndex() throws IOException {
    long s = System.nanoTime();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    FSDirectory control = FSDirectory.open(fileControl);
    Directory dir = getControlDir(control, directory);
    // The serial merge scheduler can be useful for debugging.
    // conf.setMergeScheduler(new SerialMergeScheduler());
    IndexWriter writer = new IndexWriter(dir, conf);
    int numDocs = 10000;
    DirectoryReader reader = null;
    for (int i = 0; i < 100; i++) {
        if (reader == null) {
            reader = DirectoryReader.open(writer, true);
        } else {
            DirectoryReader old = reader;
            reader = DirectoryReader.openIfChanged(old, writer, true);
            if (reader == null) {
                reader = old;
            } else {
                old.close();
            }
        }
        assertEquals(i * numDocs, reader.numDocs());
        IndexSearcher searcher = new IndexSearcher(reader);
        NumericRangeQuery<Integer> query = NumericRangeQuery.newIntRange("id", 42, 42, true, true);
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(i, topDocs.totalHits);
        addDocuments(writer, numDocs);
    }
    writer.close(false);
    reader.close();
    long e = System.nanoTime();
    System.out.println("Total time [" + (e - s) / 1000000.0 + " ms]");
}

From source file: com.chimpler.example.FacetLuceneIndexer.java

License: Apache License

public static void main(String args[]) throws Exception {
    //      if (args.length != 3) {
    //         System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //         System.exit(1);
    //      }

    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)),
            OpenMode.APPEND);

    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);

    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();

        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();

        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);

        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);

        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory,
                authors);
    }

    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }

    //      taxonomyWriter.close();
    //      
    //      indexWriter.commit();
    //      indexWriter.close();

    String query = "story";

    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));

    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));
    System.out.println("Found:");

    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf("        - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }
    taxonomyReader.close();
    indexReader.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();

}

From source file: com.difference.historybook.index.lucene.LuceneIndex.java

License: Apache License

/**
 * Constructor for LuceneIndex
 * 
 * @param dataDirectory   Path to the directory to create an index directory within.
 * @throws IndexException
 */
public LuceneIndex(Path dataDirectory) throws IndexException {

    //TODO: Check to make sure directory is read/writable
    path = dataDirectory.resolve(INDEXDIR);

    try {
        dir = FSDirectory.open(path);
        analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);

        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}

From source file: com.difference.historybook.index.lucene.LuceneIndex.java

License: Apache License

@Override
public void indexPage(String collection, String url, Instant timestamp, String body) throws IndexException {
    HtmlTextExtractor extractor = new HtmlTextExtractor(body, url);

    Document doc = new IndexDocumentAdapter().setCollection(collection).setUrl(url).setTimestamp(timestamp)
            .setTitle(extractor.getTitle()).setContent(extractor.getContent()).getAsDocument();

    try {
        writer.addDocument(doc);
        writer.commit();
        reader.close();
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}

From source file: com.epam.wilma.message.search.lucene.search.helper.IndexReaderFactory.java

License: Open Source License

/**
 * Creates a new {@link IndexReader} instance using previously opened {@link IndexWriter}
 * to be able to handle real time changes.
 * @param applyAllDeletes If true, all buffered deletes will be applied (made visible) in the returned reader.
 * If false, the deletes are not applied but remain buffered (in IndexWriter) so that they will be applied in the future.
 * @return with the new {@link IndexReader} instance
 * @throws IOException was thrown when {@link IndexReader} creation failed
 */
public IndexReader create(final boolean applyAllDeletes) throws IOException {
    return DirectoryReader.open(indexWriter, applyAllDeletes);
}
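
A hedged sketch of how this factory might be called; the field name indexReaderFactory, the queried field and term, and the surrounding code are illustrative assumptions rather than part of the Wilma source.

// Hypothetical caller: obtain a fresh NRT reader per search, applying buffered deletes.
IndexReader reader = indexReaderFactory.create(true);
try {
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(new TermQuery(new Term("content", "example")), 10);
    System.out.println("hits: " + hits.totalHits);
} finally {
    reader.close(); // each create() call returns a new reader that the caller must close
}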

From source file: com.gitblit.service.LuceneService.java

License: Apache License

/**
 * Gets an index searcher for the repository.
 *
 * @param repository
 * @return
 * @throws IOException
 */
private IndexSearcher getIndexSearcher(String repository) throws IOException {
    IndexSearcher searcher = searchers.get(repository);
    if (searcher == null) {
        IndexWriter writer = getIndexWriter(repository);
        searcher = new IndexSearcher(DirectoryReader.open(writer, true));
        searchers.put(repository, searcher);
    }
    return searcher;
}

From source file: com.gitblit.tickets.TicketIndexer.java

License: Apache License

private IndexSearcher getSearcher() throws IOException {
    if (searcher == null) {
        searcher = new IndexSearcher(DirectoryReader.open(getWriter(), true));
    }
    return searcher;
}

From source file: com.google.gerrit.lucene.WrappableSearcherManager.java

License: Apache License

/**
 * Creates and returns a new SearcherManager from the given
 * {@link IndexWriter}.
 *
 * @param writer
 *          the IndexWriter to open the IndexReader from.
 * @param applyAllDeletes
 *          If <code>true</code>, all buffered deletes will be applied (made
 *          visible) in the {@link IndexSearcher} / {@link DirectoryReader}.
 *          If <code>false</code>, the deletes may or may not be applied, but
 *          remain buffered (in IndexWriter) so that they will be applied in
 *          the future. Applying deletes can be costly, so if your app can
 *          tolerate deleted documents being returned you might gain some
 *          performance by passing <code>false</code>. See
 *          {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter, boolean)}.
 * @param searcherFactory
 *          An optional {@link SearcherFactory}. Pass <code>null</code> if you
 *          don't require the searcher to be warmed before going live or other
 *          custom behavior.
 *
 * @throws IOException if there is a low-level I/O error
 */
public WrappableSearcherManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory)
        throws IOException {
    if (searcherFactory == null) {
        searcherFactory = new SearcherFactory();
    }
    this.searcherFactory = searcherFactory;
    current = getSearcher(searcherFactory, DirectoryReader.open(writer, applyAllDeletes));
}
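
For context, this class mirrors Lucene's stock SearcherManager, which is normally driven through the acquire/release pattern inherited from ReferenceManager. A minimal sketch against the standard SearcherManager API follows (the three-argument constructor matches older Lucene versions); the writer, field, and term are assumptions:

// Sketch using Lucene's SearcherManager; the wrappable variant follows the same contract.
SearcherManager manager = new SearcherManager(writer, true, new SearcherFactory());
IndexSearcher searcher = manager.acquire();
try {
    TopDocs hits = searcher.search(new TermQuery(new Term("field", "value")), 10);
} finally {
    manager.release(searcher); // release, do not close; the manager owns the underlying reader
}
manager.maybeRefresh(); // after index changes, swap in a fresh searcher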

From source file: com.o19s.solr.swan.highlight.TermVectorFun.java

License: Apache License

@Test
public void testBlah() throws IOException {
    RAMDirectory ramDir = new RAMDirectory();
    // Index some made up content
    IndexWriterConfig iwf = new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
    IndexWriter writer = new IndexWriter(ramDir, iwf);
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setTokenized(true);
    ft.setStored(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze();
    for (int i = 0; i < DOCS.length; i++) {
        Document doc = new Document();
        StringField id = new StringField("id", "doc_" + i, StringField.Store.YES);
        doc.add(id);
        // Store both position and offset information
        Field text = new Field("content", DOCS[i], ft);
        //               Field.Index.ANALYZED,
        //               Field.TermVector.WITH_POSITIONS_OFFSETS);
        doc.add(text);
        writer.addDocument(doc);
    }
    //writer.close();
    // Get a searcher
    AtomicReader dr = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
    IndexSearcher searcher = new IndexSearcher(dr);
    // Do a search using SpanQuery
    SpanTermQuery fleeceQ = new SpanTermQuery(new Term("content", "fleece"));
    TopDocs results = searcher.search(fleeceQ, 10);
    for (int i = 0; i < results.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = results.scoreDocs[i];
        System.out.println("Score Doc: " + scoreDoc);
    }
    IndexReader reader = searcher.getIndexReader();
    Bits acceptDocs = null;
    Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
    Spans spans = fleeceQ.getSpans(dr.getContext(), acceptDocs, termContexts);

    while (spans.next()) {
        System.out.println("Doc: " + spans.doc() + " Start: " + spans.start() + " End: " + spans.end());
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("content");
        reader.document(spans.doc(), visitor);
        Terms terms = reader.getTermVector(spans.doc(), "content");
        TermsEnum tenum = terms.iterator(null);
        //         AttributeSource as = tenum.attributes();

        while (tenum.next() != null) {
            System.out.println(tenum.term().utf8ToString());
        }
        for (long pos = 0L; pos < spans.end(); pos++) {
            //            tenum.next();
            //            if (tenum.ord()<pos) continue;
            //            System.out.println(tenum.term());
            //            
        }

        reader.document(spans.doc(), visitor);
        //         String[] values = visitor.getDocument().getValues("content");
        //         List<String> a = new ArrayList<String>();
        //         // build up the window
        //         tvm.start = spans.start() - window;
        //         tvm.end = spans.end() + window;
        //         reader.getTermFreqVector(spans.doc(), "content", tvm);
        //         for (WindowEntry entry : tvm.entries.values()) {
        //            System.out.println("Entry: " + entry);
        //         }
        //         // clear out the entries for the next round
        //         tvm.entries.clear();
    }
}