List of usage examples for org.apache.lucene.index DirectoryReader open
public static DirectoryReader open(final IndexCommit commit, Map<String, String> readerAttributes) throws IOException

Note: the signature above is only one overload of DirectoryReader.open. The examples below exercise several overloads, most commonly the near-real-time form open(IndexWriter writer, boolean applyAllDeletes) from the Lucene 4.x API, rather than the IndexCommit overload shown here.
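Before the per-project listings, here is a minimal, self-contained sketch of the pattern most of the examples below rely on: opening a near-real-time (NRT) reader straight from a live IndexWriter via open(IndexWriter, boolean applyAllDeletes). It uses the Lucene 4.x API to match the examples; the in-memory directory, class name, and field name are illustrative only.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class NrtOpenExample {
    public static void main(String[] args) throws IOException {
        Directory dir = new RAMDirectory(); // in-memory index, for illustration only
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
                new StandardAnalyzer(Version.LUCENE_47));
        IndexWriter writer = new IndexWriter(dir, config);

        Document doc = new Document();
        doc.add(new TextField("content", "hello world", Field.Store.YES));
        writer.addDocument(doc);

        // Open a near-real-time reader directly from the writer: the document
        // above is visible even though nothing has been committed yet.
        // 'true' means buffered deletes are applied to the returned reader.
        DirectoryReader reader = DirectoryReader.open(writer, true);
        System.out.println("numDocs = " + reader.numDocs()); // prints 1

        reader.close();
        writer.close();
    }
}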
From source file:collene.TestShakespeare.java
License:Apache License
@Test
public void rest() throws IOException, ParseException {
    File shakespeareDir = new File("src/test/resources/shakespeare");
    File[] files = shakespeareDir.listFiles(new FileFilter() {
        @Override
        public boolean accept(File pathname) {
            return !pathname.isHidden();
        }
    });
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(directory, config);

    long startIndexTime = System.currentTimeMillis();
    final int flushLines = 200;
    int totalLines = 0;
    Collection<Document> documents = new ArrayList<Document>();
    for (File f : files) {
        String play = f.getName();
        int lineNumber = 1;
        BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
        String line = reader.readLine();
        while (line != null) {
            // index it.
            Document doc = new Document();
            doc.add(new NumericDocValuesField("line", lineNumber));
            doc.add(new Field("play", play, TextField.TYPE_STORED));
            doc.add(new Field("content", line, TextField.TYPE_STORED));
            documents.add(doc);
            totalLines += 1;
            if (totalLines % flushLines == 0) {
                writer.addDocuments(documents);
                documents.clear();
            }
            lineNumber += 1;
            line = reader.readLine();
        }
        reader.close();
    }
    if (documents.size() > 0) {
        writer.addDocuments(documents);
    }
    long endIndexTime = System.currentTimeMillis();
    System.out.println(
            String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime));
    //System.out.println(String.format("%s committed", directory.getClass().getSimpleName()));
    // writer.forceMerge(1);
    // System.out.println(String.format("%s merged", directory.getClass().getSimpleName()));

    // let's search!
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false));
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer);
    String[] queryTerms = new String[] { "trumpet" };
    for (String term : queryTerms) {
        long searchStart = System.currentTimeMillis();
        Query query = parser.parse(term);
        TopDocs docs = searcher.search(query, 10);
        long searchEnd = System.currentTimeMillis();
        System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(),
                docs.totalHits, searchEnd - searchStart));
        for (ScoreDoc doc : docs.scoreDocs) {
            System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex));
        }
    }
    writer.close(true);
    //System.out.println(String.format("%s closed", directory.getClass().getSimpleName()));

    System.out.println("I think these are the files:");
    for (String s : directory.listAll()) {
        System.out.println(s);
    }
    directory.close();
}
From source file:com.bah.lucene.BaseDirectoryTestSuite.java
License:Apache License
@Test
public void testCreateIndex() throws IOException {
    long s = System.nanoTime();
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
    FSDirectory control = FSDirectory.open(fileControl);
    Directory dir = getControlDir(control, directory);
    // The serial merge scheduler can be useful for debugging.
    // conf.setMergeScheduler(new SerialMergeScheduler());
    IndexWriter writer = new IndexWriter(dir, conf);
    int numDocs = 10000;
    DirectoryReader reader = null;
    for (int i = 0; i < 100; i++) {
        if (reader == null) {
            reader = DirectoryReader.open(writer, true);
        } else {
            DirectoryReader old = reader;
            reader = DirectoryReader.openIfChanged(old, writer, true);
            if (reader == null) {
                reader = old;
            } else {
                old.close();
            }
        }
        assertEquals(i * numDocs, reader.numDocs());
        IndexSearcher searcher = new IndexSearcher(reader);
        NumericRangeQuery<Integer> query = NumericRangeQuery.newIntRange("id", 42, 42, true, true);
        TopDocs topDocs = searcher.search(query, 10);
        assertEquals(i, topDocs.totalHits);
        addDocuments(writer, numDocs);
    }
    writer.close(false);
    reader.close();
    long e = System.nanoTime();
    System.out.println("Total time [" + (e - s) / 1000000.0 + " ms]");
}
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception {
    // if (args.length != 3) {
    //     System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //     System.exit(1);
    // }
    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(
            MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND);
    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);
    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();
        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();
        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);
        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);

        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory,
                authors);
    }

    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }
    // taxonomyWriter.close();
    //
    // indexWriter.commit();
    // indexWriter.close();

    String query = "story";

    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));

    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));

    System.out.println("Found:");
    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf("        - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }

    taxonomyReader.close();
    indexReader.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();
}
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
/**
 * Constructor for LuceneIndex
 *
 * @param dataDirectory Path to the directory to create an index directory within.
 * @throws IndexException
 */
public LuceneIndex(Path dataDirectory) throws IndexException {
    //TODO: Check to make sure directory is read/writable
    path = dataDirectory.resolve(INDEXDIR);
    try {
        dir = FSDirectory.open(path);
        analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        writer = new IndexWriter(dir, iwc);
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
        parser = new QueryParser(IndexDocumentAdapter.FIELD_SEARCH, analyzer);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
From source file:com.difference.historybook.index.lucene.LuceneIndex.java
License:Apache License
@Override
public void indexPage(String collection, String url, Instant timestamp, String body) throws IndexException {
    HtmlTextExtractor extractor = new HtmlTextExtractor(body, url);
    Document doc = new IndexDocumentAdapter().setCollection(collection).setUrl(url).setTimestamp(timestamp)
            .setTitle(extractor.getTitle()).setContent(extractor.getContent()).getAsDocument();
    try {
        writer.addDocument(doc);
        writer.commit();
        reader.close();
        reader = DirectoryReader.open(writer, false);
        searcher = new IndexSearcher(reader);
    } catch (IOException e) {
        LOG.error(e.getLocalizedMessage());
        throw new IndexException(e);
    }
}
From source file:com.epam.wilma.message.search.lucene.search.helper.IndexReaderFactory.java
License:Open Source License
/**
 * Creates a new {@link IndexReader} instance using a previously opened {@link IndexWriter},
 * to be able to handle real-time changes.
 * @param applyAllDeletes If true, all buffered deletes will be applied (made visible) in the returned reader.
 *     If false, the deletes are not applied but remain buffered (in IndexWriter) so that they will be applied in the future.
 * @return the new {@link IndexReader} instance
 * @throws IOException when {@link IndexReader} creation fails
 */
public IndexReader create(final boolean applyAllDeletes) throws IOException {
    return DirectoryReader.open(indexWriter, applyAllDeletes);
}
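The effect of the applyAllDeletes flag that this factory forwards can be seen in a small, hedged sketch (plain Lucene 4.x calls; the index contents, class name, and the docWithId helper are hypothetical and not part of the factory above):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class ApplyAllDeletesDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47)));
        writer.addDocument(docWithId("1"));
        writer.addDocument(docWithId("2"));
        writer.deleteDocuments(new Term("id", "1")); // delete is buffered, not committed

        DirectoryReader applied = DirectoryReader.open(writer, true);
        System.out.println(applied.numDocs());  // 1: the buffered delete is applied

        DirectoryReader buffered = DirectoryReader.open(writer, false);
        System.out.println(buffered.numDocs()); // 1 or 2: the delete may remain buffered

        applied.close();
        buffered.close();
        writer.close();
    }

    // Hypothetical helper for this sketch only.
    private static Document docWithId(String id) {
        Document doc = new Document();
        doc.add(new StringField("id", id, Field.Store.YES));
        return doc;
    }
}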
From source file:com.gitblit.service.LuceneService.java
License:Apache License
/**
 * Gets an index searcher for the repository.
 *
 * @param repository
 * @return
 * @throws IOException
 */
private IndexSearcher getIndexSearcher(String repository) throws IOException {
    IndexSearcher searcher = searchers.get(repository);
    if (searcher == null) {
        IndexWriter writer = getIndexWriter(repository);
        searcher = new IndexSearcher(DirectoryReader.open(writer, true));
        searchers.put(repository, searcher);
    }
    return searcher;
}
From source file:com.gitblit.tickets.TicketIndexer.java
License:Apache License
private IndexSearcher getSearcher() throws IOException {
    if (searcher == null) {
        searcher = new IndexSearcher(DirectoryReader.open(getWriter(), true));
    }
    return searcher;
}
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
/**
 * Creates and returns a new SearcherManager from the given {@link IndexWriter}.
 *
 * @param writer
 *            the IndexWriter to open the IndexReader from.
 * @param applyAllDeletes
 *            If <code>true</code>, all buffered deletes will be applied (made
 *            visible) in the {@link IndexSearcher} / {@link DirectoryReader}.
 *            If <code>false</code>, the deletes may or may not be applied, but
 *            remain buffered (in IndexWriter) so that they will be applied in
 *            the future. Applying deletes can be costly, so if your app can
 *            tolerate deleted documents being returned you might gain some
 *            performance by passing <code>false</code>. See
 *            {@link DirectoryReader#openIfChanged(DirectoryReader, IndexWriter, boolean)}.
 * @param searcherFactory
 *            An optional {@link SearcherFactory}. Pass <code>null</code> if you
 *            don't require the searcher to be warmed before going live or other
 *            custom behavior.
 *
 * @throws IOException if there is a low-level I/O error
 */
public WrappableSearcherManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory)
        throws IOException {
    if (searcherFactory == null) {
        searcherFactory = new SearcherFactory();
    }
    this.searcherFactory = searcherFactory;
    current = getSearcher(searcherFactory, DirectoryReader.open(writer, applyAllDeletes));
}
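WrappableSearcherManager follows the lifecycle of Lucene's stock SearcherManager, so the usual acquire/release/maybeRefresh discipline applies. Below is a sketch using the stock org.apache.lucene.search.SearcherManager from Lucene 4.x, whose constructor likewise takes a writer, the applyAllDeletes flag, and an optional SearcherFactory; the writer parameter and class name are assumptions for illustration:

import java.io.IOException;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.TopDocs;

public class SearcherManagerSketch {
    static void searchOnce(IndexWriter writer) throws IOException {
        // applyAllDeletes=true; null means no custom SearcherFactory (no warming).
        SearcherManager manager = new SearcherManager(writer, true, null);

        IndexSearcher searcher = manager.acquire(); // ref-counted checkout
        try {
            TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10);
            System.out.println("hits: " + hits.totalHits);
        } finally {
            manager.release(searcher); // never close an acquired searcher directly
        }

        // After further writes, make them visible to subsequent acquire() calls:
        manager.maybeRefresh();
        manager.close();
    }
}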
From source file:com.o19s.solr.swan.highlight.TermVectorFun.java
License:Apache License
@Test
public void testBlah() throws IOException {
    RAMDirectory ramDir = new RAMDirectory();
    // Index some made up content
    IndexWriterConfig iwf = new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47));
    IndexWriter writer = new IndexWriter(ramDir, iwf);
    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setTokenized(true);
    ft.setStored(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorPositions(true);
    ft.freeze();
    for (int i = 0; i < DOCS.length; i++) {
        Document doc = new Document();
        StringField id = new StringField("id", "doc_" + i, StringField.Store.YES);
        doc.add(id);
        // Store both position and offset information
        Field text = new Field("content", DOCS[i], ft);
        // Field.Index.ANALYZED,
        // Field.TermVector.WITH_POSITIONS_OFFSETS);
        doc.add(text);
        writer.addDocument(doc);
    }
    //writer.close();

    // Get a searcher
    AtomicReader dr = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(writer, true));
    IndexSearcher searcher = new IndexSearcher(dr);

    // Do a search using SpanQuery
    SpanTermQuery fleeceQ = new SpanTermQuery(new Term("content", "fleece"));
    TopDocs results = searcher.search(fleeceQ, 10);
    for (int i = 0; i < results.scoreDocs.length; i++) {
        ScoreDoc scoreDoc = results.scoreDocs[i];
        System.out.println("Score Doc: " + scoreDoc);
    }
    IndexReader reader = searcher.getIndexReader();
    Bits acceptDocs = null;
    Map<Term, TermContext> termContexts = new HashMap<Term, TermContext>();
    Spans spans = fleeceQ.getSpans(dr.getContext(), acceptDocs, termContexts);
    while (spans.next()) {
        System.out.println("Doc: " + spans.doc() + " Start: " + spans.start() + " End: " + spans.end());
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("content");
        reader.document(spans.doc(), visitor);
        Terms terms = reader.getTermVector(spans.doc(), "content");
        TermsEnum tenum = terms.iterator(null);
        // AttributeSource as = tenum.attributes();
        while (tenum.next() != null) {
            System.out.println(tenum.term().utf8ToString());
        }
        for (long pos = 0L; pos < spans.end(); pos++) {
            // tenum.next();
            // if (tenum.ord() < pos) continue;
            // System.out.println(tenum.term());
            // }
            reader.document(spans.doc(), visitor);
            // String[] values = visitor.getDocument().getValues("content");
            // List<String> a = new ArrayList<String>();
            // // build up the window
            // tvm.start = spans.start() - window;
            // tvm.end = spans.end() + window;
            // reader.getTermFreqVector(spans.doc(), "content", tvm);
            // for (WindowEntry entry : tvm.entries.values()) {
            //     System.out.println("Entry: " + entry);
            // }
            // // clear out the entries for the next round
            // tvm.entries.clear();
        }
    }
}