List of usage examples for org.apache.lucene.index IndexWriter IndexWriter
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
Parameter: conf — the configuration settings according to which the IndexWriter should be initialized.
. From source file:com.gmail.mosoft521.luceneDemo.IndexFiles.java
License:Apache License
/** * Index all text files under a directory. *//*from www . ja va 2 s . c om*/ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.google.gerrit.lucene.SubIndex.java
License:Apache License
/**
 * Opens a single Lucene sub-index and wires up its supporting machinery:
 * a delegate writer (plain, auto-commit, or periodically-committed depending
 * on {@code writerConfig.getCommitWithinMs()}), a searcher manager, and an
 * NRT controlled-reopen thread.
 *
 * @param dir the Lucene directory backing this sub-index
 * @param dirName human-readable name, used for thread names and log messages
 * @param writerConfig Gerrit-level writer configuration wrapper
 * @param searcherFactory factory for the searchers produced on refresh
 * @throws IOException if the underlying IndexWriter cannot be opened
 */
SubIndex(Directory dir, final String dirName, GerritIndexWriterConfig writerConfig,
        SearcherFactory searcherFactory) throws IOException {
    this.dir = dir;
    IndexWriter delegateWriter;
    long commitPeriod = writerConfig.getCommitWithinMs();
    if (commitPeriod < 0) {
        // Negative period: plain writer, commits only when explicitly requested.
        delegateWriter = new IndexWriter(dir, writerConfig.getLuceneConfig());
    } else if (commitPeriod == 0) {
        // Zero period: commit immediately after every change.
        delegateWriter = new AutoCommitWriter(dir, writerConfig.getLuceneConfig(), true);
    } else {
        // Positive period: commit on a fixed schedule from a daemon thread.
        final AutoCommitWriter autoCommitWriter =
                new AutoCommitWriter(dir, writerConfig.getLuceneConfig(), false);
        delegateWriter = autoCommitWriter;

        new ScheduledThreadPoolExecutor(1,
                new ThreadFactoryBuilder().setNameFormat("Commit-%d " + dirName).setDaemon(true).build())
            .scheduleAtFixedRate(new Runnable() {
                @Override
                public void run() {
                    try {
                        if (autoCommitWriter.hasUncommittedChanges()) {
                            autoCommitWriter.manualFlush();
                            autoCommitWriter.commit();
                        }
                    } catch (IOException e) {
                        log.error("Error committing Lucene index " + dirName, e);
                    } catch (OutOfMemoryError e) {
                        log.error("Error committing Lucene index " + dirName, e);
                        try {
                            autoCommitWriter.close();
                        } catch (IOException e2) {
                            // Fixed: log the close failure itself (e2). Previously this
                            // logged the OOM error a second time, hiding the actual
                            // exception thrown by close().
                            log.error("SEVERE: Error closing Lucene index " + dirName
                                    + " after OOM; index may be corrupted.", e2);
                        }
                    }
                }
            }, commitPeriod, commitPeriod, MILLISECONDS);
    }
    writer = new TrackingIndexWriter(delegateWriter);
    searcherManager = new WrappableSearcherManager(writer.getIndexWriter(), true, searcherFactory);

    notDoneNrtFutures = Sets.newConcurrentHashSet();

    reopenThread = new ControlledRealTimeReopenThread<>(writer, searcherManager,
            0.500 /* maximum stale age (seconds) */, 0.010 /* minimum stale age (seconds) */);
    reopenThread.setName("NRT " + dirName);
    reopenThread.setPriority(Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY));
    reopenThread.setDaemon(true);

    // This must be added after the reopen thread is created. The reopen thread
    // adds its own listener which copies its internally last-refreshed
    // generation to the searching generation. removeIfDone() depends on the
    // searching generation being up to date when calling
    // reopenThread.waitForGeneration(gen, 0), therefore the reopen thread's
    // internal listener needs to be called first.
    // TODO(dborowitz): This may have been fixed by
    // http://issues.apache.org/jira/browse/LUCENE-5461
    searcherManager.addListener(new RefreshListener() {
        @Override
        public void beforeRefresh() throws IOException {
        }

        @Override
        public void afterRefresh(boolean didRefresh) throws IOException {
            for (NrtFuture f : notDoneNrtFutures) {
                f.removeIfDone();
            }
        }
    });

    reopenThread.start();
}
From source file:com.google.gerrit.server.change.ReviewerSuggestionCache.java
License:Apache License
private IndexSearcher index() throws IOException, OrmException { RAMDirectory idx = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer(CharArraySet.EMPTY_SET)); config.setOpenMode(OpenMode.CREATE); try (IndexWriter writer = new IndexWriter(idx, config)) { for (Account a : db.get().accounts().all()) { if (a.isActive()) { addAccount(writer, a);/*from w w w . java 2 s . c om*/ } } } return new IndexSearcher(DirectoryReader.open(idx)); }
From source file:com.gprasad.searchwithlucene.Indexer.java
private static void createIndex(String indexPath) throws IOException { Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setOpenMode(OpenMode.CREATE); writer = new IndexWriter(dir, indexWriterConfig); }
From source file:com.graphhopper.compare.misc.LuceneStorage.java
License:Apache License
public boolean init(boolean forceCreate) { try {//from ww w. ja va 2 s. c o m File file = new File("osm.lucene.test"); if (forceCreate) Helper.deleteDir(file); // germany.osm => 3.6 GB on disc for nodes only, 1.5 GB memory usage at the end of the nodes Directory dir = FSDirectory.open(file); IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_35, new KeywordAnalyzer()); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setMaxMergeMB(3000); cfg.setRAMBufferSizeMB(128); cfg.setTermIndexInterval(512); cfg.setMergePolicy(mp); // specify different formats for id fields etc // -> this breaks 16 of our tests!? Lucene Bug? // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // return new Pulsing40PostingsFormat(); // } // }); // cfg.setMaxThreadStates(8); boolean create = !IndexReader.indexExists(dir); cfg.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); writer = new IndexWriter(dir, cfg); return true; } catch (Exception ex) { logger.error("cannot init lucene storage", ex); return false; } }
From source file:com.greplin.interval.BaseIntervalQueryTest.java
License:Apache License
@Before public void setUp() throws IOException { RAMDirectory ramDirectory = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_35, new SimpleAnalyzer(Version.LUCENE_35)); this.indexWriter = new IndexWriter(ramDirectory, config); }
From source file:com.greplin.lucene.filter.PhraseFilterBenchmark.java
License:Apache License
/**
 * Benchmark entry point: builds a multi-segment in-memory index of random
 * documents, then alternates timing PhraseFilter against PhraseQuery over the
 * same random phrase queries and prints the relative timings.
 */
public static void main(String[] argv) {
    Directory directory = new RAMDirectory();
    try {
        IndexWriter writer = new IndexWriter(directory,
                new IndexWriterConfig(Version.LUCENE_32, new WhitespaceAnalyzer(Version.LUCENE_32)));
        // Build NUMBER_OF_SEGMENTS segments totalling exactly TOTAL_DOCS docs;
        // each segment gets a random share, the last one takes the remainder.
        int done = 0;
        for (int i = 0; i < NUMBER_OF_SEGMENTS; i++) {
            int remaining = NUMBER_OF_SEGMENTS - i;
            int numberOfDocs;
            if (remaining == 1) {
                numberOfDocs = TOTAL_DOCS - done;
            } else {
                numberOfDocs = RANDOM.nextInt(TOTAL_DOCS - done - remaining) + 1;
            }
            done += numberOfDocs;
            System.out.println("Segment #" + i + " has " + numberOfDocs + " docs");
            for (int d = 0; d < numberOfDocs; d++) {
                // Random doc length centered on AVERAGE_WORDS_PER_DOC.
                int wordCount = RANDOM.nextInt(WORDS_PER_DOC_DEVIATION * 2) + AVERAGE_WORDS_PER_DOC
                        - WORDS_PER_DOC_DEVIATION;
                Document doc = new Document();
                doc.add(new Field("f", Joiner.on(' ').join(words(wordCount)), Field.Store.YES,
                        Field.Index.ANALYZED));
                // "second" field matches in roughly SECOND_FIELD_MATCH_PERCENTAGE of docs.
                doc.add(new Field("second",
                        RANDOM.nextInt(100) < SECOND_FIELD_MATCH_PERCENTAGE ? "yes" : "no",
                        Field.Store.NO, Field.Index.ANALYZED));
                writer.addDocument(doc);
            }
            // Commit per iteration to force a segment boundary.
            writer.commit();
        }
        writer.close();

        IndexReader reader = IndexReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);

        // Pre-generate the same phrase queries in both representations so the
        // two timed code paths search identical phrases.
        String[][] queries = new String[TOTAL_QUERIES][];
        Term[][] terms = new Term[TOTAL_QUERIES][];
        for (int q = 0; q < TOTAL_QUERIES; q++) {
            queries[q] = words(WORDS_PER_QUERY[RANDOM.nextInt(WORDS_PER_QUERY.length)]);
            terms[q] = new Term[queries[q].length];
            for (int qw = 0; qw < queries[q].length; qw++) {
                terms[q][qw] = new Term(FIELD, queries[q][qw]);
            }
        }

        // Warm up.
        new PhraseFilter(FIELD, queries[0]).getDocIdSet(reader);

        for (int round = 0; round < ROUNDS; round++) {
            System.out.println();
            String name1 = "filter";
            String name2 = "query";
            long ms1 = 0, ms2 = 0;
            // Two steps per round; (round & 1) alternates which variant runs
            // first so ordering effects average out across rounds.
            for (int step = 0; step < 2; step++) {
                System.gc();
                System.gc();
                System.gc();
                if (step == (round & 1)) {
                    // Variant 1: PhraseFilter wrapped in a FilteredQuery.
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (String[] queryWords : queries) {
                        PhraseFilter pf = new PhraseFilter(
                                new FilterIntersectionProvider(TermsFilter.from(new Term("second", "yes"))),
                                FIELD, queryWords);
                        hits += searcher.search(new FilteredQuery(new MatchAllDocsQuery(), pf), 1).totalHits;
                    }
                    ms1 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name1 + " in " + ms1 + "ms with " + hits + " hits");
                } else {
                    // Variant 2: classic PhraseQuery combined with a TermQuery.
                    long millis = System.currentTimeMillis();
                    long hits = 0;
                    for (Term[] queryTerms : terms) {
                        PhraseQuery pq = new PhraseQuery();
                        for (Term term : queryTerms) {
                            pq.add(term);
                        }
                        Query query = BooleanQueryBuilder.builder()
                                .must(new TermQuery(new Term("second", "yes"))).must(pq).build();
                        hits += searcher.search(query, 1).totalHits;
                    }
                    ms2 = System.currentTimeMillis() - millis;
                    System.out.println("Finished " + name2 + " in " + ms2 + "ms with " + hits + " hits");
                }
            }
            System.out.println(name1 + " took " + (int) ((100.0 * ms1) / ms2) + "% as much time as " + name2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.greplin.lucene.query.PredicateBonusQueryTest.java
License:Apache License
@Test public void testBasics() throws Exception { IndexWriter writer = new IndexWriter(this.directory, new IndexWriterConfig(Version.LUCENE_35, new WhitespaceAnalyzer(Version.LUCENE_35))); writer.addDocument(new DocumentBuilder().add("value", "5").build()); writer.close();/*from ww w .j av a 2 s .c o m*/ IndexReader reader = IndexReader.open(this.directory); IndexSearcher searcher = new IndexSearcher(reader); Query query = new ConstantScoreQuery(new TermQuery(new Term("value", "5"))); Assert.assertEquals(1.0, searcher.search(query, 1).getMaxScore(), 0.00001); Query noBonus = new PredicateBonusQuery(query, Predicates.NONE, 10.0f); Assert.assertEquals(1.0, searcher.search(noBonus, 1).getMaxScore(), 0.00001); Query bonus = new PredicateBonusQuery(query, Predicates.ALL, 100.0f); Assert.assertEquals(101.0, searcher.search(bonus, 1).getMaxScore(), 0.00001); Query noMatch = new TermQuery(new Term("value", "not5")); Assert.assertEquals(Double.NaN, searcher.search(noMatch, 1).getMaxScore(), 0.00001); Query noMatchNoBonus = new PredicateBonusQuery(noMatch, Predicates.NONE, 10.0f); Assert.assertEquals(Double.NaN, searcher.search(noMatchNoBonus, 1).getMaxScore(), 0.00001); Query noMatchIgnoresBonus = new PredicateBonusQuery(noMatch, Predicates.ALL, 100.0f); Assert.assertEquals(Double.NaN, searcher.search(noMatchIgnoresBonus, 1).getMaxScore(), 0.00001); }
From source file:com.helger.pd.indexer.lucene.PDLucene.java
License:Apache License
public PDLucene() throws IOException { // Where to store the index files final Path aPath = getLuceneIndexDir().toPath(); m_aDir = FSDirectory.open(aPath);/*from w ww .j ava 2 s.c o m*/ // Analyzer to use m_aAnalyzer = createAnalyzer(); // Create the index writer final IndexWriterConfig aWriterConfig = new IndexWriterConfig(m_aAnalyzer); aWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); m_aIndexWriter = new IndexWriter(m_aDir, aWriterConfig); // Reader and searcher are opened on demand s_aLogger.info("Lucene index operating on " + aPath); }
From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.LuceneIndexer.java
License:Open Source License
/** * Get the index writer in order to perform adding the documents to the index file. Initialize the * index writer if it hasn't been created. * /*from w ww.ja v a2 s.com*/ * @return the index writer which can add the documents to the index * @throws IngestionException */ private IndexWriter getIndexWriter() throws IngestionException { if (indexWriter == null) { try { IndexWriterConfig config = new IndexWriterConfig(SingletonAnalyzer.getAnalyzer()); indexWriter = new IndexWriter(indexDir, config); } catch (IOException e) { logger.fatal(Messages.getString("RetrieveAndRank.DIR_OPEN_FAIL")); //$NON-NLS-1$ throw new IngestionException(e); } } return indexWriter; }