List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:cn.fql.blogspider.IndexMain.java
License:Open Source License
public static void main(String[] args) { String indexPath = "d:/test/index"; String docsPath = "d:/test/docs"; boolean create = true; File docDir = new File(docsPath); Date start = new Date(); try {//from ww w. j av a2 s. c o m System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); //Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40); Analyzer analyzer = new IKAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer); if (create) { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); writer.close(); Date end = new Date(); System.out.println((end.getTime() - start.getTime()) + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:cn.hbu.cs.esearch.index.DiskSearchIndex.java
License:Apache License
/** * Opens an index modifier./* ww w .ja v a 2 s . c om*/ * @param analyzer Analyzer * @return IndexModifer instance */ @Override public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException { if (_indexWriter != null) { return _indexWriter; } Directory directory = _dirMgr.getDirectory(true); log.info("opening index writer at: " + _dirMgr.getPath()); EsearchMergePolicy mergePolicy = new EsearchMergePolicy(); mergePolicy.setMergePolicyParams(_mergePolicyParams); // hao: autocommit is set to false with this constructor IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); _deletionPolicy = new ZoieIndexDeletionPolicy(); config.setIndexDeletionPolicy(_deletionPolicy); config.setMergeScheduler(_mergeScheduler); config.setMergePolicy(mergePolicy); config.setReaderPooling(false); if (similarity != null) { config.setSimilarity(similarity); } config.setRAMBufferSizeMB(5); IndexWriter idxWriter = new IndexWriter(directory, config); // we need retrieve deletionPolicy from IndexWriter since deletionPolicy is deep cloned _deletionPolicy = (ZoieIndexDeletionPolicy) (idxWriter.getConfig().getIndexDeletionPolicy()); _indexWriter = idxWriter; return idxWriter; }
From source file:cn.hbu.cs.esearch.index.RAMSearchIndex.java
License:Apache License
@Override public IndexWriter openIndexWriter(Analyzer analyzer, Similarity similarity) throws IOException { if (_indexWriter != null) { return _indexWriter; }/*from ww w . ja va2s . co m*/ EsearchMergePolicy mergePolicy = new EsearchMergePolicy(); mergePolicy.setMergePolicyParams(_mergePolicyParams); mergePolicy.setUseCompoundFile(false); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(OpenMode.CREATE_OR_APPEND); config.setMergeScheduler(_mergeScheduler); config.setMergePolicy(mergePolicy); config.setReaderPooling(false); if (similarity != null) { config.setSimilarity(similarity); } config.setRAMBufferSizeMB(3); IndexWriter idxWriter = new IndexWriter(_directory, config); _indexWriter = idxWriter; return idxWriter; }
From source file:cn.hbu.cs.esearch.store.LuceneStore.java
License:Apache License
@Override public void open() throws IOException { if (closed) { IndexWriterConfig idxWriterConfig = new IndexWriterConfig(Version.LUCENE_43, new StandardAnalyzer(Version.LUCENE_43)); idxWriterConfig.setMergePolicy(new EsearchMergePolicy()); idxWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); indexWriter = new IndexWriter(directory, idxWriterConfig); updateReader();/*www . ja v a 2 s .c o m*/ closed = false; } }
From source file:cn.larry.search.book.index.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from w w w . jav a 2s. co m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmxm or -Xmx1g): // // iwc.setRAMBufferSizeMB(.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:collene.Freedb.java
License:Apache License
public static void BuildIndex(Directory directory) throws Exception { String freedbPath = "/Users/gdusbabek/Downloads/freedb-complete-20140701.tar.bz2"; if (directory == null) { System.out.println("Need to specify: { memory | file | cassandra }. Did you misspell something?"); System.exit(-1);/*from w w w . j av a 2s. c om*/ } FreeDbReader reader = new FreeDbReader(new File(freedbPath), 50000); reader.start(); long indexStart = System.currentTimeMillis(); Collection<Document> documents = new ArrayList<Document>(BATCH_SIZE); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, config); // stop after this many documents. final int maxDocuments = 400000; //Integer.MAX_VALUE; FreeDbEntry entry = reader.next(); int count = 0; while (entry != null && count < maxDocuments) { Document doc = new Document(); String any = entry.toString(); doc.add(new Field("any", any, TextField.TYPE_STORED)); doc.add(new Field("artist", entry.getArtist(), TextField.TYPE_NOT_STORED)); doc.add(new Field("album", entry.getAlbum(), TextField.TYPE_NOT_STORED)); doc.add(new Field("title", entry.getTitle(), TextField.TYPE_NOT_STORED)); doc.add(new Field("genre", entry.getGenre(), TextField.TYPE_NOT_STORED)); doc.add(new Field("year", entry.getYear(), TextField.TYPE_NOT_STORED)); for (int i = 0; i < entry.getTrackCount(); i++) { doc.add(new Field("track", entry.getTrack(i), TextField.TYPE_STORED)); } documents.add(doc); if (VERBOSE) { out.println(any); } if (documents.size() == BATCH_SIZE) { //out.println(String.format("Adding batch at count %d", count)); writer.addDocuments(documents); //out.println("done"); documents.clear(); } count += 1; if (count >= MAX_ENTRIES) { // done indexing. break; } entry = reader.next(); if (count % 100000 == 0) { out.println(String.format("Indexed %d documents", count)); // do a quick morrissey search for fun. // IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(ColDirectory.open( // new CassandraIO(8192, "collene", "cindex").start("127.0.0.1:9042"), // new CassandraIO(8192, "collene", "cmeta").start("127.0.0.1:9042"), // new CassandraIO(8192, "collene", "clock").start("127.0.0.1:9042") // ))); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer); long searchStart = System.currentTimeMillis(); Query query = parser.parse("morrissey"); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); for (ScoreDoc d : docs.scoreDocs) { out.println(String.format("%d %.2f %d", d.doc, d.score, d.shardIndex)); } } } if (documents.size() > 0) { out.println(String.format("Adding batch at count %d", count)); writer.addDocuments(documents); out.println("done"); documents.clear(); // do a quick morrissey search for fun. IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "any", analyzer); long searchStart = System.currentTimeMillis(); Query query = parser.parse("morrissey"); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); for (ScoreDoc d : docs.scoreDocs) { out.println(String.format("%d %.2f %d", d.doc, d.score, d.shardIndex)); } } long indexTime = System.currentTimeMillis() - indexStart; out.println(String.format("Indexed %d things in %d ms (%s)", count, indexTime, directory.toString())); // long startMerge = System.currentTimeMillis(); // writer.forceMerge(1, true); // long endMerge = System.currentTimeMillis(); // out.println(String.format("merge took %d ms", endMerge-startMerge)); out.println("I think these are the files:"); for (String s : directory.listAll()) { out.println(s); } writer.close(true); directory.close(); }
From source file:collene.TestIndexing.java
License:Apache License
@Test public void test() throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); // write it out. IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); for (int i = 0; i < 100; i++) { Collection<Document> documents = new ArrayList<Document>(); Document doc = new Document(); doc.add(new Field("key", "aaa_" + i, TextField.TYPE_STORED)); doc.add(new Field("not", "notaaa", TextField.TYPE_NOT_STORED)); doc.add(new Field("meta", "aaa_meta_aaa_" + i, TextField.TYPE_STORED)); documents.add(doc);/*from w ww. j av a 2s . c o m*/ writer.addDocuments(documents); writer.commit(); writer.forceMerge(1); writer.forceMergeDeletes(true); } // now read it back. IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "key", analyzer); Query query = parser.parse("aaa_4"); TopDocs docs = searcher.search(query, 1); int idToDelete = docs.scoreDocs[0].doc; Assert.assertTrue(docs.totalHits > 0); query = parser.parse("fersoius"); docs = searcher.search(query, 1); Assert.assertFalse(docs.totalHits > 0); // delete that document. DirectoryReader reader = DirectoryReader.open(writer, true); writer.tryDeleteDocument(reader, idToDelete); reader.close(); writer.close(); // list files Set<String> files = new HashSet<String>(); System.out.println("Listing files for " + directory.toString()); for (String file : directory.listAll()) { files.add(file); System.out.println(" " + file); } if (strictFileChecking) { System.out.println("String file checking..."); Sets.SetView<String> difference = Sets.difference(expectedFiles, files); Assert.assertEquals(Joiner.on(",").join(difference), 0, difference.size()); } reader = DirectoryReader.open(directory); searcher = new IndexSearcher(reader); query = parser.parse("aaa_4"); docs = searcher.search(query, 1); reader.close(); Assert.assertFalse(docs.totalHits > 0); directory.close(); }
From source file:collene.TestLuceneAssumptions.java
License:Apache License
@Test public void testCanSeeUpdatesAfterAdd() throws Exception { // this verifies that any reader can see updates after documents are added. File fdir = TestUtil.getRandomTempDir(); pleaseDelete.add(fdir);/*w w w.ja v a2 s . c om*/ Directory dir = FSDirectory.open(fdir); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, config); Document doc0 = new Document(); Document doc1 = new Document(); doc0.add(new Field("f0", "aaa", TextField.TYPE_STORED)); doc1.add(new Field("f0", "bbb", TextField.TYPE_STORED)); List<Document> docs = Lists.newArrayList(doc0, doc1); writer.addDocuments(docs, analyzer); IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "f0", new StandardAnalyzer(Version.LUCENE_4_9)); Query query = parser.parse("bbb"); TopDocs topDocs = searcher.search(query, 10); Assert.assertEquals(1, topDocs.totalHits); Assert.assertEquals(1, topDocs.scoreDocs.length); writer.close(); dir.close(); }
From source file:collene.TestLuceneAssumptions.java
License:Apache License
@Test public void listAfterEachStep() throws Exception { File fdir = TestUtil.getRandomTempDir(); pleaseDelete.add(fdir);//www. jav a 2 s . c om Directory dir = FSDirectory.open(fdir); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); //System.out.println("Before creating writer"); dump(fdir, dir); IndexWriter writer = new IndexWriter(dir, config); //System.out.println("After creating writer"); dump(fdir, dir); List<Document> docs = new ArrayList<Document>(); for (int i = 0; i < 50000; i++) { Document doc = new Document(); for (int f = 0; f < 5; f++) { doc.add(new Field("field_" + f, TestUtil.randomString(128), TextField.TYPE_STORED)); } docs.add(doc); } writer.addDocuments(docs, analyzer); docs.clear(); //System.out.println("After doc add 0"); dump(fdir, dir); for (int i = 0; i < 50000; i++) { Document doc = new Document(); for (int f = 0; f < 5; f++) { doc.add(new Field("field_" + f, TestUtil.randomString(128), TextField.TYPE_STORED)); } docs.add(doc); } writer.addDocuments(docs, analyzer); docs.clear(); //System.out.println("After doc add 1"); dump(fdir, dir); writer.commit(); //System.out.println("After commit"); dump(fdir, dir); writer.forceMerge(1, true); //System.out.println("Right after merge"); dump(fdir, dir); try { Thread.currentThread().sleep(5000); } catch (Exception ex) { } //System.out.println("After sleeping after merge"); dump(fdir, dir); writer.close(); //System.out.println("After writer close"); dump(fdir, dir); dir.close(); //System.out.println("After dir close"); dump(fdir, dir); }
From source file:collene.TestShakespeare.java
License:Apache License
@Test public void rest() throws IOException, ParseException { File shakespeareDir = new File("src/test/resources/shakespeare"); File[] files = shakespeareDir.listFiles(new FileFilter() { @Override/*from w w w . j a v a2 s . com*/ public boolean accept(File pathname) { return !pathname.isHidden(); } }); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(directory, config); long startIndexTime = System.currentTimeMillis(); final int flushLines = 200; int totalLines = 0; Collection<Document> documents = new ArrayList<Document>(); for (File f : files) { String play = f.getName(); int lineNumber = 1; BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(f))); String line = reader.readLine(); while (line != null) { // index it. Document doc = new Document(); doc.add(new NumericDocValuesField("line", lineNumber)); doc.add(new Field("play", play, TextField.TYPE_STORED)); doc.add(new Field("content", line, TextField.TYPE_STORED)); documents.add(doc); totalLines += 1; if (totalLines % flushLines == 0) { writer.addDocuments(documents); documents.clear(); } lineNumber += 1; line = reader.readLine(); } reader.close(); } if (documents.size() > 0) { writer.addDocuments(documents); } long endIndexTime = System.currentTimeMillis(); System.out.println( String.format("Index for %s took %d ms", directory.toString(), endIndexTime - startIndexTime)); //System.out.println(String.format("%s committed", directory.getClass().getSimpleName())); // writer.forceMerge(1); // System.out.println(String.format("%s merged", directory.getClass().getSimpleName())); // let's search! IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(writer, false)); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "content", analyzer); String[] queryTerms = new String[] { "trumpet" }; for (String term : queryTerms) { long searchStart = System.currentTimeMillis(); Query query = parser.parse(term); TopDocs docs = searcher.search(query, 10); long searchEnd = System.currentTimeMillis(); System.out.println(String.format("%s %d total hits in %d", directory.getClass().getSimpleName(), docs.totalHits, searchEnd - searchStart)); for (ScoreDoc doc : docs.scoreDocs) { System.out.println(String.format("%d %.2f %d", doc.doc, doc.score, doc.shardIndex)); } } writer.close(true); //System.out.println(String.format("%s closed", directory.getClass().getSimpleName())); System.out.println("I think these are the files:"); for (String s : directory.listAll()) { System.out.println(s); } directory.close(); }