List of usage examples for org.apache.lucene.benchmark.byTask.feeds DocMaker close
@Override public void close() throws IOException
From source file:info.boytsov.lucene.CreateIndex.java
License:Open Source License
public static void main(String[] args) throws Exception { if (args.length != 3 && args.length != 4) { printUsage();/*from w w w .j a v a 2 s .c o m*/ System.exit(1); } String indexType = args[0]; String indexSource = args[1]; int commitInterval = 1000000; if (args.length >= 4) { commitInterval = Integer.parseInt(args[3]); } System.out.println("Commiting after indexing " + commitInterval + " docs"); File outputDir = new File(args[2]); if (!outputDir.exists()) { if (!outputDir.mkdirs()) { System.out.println("couldn't create " + outputDir.getAbsolutePath()); return; } } if (!outputDir.isDirectory()) { System.out.println(outputDir.getAbsolutePath() + " is not a directory!"); return; } if (!outputDir.canWrite()) { System.out.println("Can't write to " + outputDir.getAbsolutePath()); return; } FSDirectory dir = FSDirectory.open(outputDir); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);// default // stop // words IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);// overwrites // if // needed IndexWriter indexWriter = new IndexWriter(dir, config); DocMaker docMaker = new DocMaker(); Properties properties = new Properties(); properties.setProperty("content.source.forever", "false"); // will // parse // each // document // only // once properties.setProperty("doc.index.props", "true"); // We want to store small-size fields like URL or even title ... properties.setProperty("doc.stored", "true"); // but not the large one (great savings, 3x reduction in space)! properties.setProperty("doc.body.stored", "false"); ContentSource source = CreateSource(indexType, indexSource, properties); if (source == null) { System.err.println("Failed to create a source: " + indexType + "(" + indexSource + ")"); printUsage(); System.exit(1); } Config c = new Config(properties); source.setConfig(c); source.resetInputs();// though this does not seem needed, it is // (gets the file opened?) docMaker.setConfig(c, source); int count = 0; System.out.println("Starting Indexing of " + indexType + " source " + indexSource); long start = System.currentTimeMillis(); Document doc; try { while ((doc = docMaker.makeDocument()) != null) { indexWriter.addDocument(doc); ++count; if (count % 5000 == 0) { System.out.println( "Indexed " + count + " documents in " + (System.currentTimeMillis() - start) + " ms"); } if (count % commitInterval == 0) { indexWriter.commit(); System.out.println("Committed"); } } } catch (org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException nmd) { System.out.println("Caught NoMoreDataException! -- Finishing"); // All done } long finish = System.currentTimeMillis(); System.out.println("Indexing " + count + " documents took " + (finish - start) + " ms"); System.out.println("Total data processed: " + source.getTotalBytesCount() + " bytes"); System.out.println("Index should be located at " + dir.getDirectory().getAbsolutePath()); docMaker.close(); indexWriter.commit(); indexWriter.close(); }