List of usage examples for org.apache.lucene.index.IndexWriterConfig.setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
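The examples below come from several projects and span Lucene 3.x through 5.x, so the IndexWriterConfig constructor arguments differ, but setOpenMode is used the same way in all of them. As a minimal sketch of the three OpenMode values (assuming a Lucene 5.x-style API where IndexWriterConfig no longer takes a Version argument, and a hypothetical index path of "/tmp/example-index" used only for illustration):

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenModeSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location used only for this sketch.
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));

        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        // OpenMode.CREATE           - always start a fresh index, discarding any existing one.
        // OpenMode.APPEND           - open an existing index; fails if none is present.
        // OpenMode.CREATE_OR_APPEND - append if an index exists, otherwise create one (the default).
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new TextField("body", "hello open mode", Field.Store.YES));
            writer.addDocument(doc);
        }
    }
}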
From source file:edu.virginia.cs.index.UserIndexer.java
/**
 * Creates the initial index files on disk
 *
 * @param indexPath
 * @return
 * @throws IOException
 */
private static IndexWriter setupIndex(String indexPath) throws IOException {
    Analyzer analyzer = new SpecialAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    config.setOpenMode(OpenMode.CREATE);
    config.setRAMBufferSizeMB(2048.0);
    FSDirectory dir = FSDirectory.open(new File(indexPath));
    IndexWriter writer = new IndexWriter(dir, config);
    return writer;
}
From source file:ensemble.compiletime.search.BuildEnsembleSearchIndex.java
License:Open Source License
public static void buildSearchIndex(List<Sample> allSamples, String javaDocBaseUrl,
        String javafxDocumentationHome, File indexDir) {
    try {
        List<Document> docs = new ArrayList<>();
        List<Callable<List<Document>>> tasks = new ArrayList<>();
        // create callables to collect data
        System.out.println("Creating Documents for Samples...");
        docs.addAll(indexSamples(allSamples));
        System.out.println("Creating tasks for getting all documentation...");
        tasks.addAll(indexJavaDocAllClasses(javaDocBaseUrl));
        tasks.addAll(indexAllDocumentation(javafxDocumentationHome));
        // execute all the tasks in 32 threads, collecting all the documents to write
        System.out.println("Executing tasks getting all documentation...");
        try {
            ThreadPoolExecutor executor = new ThreadPoolExecutor(32, 32, 30, TimeUnit.SECONDS,
                    new LinkedBlockingQueue());
            executor.setThreadFactory(new ThreadFactory() {
                int index = 0;

                @Override
                public Thread newThread(Runnable r) {
                    Thread thread = new Thread(r, "Thread-" + (++index));
                    thread.setDaemon(true);
                    return thread;
                }
            });
            List<Future<List<Document>>> results = executor.invokeAll(tasks);
            for (Future<List<Document>> future : results) {
                docs.addAll(future.get());
            }
        } catch (ExecutionException | InterruptedException ex) {
            Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
        }
        // create index
        System.out.println("Indexing to directory '" + indexDir + "'...");
        Directory dir = FSDirectory.open(indexDir);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
        iwc.setOpenMode(OpenMode.CREATE);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // write all docs
            System.out.println("Writing [" + docs.size() + "] documents to index....");
            writer.addDocuments(docs);
            // optimize the written index
            System.out.println("Optimizing search index....");
            writer.optimize();
            System.out.println("NUMBER OF INDEXED DOCUMENTS = [" + writer.numDocs() + "]");
        }
        // write file listing all the search index files, so we know what
        // is in the jar file at runtime
        try (FileWriter listAllOut = new FileWriter(new File(indexDir, "listAll.txt"))) {
            for (String fileName : dir.listAll()) {
                if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
                    Long length = dir.fileLength(fileName);
                    listAllOut.write(fileName);
                    listAllOut.write(':');
                    listAllOut.write(length.toString());
                    listAllOut.write('\n');
                }
            }
            listAllOut.flush();
        }
        System.out.println("Finished writing search index to directory '" + indexDir);
    } catch (IOException ex) {
        Logger.getLogger(BuildEnsembleSearchIndex.class.getName()).log(Level.SEVERE, null, ex);
    }
}
From source file:ensemble.search.BuildEnsembleSearchIndex.java
License:Open Source License
public static void main(String[] args) throws Exception {
    /*
    File samplesFilesDir = new File("build/classes/ensemble/");
    File indexDir = new File("build/classes/ensemble/search/index");
    File docDir = new File("../../../artifacts/sdk/docs/api");
    File samplesDir = new File("C:\Code\Sandbox\sandbox-repo\learning-java\fx\src\main\java\ensemble\samples");
    */
    File samplesFilesDir = new File("build/classes/ensemble/");
    File indexDir = new File("build/classes/ensemble/search/index");
    File docDir = new File("../../../artifacts/sdk/docs/api");
    File samplesDir = new File("src/ensemble/samples");
    // create index
    ///System.out.println("Indexing to directory '" + indexDir + "'...");
    long start = System.currentTimeMillis();
    Directory dir = FSDirectory.open(indexDir);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    // generate and write index of all java doc and samples
    IndexWriter writer = new IndexWriter(dir, iwc);
    List<String> samplesFileList = new ArrayList<String>();
    indexSamples(writer, samplesDir, samplesFileList);
    try {
        indexJavaDocAllClasses(writer, docDir);
    } catch (Exception e) {
        System.out.println(
                "\nWarning: We were not able to locate the JavaFX API documentation for your build environment.\n"
                        + "Ensemble search will not include the API documentation.\n");
    }
    writer.close();
    // create a listAll.txt file that is used
    FileWriter listAllOut = new FileWriter(new File(indexDir, "listAll.txt"));
    for (String fileName : dir.listAll()) {
        if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file
            Long length = dir.fileLength(fileName);
            listAllOut.write(fileName);
            listAllOut.write(':');
            listAllOut.write(length.toString());
            listAllOut.write('\n');
        }
    }
    listAllOut.flush();
    listAllOut.close();
    FileWriter sampleFilesCache = new FileWriter(new File(samplesFilesDir, "samplesAll.txt"));
    for (String oneSample : samplesFileList) {
        sampleFilesCache.write(oneSample);
        sampleFilesCache.write('\n');
    }
    sampleFilesCache.flush();
    sampleFilesCache.close();
    // print time taken
    ///System.out.println(System.currentTimeMillis() - start + " total milliseconds");
}
From source file:es.ua.corpus.indexer.Indexer.java
protected void _initializeComponents(String indexPath) throws IOException {
    File indexDir = new File(indexPath);
    Directory directory = FSDirectory.open(indexDir);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writer = new IndexWriter(directory, config);
}
From source file:es.ua.labidiomas.corpus.index.Indexer.java
protected void _initializeComponents(String indexPath) throws IOException {
    File indexDir = new File(indexPath);
    directory = FSDirectory.open(indexDir);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    config.setWriteLockTimeout(5000l);
    writer = new IndexWriter(directory, config);
}
From source file:es.ua.labidiomas.corpus.index.Indexer.java
private void deleteNgrams(String textID, String lang, String fileSeparator) throws IOException {
    for (int i = 1; i <= 4; i++) {
        File indexDir = new File(
                indexPath + fileSeparator + "ngrams" + fileSeparator + i + fileSeparator + lang);
        Directory directory = null;
        IndexWriter indexEraser = null;
        try {
            directory = FSDirectory.open(indexDir);
            IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
            config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            config.setWriteLockTimeout(5000l);
            indexEraser = new IndexWriter(directory, config);
            Term term = new Term("textID", textID);
            indexEraser.deleteDocuments(term);
            indexEraser.commit();
        } finally {
            if (directory != null) {
                directory.close();
            }
            if (indexEraser != null) {
                indexEraser.close();
            }
        }
    }
}
From source file:es.unizar.iaaa.crawler.butler.index.IndexFiles.java
License:Apache License
/**
 * Index all text files under a directory.
 */
public void index(String indexPath, File file) {
    if (file.exists()) {
        Date start = new Date();
        try {
            LOGGER.info("Indexing to directory '" + indexPath);
            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new EnglishAnalyzer();
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_4, analyzer);
            iwc.setOpenMode(OpenMode.CREATE);

            // Optional: for better indexing performance, if you
            // are indexing many documents, increase the RAM
            // buffer. But if you do this, increase the max heap
            // size to the JVM (eg add -Xmx512m or -Xmx1g):
            //
            // iwc.setRAMBufferSizeMB(256.0);

            IndexWriter writer = new IndexWriter(dir, iwc);
            indexDocs(writer, file);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here. This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);

            writer.close();

            Date end = new Date();
            LOGGER.info(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            LOGGER.error(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    } else {
        LOGGER.info(file.getName() + " doesn't exist");
    }
}
From source file:eu.eexcess.sourceselection.redde.indexer.BinaryIndexResource.java
License:Apache License
/**
 * opens the sample index for writing; overwrites existing one
 *
 * @param ramBufferSizeMB
 *            determines the amount of RAM that may be used for buffering
 * @throws IOException
 *             if unable to open/create index
 */
void openOutIndex(double ramBufferSizeMB) throws IOException {
    try {
        Directory indexDirectory = FSDirectory.open(new File(outIndexPath));
        Analyzer analyzer = new EnglishAnalyzer();
        IndexWriterConfig writerConfig = new IndexWriterConfig(luceneVersion, analyzer);
        writerConfig.setOpenMode(OpenMode.CREATE);
        writerConfig.setRAMBufferSizeMB(ramBufferSizeMB);
        outIndexWriter = new IndexWriter(indexDirectory, writerConfig);
    } catch (IOException e) {
        logger.log(Level.SEVERE, "unable to open/create index at [" + outIndexPath + "]", e);
        throw e;
    }
}
From source file:eu.eexcess.sourceselection.redde.indexer.TrecToLuceneIndexBuilder.java
License:Apache License
/**
 * Builds/overwrites existing Lucene index using TREC documents as source
 */
public void index() {
    Date startTimestamp = new Date();
    final File documentsDirectory = new File(documentsPath);

    if (!documentsDirectory.exists() || !documentsDirectory.canRead()) {
        logger.severe("cannot access document directory [" + documentsDirectory.getAbsolutePath() + "]");
    } else {
        try {
            logger.info("processing directory [" + documentsPath + "] to index [" + indexPath + "]");
            Directory indexDirectory = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new EnglishAnalyzer();
            IndexWriterConfig writerConfig = new IndexWriterConfig(luceneVersion, analyzer);
            writerConfig.setOpenMode(OpenMode.CREATE);
            writerConfig.setRAMBufferSizeMB(ramBufferSize);

            IndexWriter indexWriter = new IndexWriter(indexDirectory, writerConfig);
            indexDocs(indexWriter, documentsDirectory);
            indexWriter.commit();
            indexWriter.close();

            Date stopTimestamp = new Date();
            logger.info("processed [" + dirsCount + "] dirs [" + filesCount + "] files [" + documentsTotal
                    + "] documents [" + filesSkipped + "] files skipped in ["
                    + (stopTimestamp.getTime() - startTimestamp.getTime()) + "] ms]");
        } catch (IOException e) {
            logger.log(Level.SEVERE, "failed indexing documents", e);
        }
    }
}
From source file:examples.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}