List of usage examples for org.apache.lucene.index.IndexWriterConfig#setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:natural.language.qa.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" // TODO: Change the link with every release (or: fill in some less error-prone alternative here...) + "See http://lucene.apache.org/java/3_1/demo.html for details."; String indexPath = "index"; String docsPath = null;/*from w w w . j a va 2 s .c o m*/ boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call optimize here. 
This can be // a costly operation, so generally it's only worth // it when your index is relatively static (ie you're // done adding documents to it): // // writer.optimize(); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:net.asteasolutions.cinusuidi.sluncho.documentIndex.DocumentIndex.java
/**
 * Opens the file-system directory at {@code indexPath} and assigns {@code writer}
 * a new {@code IndexWriter} that uses a {@code StandardAnalyzer} and
 * {@code OpenMode.CREATE}.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public void init() throws IOException {
    final Directory indexDirectory = FSDirectory.open(Paths.get(indexPath));
    final IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writer = new IndexWriter(indexDirectory, writerConfig);
}
From source file:net.bobah.mail.Indexer.java
License:Apache License
private void runEx() throws Exception { final File dir = new File(config.getProperty("net.bobah.mail.local.folder")); if (!dir.exists() || !dir.isDirectory()) { throw new IllegalArgumentException(String.format("\"%s\" does not exist or is not a directory", dir)); }//w w w . j av a2 s . c om Collection<File> files = findFiles(dir, new FileFilter() { @Override public boolean accept(File file) { return file.getName().endsWith(".eml"); } }, new Comparator<File>() { @Override public int compare(File l, File r) { return Long.compare(l.lastModified(), r.lastModified()); } }); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); final File indexDir = new File(dir, "index"); final boolean indexExisted = indexDir.exists(); if (!indexExisted) indexDir.mkdirs(); final Directory idx = FSDirectory.open(indexDir); final IndexWriter writer = new IndexWriter(idx, iwc); final IndexReader reader = indexExisted ? DirectoryReader.open(idx) : null; final IndexSearcher searcher = indexExisted ? 
new IndexSearcher(reader) : null; //final AtomicLong counter = new AtomicLong(0l); try { for (final File file : files) { executor.submit(new Runnable() { @Override public void run() { try { index(file, writer, searcher); //if (counter.incrementAndGet() % 100 == 0) writer.commit(); // TODO: VL: make batch size configurable } catch (Exception e) { throw new RuntimeException(e); } } }); } shutdownExecutor(executor, log); // TODO: VL: delete stale documents from the index writer.commit(); log.info("committed index updates"); searcher.search(new MatchAllDocsQuery(), new Collector() { @Override public void setScorer(Scorer scorer) throws IOException { } @Override public void setNextReader(AtomicReaderContext unused) throws IOException { } @Override public void collect(int docID) throws IOException { Document doc = reader.document(docID); final String path = doc.get("path"); if (path != null) { try { final File file = new File(path); if (!file.exists()) { log.info("deleting index for {}", doc.get("id")); writer.deleteDocuments(new Term("id", doc.get("id"))); } } catch (SecurityException e) { log.error("exception", e); } } } @Override public boolean acceptsDocsOutOfOrder() { return true; } }); writer.commit(); log.info("committed index deletions"); } finally { try { // close writer without commit (see explicit commits above) writer.rollback(); } catch (IOException e) { log.error("exception while closing writer", e); } } }
From source file:net.faustinelli.javafx.ensemble.search.BuildEnsembleSearchIndex.java
License:Open Source License
public static void main(String[] args) throws Exception { File samplesFilesDir = new File("build/classes/ensemble/"); File indexDir = new File("build/classes/ensemble/search/index"); File docDir = new File("../../../artifacts/sdk/docs/api"); File samplesDir = new File("src/ensemble/samples"); // create index ///System.out.println("Indexing to directory '" + indexDir + "'..."); long start = System.currentTimeMillis(); Directory dir = FSDirectory.open(indexDir); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); iwc.setOpenMode(OpenMode.CREATE); // generate and write index of all java doc and samples IndexWriter writer = new IndexWriter(dir, iwc); List<String> samplesFileList = new ArrayList<String>(); indexSamples(writer, samplesDir, samplesFileList); try {/* ww w .j av a 2 s . c o m*/ indexJavaDocAllClasses(writer, docDir); } catch (Exception e) { System.out.println( "\nWarning: We were not able to locate the JavaFX API documentation for your build environment.\n" + "Ensemble search will not include the API documentation.\n"); } writer.close(); // create a listAll.txt file that is used FileWriter listAllOut = new FileWriter(new File(indexDir, "listAll.txt")); for (String fileName : dir.listAll()) { if (!"listAll.txt".equals(fileName)) { // don't include the "listAll.txt" file Long length = dir.fileLength(fileName); listAllOut.write(fileName); listAllOut.write(':'); listAllOut.write(length.toString()); listAllOut.write('\n'); } } listAllOut.flush(); listAllOut.close(); FileWriter sampleFilesCache = new FileWriter( new File(samplesFilesDir, "net/faustinelli/javafx/ensemble/samplesAll.txt")); for (String oneSample : samplesFileList) { sampleFilesCache.write(oneSample); sampleFilesCache.write('\n'); } sampleFilesCache.flush(); sampleFilesCache.close(); // print time taken ///System.out.println(System.currentTimeMillis() - start + " total milliseconds"); }
From source file:net.mad.ads.db.db.index.AdDBLuceneIndex.java
License:Open Source License
/**
 * Creates a fresh in-memory index and wires up the writer, reader and searcher on it.
 *
 * <p>The writer uses a {@code KeywordAnalyzer} with {@code OpenMode.CREATE}; the
 * reader is obtained from the writer and the searcher wraps that reader.
 *
 * @throws IOException if the writer or reader cannot be opened
 */
@Override
public void open() throws IOException {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_33, new KeywordAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);

    this.index = new RAMDirectory();
    this.writer = new IndexWriter(this.index, writerConfig);
    this.reader = IndexReader.open(this.writer, true);
    this.searcher = new IndexSearcher(this.reader);
}
From source file:net.oneandone.pommes.model.Database.java
License:Apache License
/**
 * Deletes every document whose {@code ORIGIN} field starts with one of the given prefixes.
 *
 * <p>Calls {@code close()} first, then opens a writer in {@code APPEND} mode on the
 * index directory and issues one prefix delete per entry.
 *
 * @param prefixes origin prefixes to delete
 * @throws IOException if the index cannot be opened or updated
 */
public void remove(List<String> prefixes) throws IOException {
    IndexWriter writer;
    IndexWriterConfig config;

    close();
    // No analyzer is needed: this writer only deletes documents.
    config = new IndexWriterConfig(Version.LUCENE_4_9, null);
    config.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    writer = new IndexWriter(getIndexLuceneDirectory(), config);
    try {
        for (String prefix : prefixes) {
            writer.deleteDocuments(new PrefixQuery(new Term(ORIGIN, prefix)));
        }
    } finally {
        // Close the writer even when a delete fails, so it is not left open.
        writer.close();
    }
}
From source file:net.oneandone.pommes.model.Database.java
License:Apache License
public void index(Iterator<Document> iterator) throws IOException { IndexWriter writer;/*from ww w . j ava 2 s . com*/ IndexWriterConfig config; Document doc; close(); // no analyzer, I have String fields only config = new IndexWriterConfig(Version.LUCENE_4_9, null); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); writer = new IndexWriter(getIndexLuceneDirectory(), config); while (iterator.hasNext()) { doc = iterator.next(); writer.updateDocument(new Term(ORIGIN, doc.get(ORIGIN)), doc); } writer.close(); }
From source file:net.ontopia.infoset.fulltext.impl.lucene.LuceneIndexer.java
License:Apache License
protected IndexWriter getWriter() { if (writer == null) { try {//from w ww .j a v a 2s . co m if (reader != null) { reader.close(); reader = null; } IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); writer = new IndexWriter(dir, config); } catch (IOException e) { throw new OntopiaRuntimeException(e); } } return writer; }
From source file:net.riezebos.thoth.content.search.Indexer.java
License:Apache License
/**
 * Creates the index writer for the directory at {@code indexFolder}.
 *
 * @param wipeIndex {@code true} to open in {@code CREATE} mode, {@code false} to
 *        open in {@code CREATE_OR_APPEND} mode
 * @return a new writer configured with a {@code StandardAnalyzer}
 * @throws IOException if the directory or the writer cannot be opened
 */
protected IndexWriter getWriter(boolean wipeIndex) throws IOException {
    final OpenMode mode = wipeIndex ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND;

    final Directory indexDirectory = FSDirectory.open(Paths.get(indexFolder));
    final IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    writerConfig.setOpenMode(mode);

    return new IndexWriter(indexDirectory, writerConfig);
}
From source file:net.semanticmetadata.lire.indexing.parallel.ParallelIndexer.java
License:Open Source License
public void run() { IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, new StandardAnalyzer()); config.setOpenMode(openMode); config.setCodec(new LireCustomCodec()); try {/*from w w w .jav a 2 s. c o m*/ if (imageDirectory != null) System.out.println("Getting all images in " + imageDirectory + "."); writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config); if (imageList == null) { files = FileUtils.getAllImages(new File(imageDirectory), true); } else { files = new LinkedList<String>(); BufferedReader br = new BufferedReader(new FileReader(imageList)); String line = null; while ((line = br.readLine()) != null) { if (line.trim().length() > 3) files.add(line.trim()); } } numImages = files.size(); System.out.printf("Indexing %d images\n", files.size()); Thread p = new Thread(new Producer()); p.start(); LinkedList<Thread> threads = new LinkedList<Thread>(); long l = System.currentTimeMillis(); for (int i = 0; i < numberOfThreads; i++) { Thread c = new Thread(new Consumer()); c.start(); threads.add(c); } Thread m = new Thread(new Monitoring()); m.start(); for (Iterator<Thread> iterator = threads.iterator(); iterator.hasNext();) { iterator.next().join(); } long l1 = System.currentTimeMillis() - l; int seconds = (int) (l1 / 1000); int minutes = seconds / 60; seconds = seconds % 60; // System.out.println("Analyzed " + overallCount + " images in " + seconds + " seconds, ~" + ((overallCount>0)?(l1 / overallCount):"n.a.") + " ms each."); System.out.printf("Analyzed %d images in %03d:%02d ~ %3.2f ms each.\n", overallCount, minutes, seconds, ((overallCount > 0) ? ((float) l1 / (float) overallCount) : -1f)); writer.commit(); writer.forceMerge(1); writer.close(); threadFinished = true; // add local feature hist here } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } }