List of usage examples for org.apache.lucene.index IndexWriter commit
@Override public final long commit() throws IOException
Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
From source file:framework.retrieval.engine.index.create.impl.RIndexWriter.java
License:Apache License
/** * ?/*from w w w . j a v a 2 s . co m*/ * @param indexPathType * @param terms */ public void deleteDocument(String indexPathType, List<Term> terms) { if (terms == null || terms.size() <= 0) { return; } // RetrievalIndexLock.getInstance().lock(indexPathType); IndexWriter indexWriter = null; try { int length = terms.size(); RetrievalUtil.debugLog(log, "" + length + "" + indexPathType); try { indexWriter = getIndexWriter(indexPathType); indexWriter.deleteDocuments(terms.toArray(new Term[length])); } catch (Exception e) { e.printStackTrace(); } } finally { if (indexWriter != null) { try { indexWriter.commit(); } catch (Exception e) { e.printStackTrace(); } try { indexWriter.close(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } } // RetrievalIndexLock.getInstance().unlock(indexPathType); } }
From source file:framework.retrieval.engine.index.impl.RIndexManager.java
License:Apache License
/**
 * Creates the index directory for {@code indexPathType} if it does not exist.
 * Tries to open an existing index in append mode first; on failure falls back
 * to creating a fresh index. The whole operation runs under the retrieval
 * index lock so concurrent creators cannot race.
 */
public void create() {
    RetrievalIndexLock.getInstance().lock(indexPathType);
    IndexWriter indexWriterNormal = null;
    try {
        indexWriterNormal = new IndexWriter(
                RetrievalDirectoryProvider.getDirectory(luceneProperties.getIndexBasePath(), indexPathType),
                analyzerFactory.createIndexAnalyzer(), false, MaxFieldLength.UNLIMITED);
    } catch (CorruptIndexException e) {
        throw new RetrievalCreateIndexException(e);
    } catch (LockObtainFailedException e) {
        // Another writer holds the Lucene directory lock; force-release it.
        indexWriterNormal = null;
        indexWriteProvider.unlockDir(indexPathType);
    } catch (IOException e) {
        // Append-mode open failed (index presumably absent) -- create a new one.
        IndexWriter indexWriter = indexWriteProvider.createNewIndexWriter(indexPathType);
        try {
            indexWriter.commit();
        } catch (Exception e1) {
            // FIX: previously swallowed silently; log so failures are visible.
            RetrievalUtil.errorLog(log, e1);
        }
        try {
            indexWriter.close();
        } catch (Exception e1) {
            RetrievalUtil.errorLog(log, e1);
        }
    } finally {
        // FIX: merged the two duplicate null-checks into one block; commit and
        // close are still attempted independently.
        if (indexWriterNormal != null) {
            try {
                indexWriterNormal.commit();
            } catch (Exception e) {
                RetrievalUtil.errorLog(log, e);
            }
            try {
                indexWriterNormal.close();
            } catch (Exception e) {
                RetrievalUtil.errorLog(log, e);
            }
        }
        RetrievalIndexLock.getInstance().unlock(indexPathType);
    }
}
From source file:framework.retrieval.engine.index.impl.RIndexManager.java
License:Apache License
/** * ?/* ww w .ja v a 2 s .c o m*/ */ public void reCreate() { RetrievalIndexLock.getInstance().lock(indexPathType); luceneProperties.getIndexWriterPool().remove(indexPathType); try { IndexWriter indexWriter = indexWriteProvider.createNewIndexWriter(indexPathType); try { indexWriter.commit(); } catch (Exception e1) { } try { indexWriter.close(); } catch (Exception e1) { } } catch (Exception e) { RetrievalUtil.errorLog(log, e); } finally { RetrievalIndexLock.getInstance().unlock(indexPathType); } }
From source file:framework.retrieval.engine.index.impl.RIndexManager.java
License:Apache License
/** * /*from w w w . j a v a 2s .c o m*/ */ public void optimize() { RetrievalIndexLock.getInstance().lock(indexPathType); IndexWriter indexWriter = indexWriteProvider.createNormalIndexWriter(indexPathType); try { try { indexWriter.optimize(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } try { indexWriter.commit(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } try { indexWriter.close(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } } finally { RetrievalIndexLock.getInstance().unlock(indexPathType); } }
From source file:gate.creole.ir.lucene.LuceneIndexManager.java
License:Open Source License
/** Creates index directory and indexing all * documents in the corpus. *//*from ww w. j a v a 2s . c o m*/ @Override public void createIndex() throws IndexException { if (indexDefinition == null) throw new GateRuntimeException("Index definition is null!"); if (corpus == null) throw new GateRuntimeException("Corpus is null!"); String location = indexDefinition.getIndexLocation(); try { File file = new File(location); if (file.exists()) { if (file.isDirectory() && file.listFiles().length > 0) { throw new IndexException(location + " is not empty directory"); } if (!file.isDirectory()) { throw new IndexException("Only empty directory can be index path"); } } /* Niraj */ // ok so lets put the corpus index feature corpus.getFeatures().put(CORPUS_INDEX_FEATURE, CORPUS_INDEX_FEATURE_VALUE); /* End */ IndexWriter writer = new IndexWriter(FSDirectory.open(new File(location)), new IndexWriterConfig(Version.LUCENE_31, new SimpleAnalyzer(Version.LUCENE_30)) .setOpenMode(OpenMode.CREATE)); /*IndexWriter writer = new IndexWriter( FSDirectory.open(new File(location)), new SimpleAnalyzer(Version.LUCENE_30), true, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH) );*/ for (int i = 0; i < corpus.size(); i++) { boolean isLoaded = corpus.isDocumentLoaded(i); gate.Document gateDoc = corpus.get(i); writer.addDocument(getLuceneDoc(gateDoc)); if (!isLoaded) { corpus.unloadDocument(gateDoc); gate.Factory.deleteResource(gateDoc); } } //for (all documents) writer.commit(); writer.close(); corpus.sync(); } catch (java.io.IOException ioe) { throw new IndexException(ioe.getMessage()); } catch (gate.persist.PersistenceException pe) { pe.printStackTrace(); } }
From source file:gate.creole.ir.lucene.LuceneIndexManager.java
License:Open Source License
/** Optimize existing index. */ @Override//from w ww. j av a 2 s . c o m public void optimizeIndex() throws IndexException { if (indexDefinition == null) throw new GateRuntimeException("Index definition is null!"); try { IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDefinition.getIndexLocation())), new IndexWriterConfig(Version.LUCENE_31, new SimpleAnalyzer(Version.LUCENE_30)) .setOpenMode(OpenMode.APPEND)); /*IndexWriter writer = new IndexWriter( FSDirectory.open(new File(indexDefinition.getIndexLocation())), new SimpleAnalyzer(Version.LUCENE_30), false, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));*/ //writer.optimize(); writer.forceMerge(1, true); writer.commit(); writer.close(); } catch (java.io.IOException ioe) { throw new IndexException(ioe.getMessage()); } }
From source file:index.IndexCoreMeta.java
private static void readFile(IndexWriter writer, String filename) throws FileNotFoundException, JSONException, IOException { FileInputStream inputStream = null; Scanner sc = null;/*from w ww. j av a 2s . c om*/ try { int linecount = 0; inputStream = new FileInputStream(filename); sc = new Scanner(inputStream, "UTF-8"); //String hash = sc.nextLine(); while (sc.hasNextLine()) { String id = ""; String title = "NA"; String date = ""; String abs = "NA"; String[] authors = null; Document doc = new Document(); linecount++; String line = sc.nextLine(); try { JSONObject obj = new JSONObject(line); //System.out.println(obj.length()); // id = obj.get("identifier").toString(); doc.add(new TextField("id", id, Field.Store.YES)); //String type=obj.get("dc:type").toString(); //document.addField("type", type); try { title = obj.get("bibo:shortTitle").toString(); doc.add(new TextField("title", title, Field.Store.YES)); // date = obj.get("dc:date").toString(); // doc.add(new TextField("date", date, Field.Store.YES)); } catch (Exception e2) { } try { abs = obj.get("bibo:abstract").toString(); doc.add(new TextField("abstract", abs, Field.Store.YES)); //System.out.println(linecount + "," + abs); } catch (Exception e) { } // JSONArray arr = obj.getJSONArray("bibo:AuthorList"); // if (arr != null) { // for (int i = 0; i < arr.length(); i++) { // doc.add(new TextField("author", arr.get(i).toString(), Field.Store.YES)); // //System.out.println(arr.get(i).toString()); // } if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { //System.out.println("adding " + linecount); writer.addDocument(doc); } else { //System.out.println("updating "); //writer.updateDocument(new Term("path", file.toString()), doc); } } catch (Exception e3) { } } // note that Scanner suppresses exceptions if (sc.ioException() != null) { throw sc.ioException(); } } finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } writer.commit(); }
From source file:info.boytsov.lucene.CreateIndex.java
License:Open Source License
public static void main(String[] args) throws Exception { if (args.length != 3 && args.length != 4) { printUsage();// w w w .j a va 2 s .c om System.exit(1); } String indexType = args[0]; String indexSource = args[1]; int commitInterval = 1000000; if (args.length >= 4) { commitInterval = Integer.parseInt(args[3]); } System.out.println("Commiting after indexing " + commitInterval + " docs"); File outputDir = new File(args[2]); if (!outputDir.exists()) { if (!outputDir.mkdirs()) { System.out.println("couldn't create " + outputDir.getAbsolutePath()); return; } } if (!outputDir.isDirectory()) { System.out.println(outputDir.getAbsolutePath() + " is not a directory!"); return; } if (!outputDir.canWrite()) { System.out.println("Can't write to " + outputDir.getAbsolutePath()); return; } FSDirectory dir = FSDirectory.open(outputDir); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);// default // stop // words IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);// overwrites // if // needed IndexWriter indexWriter = new IndexWriter(dir, config); DocMaker docMaker = new DocMaker(); Properties properties = new Properties(); properties.setProperty("content.source.forever", "false"); // will // parse // each // document // only // once properties.setProperty("doc.index.props", "true"); // We want to store small-size fields like URL or even title ... properties.setProperty("doc.stored", "true"); // but not the large one (great savings, 3x reduction in space)! properties.setProperty("doc.body.stored", "false"); ContentSource source = CreateSource(indexType, indexSource, properties); if (source == null) { System.err.println("Failed to create a source: " + indexType + "(" + indexSource + ")"); printUsage(); System.exit(1); } Config c = new Config(properties); source.setConfig(c); source.resetInputs();// though this does not seem needed, it is // (gets the file opened?) 
docMaker.setConfig(c, source); int count = 0; System.out.println("Starting Indexing of " + indexType + " source " + indexSource); long start = System.currentTimeMillis(); Document doc; try { while ((doc = docMaker.makeDocument()) != null) { indexWriter.addDocument(doc); ++count; if (count % 5000 == 0) { System.out.println( "Indexed " + count + " documents in " + (System.currentTimeMillis() - start) + " ms"); } if (count % commitInterval == 0) { indexWriter.commit(); System.out.println("Committed"); } } } catch (org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException nmd) { System.out.println("Caught NoMoreDataException! -- Finishing"); // All done } long finish = System.currentTimeMillis(); System.out.println("Indexing " + count + " documents took " + (finish - start) + " ms"); System.out.println("Total data processed: " + source.getTotalBytesCount() + " bytes"); System.out.println("Index should be located at " + dir.getDirectory().getAbsolutePath()); docMaker.close(); indexWriter.commit(); indexWriter.close(); }
From source file:io.anserini.embeddings.IndexW2V.java
License:Apache License
/**
 * Indexes word embeddings from a tab-separated text file: each line is
 * {@code term\tv1 v2 ... vn}. The term goes into the ID field and the vector
 * is serialized as big-endian floats into a stored binary field.
 * The first line of the input is skipped (presumably a header -- matches the
 * original behavior; confirm against the input format).
 */
public void indexEmbeddings() throws IOException, InterruptedException {
    LOG.info("Starting indexer...");
    long startTime = System.currentTimeMillis();
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    final IndexWriter writer = new IndexWriter(directory, config);
    int cnt = 0;
    // FIX: try-with-resources -- the original never closed the reader.
    try (BufferedReader bRdr = new BufferedReader(new FileReader(args.input))) {
        String line = null;
        bRdr.readLine(); // skip first line (see javadoc)
        Document document = new Document();
        ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
        while ((line = bRdr.readLine()) != null) {
            String[] termEmbedding = line.trim().split("\t");
            document.add(new StringField(LuceneDocumentGenerator.FIELD_ID, termEmbedding[0], Field.Store.NO));
            String[] parts = termEmbedding[1].split(" ");
            for (int i = 0; i < parts.length; ++i) {
                // 4 bytes per float, big-endian (ByteBuffer default)
                byteStream.write(ByteBuffer.allocate(4).putFloat(Float.parseFloat(parts[i])).array());
            }
            document.add(new StoredField(FIELD_BODY, byteStream.toByteArray()));
            byteStream.flush();
            byteStream.reset();
            writer.addDocument(document);
            // reuse the Document instance across iterations
            document.clear();
            cnt++;
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " terms indexed");
            }
        }
    }
    LOG.info(String.format("Total of %s terms added", cnt));
    try {
        writer.commit();
        writer.forceMerge(1);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            LOG.error(e);
        }
    }
    LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
}
From source file:io.anserini.index.IndexClueWeb09b.java
License:Apache License
public int indexWithThreads(int numThreads) throws IOException, InterruptedException { System.out.println(/*from ww w . j a va2s . c o m*/ "Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'..."); final Directory dir = FSDirectory.open(indexPath); final IndexWriterConfig iwc = new IndexWriterConfig(analyzer()); iwc.setSimilarity(new BM25Similarity()); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); iwc.setUseCompoundFile(false); iwc.setMergeScheduler(new ConcurrentMergeScheduler()); final IndexWriter writer = new IndexWriter(dir, iwc); final ExecutorService executor = Executors.newFixedThreadPool(numThreads); List<Path> warcFiles = discoverWarcFiles(docDir); if (doclimit > 0 && warcFiles.size() < doclimit) warcFiles = warcFiles.subList(0, doclimit); for (Path f : warcFiles) executor.execute(new IndexerThread(writer, f)); //add some delay to let some threads spawn by scheduler Thread.sleep(30000); executor.shutdown(); // Disable new tasks from being submitted try { // Wait for existing tasks to terminate while (!executor.awaitTermination(5, TimeUnit.MINUTES)) { Thread.sleep(1000); } } catch (InterruptedException ie) { // (Re-)Cancel if current thread also interrupted executor.shutdownNow(); // Preserve interrupt status Thread.currentThread().interrupt(); } int numIndexed = writer.maxDoc(); try { writer.commit(); if (optimize) writer.forceMerge(1); } finally { writer.close(); } return numIndexed; }