List of usage examples for org.apache.lucene.index IndexWriter commit
@Override public final long commit() throws IOException
Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
From source file:framework.retrieval.engine.index.create.impl.RIndexWriter.java
License:Apache License
/** * ?/*from w w w . j a v a 2 s . co m*/ * @param indexPathType * @param terms */ public void deleteDocument(String indexPathType, List<Term> terms) { if (terms == null || terms.size() <= 0) { return; } // RetrievalIndexLock.getInstance().lock(indexPathType); IndexWriter indexWriter = null; try { int length = terms.size(); RetrievalUtil.debugLog(log, "" + length + "" + indexPathType); try { indexWriter = getIndexWriter(indexPathType); indexWriter.deleteDocuments(terms.toArray(new Term[length])); } catch (Exception e) { e.printStackTrace(); } } finally { if (indexWriter != null) { try { indexWriter.commit(); } catch (Exception e) { e.printStackTrace(); } try { indexWriter.close(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } } // RetrievalIndexLock.getInstance().unlock(indexPathType); } }
From source file:framework.retrieval.engine.index.impl.RIndexManager.java
License:Apache License
/**
 * Creates the index directory for {@code indexPathType} if it does not exist.
 * Tries to open an existing index in append mode first; on failure falls back
 * to creating a fresh index. The whole operation runs under the retrieval
 * index lock so concurrent creators cannot race.
 */
public void create() {
    RetrievalIndexLock.getInstance().lock(indexPathType);
    IndexWriter indexWriterNormal = null;
    try {
        indexWriterNormal = new IndexWriter(
                RetrievalDirectoryProvider.getDirectory(luceneProperties.getIndexBasePath(), indexPathType),
                analyzerFactory.createIndexAnalyzer(), false, MaxFieldLength.UNLIMITED);
    } catch (CorruptIndexException e) {
        throw new RetrievalCreateIndexException(e);
    } catch (LockObtainFailedException e) {
        // Another writer holds the Lucene directory lock; force-release it.
        indexWriterNormal = null;
        indexWriteProvider.unlockDir(indexPathType);
    } catch (IOException e) {
        // Append-mode open failed (index presumably absent) -- create a new one.
        IndexWriter indexWriter = indexWriteProvider.createNewIndexWriter(indexPathType);
        try {
            indexWriter.commit();
        } catch (Exception e1) {
            // FIX: previously swallowed silently; log so failures are visible.
            RetrievalUtil.errorLog(log, e1);
        }
        try {
            indexWriter.close();
        } catch (Exception e1) {
            RetrievalUtil.errorLog(log, e1);
        }
    } finally {
        // FIX: merged the two duplicate null-checks into one block; commit and
        // close are still attempted independently.
        if (indexWriterNormal != null) {
            try {
                indexWriterNormal.commit();
            } catch (Exception e) {
                RetrievalUtil.errorLog(log, e);
            }
            try {
                indexWriterNormal.close();
            } catch (Exception e) {
                RetrievalUtil.errorLog(log, e);
            }
        }
        RetrievalIndexLock.getInstance().unlock(indexPathType);
    }
}
From source file:framework.retrieval.engine.index.impl.RIndexManager.java
License:Apache License
/** * ?/* ww w .ja v a 2 s .c o m*/ */ public void reCreate() { RetrievalIndexLock.getInstance().lock(indexPathType); luceneProperties.getIndexWriterPool().remove(indexPathType); try { IndexWriter indexWriter = indexWriteProvider.createNewIndexWriter(indexPathType); try { indexWriter.commit(); } catch (Exception e1) { } try { indexWriter.close(); } catch (Exception e1) { } } catch (Exception e) { RetrievalUtil.errorLog(log, e); } finally { RetrievalIndexLock.getInstance().unlock(indexPathType); } }
From source file:framework.retrieval.engine.index.impl.RIndexManager.java
License:Apache License
/** * /*from w w w . j a v a 2s .c o m*/ */ public void optimize() { RetrievalIndexLock.getInstance().lock(indexPathType); IndexWriter indexWriter = indexWriteProvider.createNormalIndexWriter(indexPathType); try { try { indexWriter.optimize(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } try { indexWriter.commit(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } try { indexWriter.close(); } catch (Exception e) { RetrievalUtil.errorLog(log, e); } } finally { RetrievalIndexLock.getInstance().unlock(indexPathType); } }
From source file:gate.creole.ir.lucene.LuceneIndexManager.java
License:Open Source License
/** Creates index directory and indexing all * documents in the corpus. *//*from ww w. j a v a 2s . c o m*/ @Override public void createIndex() throws IndexException { if (indexDefinition == null) throw new GateRuntimeException("Index definition is null!"); if (corpus == null) throw new GateRuntimeException("Corpus is null!"); String location = indexDefinition.getIndexLocation(); try { File file = new File(location); if (file.exists()) { if (file.isDirectory() && file.listFiles().length > 0) { throw new IndexException(location + " is not empty directory"); } if (!file.isDirectory()) { throw new IndexException("Only empty directory can be index path"); } } /* Niraj */ // ok so lets put the corpus index feature corpus.getFeatures().put(CORPUS_INDEX_FEATURE, CORPUS_INDEX_FEATURE_VALUE); /* End */ IndexWriter writer = new IndexWriter(FSDirectory.open(new File(location)), new IndexWriterConfig(Version.LUCENE_31, new SimpleAnalyzer(Version.LUCENE_30)) .setOpenMode(OpenMode.CREATE)); /*IndexWriter writer = new IndexWriter( FSDirectory.open(new File(location)), new SimpleAnalyzer(Version.LUCENE_30), true, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH) );*/ for (int i = 0; i < corpus.size(); i++) { boolean isLoaded = corpus.isDocumentLoaded(i); gate.Document gateDoc = corpus.get(i); writer.addDocument(getLuceneDoc(gateDoc)); if (!isLoaded) { corpus.unloadDocument(gateDoc); gate.Factory.deleteResource(gateDoc); } } //for (all documents) writer.commit(); writer.close(); corpus.sync(); } catch (java.io.IOException ioe) { throw new IndexException(ioe.getMessage()); } catch (gate.persist.PersistenceException pe) { pe.printStackTrace(); } }
From source file:gate.creole.ir.lucene.LuceneIndexManager.java
License:Open Source License
/** Optimize existing index. */ @Override//from w ww. j av a 2 s . c o m public void optimizeIndex() throws IndexException { if (indexDefinition == null) throw new GateRuntimeException("Index definition is null!"); try { IndexWriter writer = new IndexWriter(FSDirectory.open(new File(indexDefinition.getIndexLocation())), new IndexWriterConfig(Version.LUCENE_31, new SimpleAnalyzer(Version.LUCENE_30)) .setOpenMode(OpenMode.APPEND)); /*IndexWriter writer = new IndexWriter( FSDirectory.open(new File(indexDefinition.getIndexLocation())), new SimpleAnalyzer(Version.LUCENE_30), false, new IndexWriter.MaxFieldLength(IndexWriter.DEFAULT_MAX_FIELD_LENGTH));*/ //writer.optimize(); writer.forceMerge(1, true); writer.commit(); writer.close(); } catch (java.io.IOException ioe) { throw new IndexException(ioe.getMessage()); } }
From source file:index.IndexCoreMeta.java
private static void readFile(IndexWriter writer, String filename) throws FileNotFoundException, JSONException, IOException { FileInputStream inputStream = null; Scanner sc = null;/*from w ww. j av a 2s . c om*/ try { int linecount = 0; inputStream = new FileInputStream(filename); sc = new Scanner(inputStream, "UTF-8"); //String hash = sc.nextLine(); while (sc.hasNextLine()) { String id = ""; String title = "NA"; String date = ""; String abs = "NA"; String[] authors = null; Document doc = new Document(); linecount++; String line = sc.nextLine(); try { JSONObject obj = new JSONObject(line); //System.out.println(obj.length()); // id = obj.get("identifier").toString(); doc.add(new TextField("id", id, Field.Store.YES)); //String type=obj.get("dc:type").toString(); //document.addField("type", type); try { title = obj.get("bibo:shortTitle").toString(); doc.add(new TextField("title", title, Field.Store.YES)); // date = obj.get("dc:date").toString(); // doc.add(new TextField("date", date, Field.Store.YES)); } catch (Exception e2) { } try { abs = obj.get("bibo:abstract").toString(); doc.add(new TextField("abstract", abs, Field.Store.YES)); //System.out.println(linecount + "," + abs); } catch (Exception e) { } // JSONArray arr = obj.getJSONArray("bibo:AuthorList"); // if (arr != null) { // for (int i = 0; i < arr.length(); i++) { // doc.add(new TextField("author", arr.get(i).toString(), Field.Store.YES)); // //System.out.println(arr.get(i).toString()); // } if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { //System.out.println("adding " + linecount); writer.addDocument(doc); } else { //System.out.println("updating "); //writer.updateDocument(new Term("path", file.toString()), doc); } } catch (Exception e3) { } } // note that Scanner suppresses exceptions if (sc.ioException() != null) { throw sc.ioException(); } } finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } writer.commit(); }
From source file:info.boytsov.lucene.CreateIndex.java
License:Open Source License
public static void main(String[] args) throws Exception { if (args.length != 3 && args.length != 4) { printUsage();// w w w .j a va 2 s .c om System.exit(1); } String indexType = args[0]; String indexSource = args[1]; int commitInterval = 1000000; if (args.length >= 4) { commitInterval = Integer.parseInt(args[3]); } System.out.println("Commiting after indexing " + commitInterval + " docs"); File outputDir = new File(args[2]); if (!outputDir.exists()) { if (!outputDir.mkdirs()) { System.out.println("couldn't create " + outputDir.getAbsolutePath()); return; } } if (!outputDir.isDirectory()) { System.out.println(outputDir.getAbsolutePath() + " is not a directory!"); return; } if (!outputDir.canWrite()) { System.out.println("Can't write to " + outputDir.getAbsolutePath()); return; } FSDirectory dir = FSDirectory.open(outputDir); StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);// default // stop // words IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);// overwrites // if // needed IndexWriter indexWriter = new IndexWriter(dir, config); DocMaker docMaker = new DocMaker(); Properties properties = new Properties(); properties.setProperty("content.source.forever", "false"); // will // parse // each // document // only // once properties.setProperty("doc.index.props", "true"); // We want to store small-size fields like URL or even title ... properties.setProperty("doc.stored", "true"); // but not the large one (great savings, 3x reduction in space)! properties.setProperty("doc.body.stored", "false"); ContentSource source = CreateSource(indexType, indexSource, properties); if (source == null) { System.err.println("Failed to create a source: " + indexType + "(" + indexSource + ")"); printUsage(); System.exit(1); } Config c = new Config(properties); source.setConfig(c); source.resetInputs();// though this does not seem needed, it is // (gets the file opened?) 
docMaker.setConfig(c, source); int count = 0; System.out.println("Starting Indexing of " + indexType + " source " + indexSource); long start = System.currentTimeMillis(); Document doc; try { while ((doc = docMaker.makeDocument()) != null) { indexWriter.addDocument(doc); ++count; if (count % 5000 == 0) { System.out.println( "Indexed " + count + " documents in " + (System.currentTimeMillis() - start) + " ms"); } if (count % commitInterval == 0) { indexWriter.commit(); System.out.println("Committed"); } } } catch (org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException nmd) { System.out.println("Caught NoMoreDataException! -- Finishing"); // All done } long finish = System.currentTimeMillis(); System.out.println("Indexing " + count + " documents took " + (finish - start) + " ms"); System.out.println("Total data processed: " + source.getTotalBytesCount() + " bytes"); System.out.println("Index should be located at " + dir.getDirectory().getAbsolutePath()); docMaker.close(); indexWriter.commit(); indexWriter.close(); }
From source file:io.anserini.embeddings.IndexW2V.java
License:Apache License
/**
 * Indexes word embeddings from a tab-separated text file: each line is
 * {@code term\tv1 v2 ... vn}. The term goes into the ID field and the vector
 * is serialized as big-endian floats into a stored binary field.
 * The first line of the input is skipped (presumably a header -- matches the
 * original behavior; confirm against the input format).
 */
public void indexEmbeddings() throws IOException, InterruptedException {
    LOG.info("Starting indexer...");
    long startTime = System.currentTimeMillis();
    final WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer();
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    final IndexWriter writer = new IndexWriter(directory, config);
    int cnt = 0;
    // FIX: try-with-resources -- the original never closed the reader.
    try (BufferedReader bRdr = new BufferedReader(new FileReader(args.input))) {
        String line = null;
        bRdr.readLine(); // skip first line (see javadoc)
        Document document = new Document();
        ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
        while ((line = bRdr.readLine()) != null) {
            String[] termEmbedding = line.trim().split("\t");
            document.add(new StringField(LuceneDocumentGenerator.FIELD_ID, termEmbedding[0], Field.Store.NO));
            String[] parts = termEmbedding[1].split(" ");
            for (int i = 0; i < parts.length; ++i) {
                // 4 bytes per float, big-endian (ByteBuffer default)
                byteStream.write(ByteBuffer.allocate(4).putFloat(Float.parseFloat(parts[i])).array());
            }
            document.add(new StoredField(FIELD_BODY, byteStream.toByteArray()));
            byteStream.flush();
            byteStream.reset();
            writer.addDocument(document);
            // reuse the Document instance across iterations
            document.clear();
            cnt++;
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " terms indexed");
            }
        }
    }
    LOG.info(String.format("Total of %s terms added", cnt));
    try {
        writer.commit();
        writer.forceMerge(1);
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            LOG.error(e);
        }
    }
    LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
}
From source file:io.anserini.index.IndexClueWeb09b.java
License:Apache License
public int indexWithThreads(int numThreads) throws IOException, InterruptedException { System.out.println(/*from ww w . j a va2s . c o m*/ "Indexing with " + numThreads + " threads to directory '" + indexPath.toAbsolutePath() + "'..."); final Directory dir = FSDirectory.open(indexPath); final IndexWriterConfig iwc = new IndexWriterConfig(analyzer()); iwc.setSimilarity(new BM25Similarity()); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); iwc.setUseCompoundFile(false); iwc.setMergeScheduler(new ConcurrentMergeScheduler()); final IndexWriter writer = new IndexWriter(dir, iwc); final ExecutorService executor = Executors.newFixedThreadPool(numThreads); List<Path> warcFiles = discoverWarcFiles(docDir); if (doclimit > 0 && warcFiles.size() < doclimit) warcFiles = warcFiles.subList(0, doclimit); for (Path f : warcFiles) executor.execute(new IndexerThread(writer, f)); //add some delay to let some threads spawn by scheduler Thread.sleep(30000); executor.shutdown(); // Disable new tasks from being submitted try { // Wait for existing tasks to terminate while (!executor.awaitTermination(5, TimeUnit.MINUTES)) { Thread.sleep(1000); } } catch (InterruptedException ie) { // (Re-)Cancel if current thread also interrupted executor.shutdownNow(); // Preserve interrupt status Thread.currentThread().interrupt(); } int numIndexed = writer.maxDoc(); try { writer.commit(); if (optimize) writer.forceMerge(1); } finally { writer.close(); } return numIndexed; }