Example usage for org.apache.lucene.index IndexWriter commit

Introduction

This page shows example usages of org.apache.lucene.index.IndexWriter.commit(), collected from open-source projects.

Prototype

@Override
public final long commit() throws IOException 

Document

Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
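
The prototype above also shows that commit() returns a long: in the Lucene versions that use this signature, that value is the sequence number of the last operation included in the commit. Below is a minimal sketch of the usual pattern, not taken from any of the projects listed here; it assumes a recent, Path-based Lucene API, and the index directory "/tmp/example-index" and the field names are made up for illustration.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CommitExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location, used only for this sketch.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new StringField("id", "1", Field.Store.YES));
            writer.addDocument(doc);
            // Make the pending change durable and visible to newly opened readers.
            long seqNo = writer.commit();
            System.out.println("committed, sequence number = " + seqNo);
        } // close() also commits any remaining pending changes unless rollback() was called.
    }
}

As the examples below illustrate, the common pattern is to call commit() once after a batch of addDocument()/updateDocument() calls and then close the writer; committing after every single document adds avoidable fsync overhead.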

Usage

From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java

License:Open Source License

/**
 * Creates a set of reference objects and stores it in a new index (named "<indexPath>-ro"). Then creates ordered
 * lists of reference object positions for each data item in the index with the given feature.
 * Finally a new index (named "<indexPath>-ms") is created where all the original documents as well as the new data
 * are stored.
 *
 * @param indexPath the path to the original index
 * @throws IOException
 */
public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
        throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
        referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
        System.err.println("WARNING: There are deleted docs in your index. You should "
                + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
        count++;
        Document document = reader.document(i);
        document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
        iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);

}

From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java

License:Open Source License

/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate "<indexPath>-ro" index. However, further documents were added and they
 * now need to get a ranked list of reference objects. So we (i) get all these new documents
 * missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            // System.out.println(sb.toString());
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

        // debug:
        System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}

From source file:net.semanticmetadata.lire.indexing.tools.Indexor.java

License:Open Source License

public void run() {
    // do it ...
    try {
        //            IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex, LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        IndexWriterConfig config = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION,
                new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        config.setCodec(new LireCustomCodec());
        IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexPath)), config);
        for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext();) {
            File inputFile = iterator.next();
            if (verbose)
                System.out.println("Processing " + inputFile.getPath() + ".");
            readFile(indexWriter, inputFile);
            if (verbose)
                System.out.println("Indexing finished.");
        }
        indexWriter.commit();
        indexWriter.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:net.semanticmetadata.lire.indexing.tools.ProximityHashingIndexor.java

License:Open Source License

public void run() {
    // do it ...
    try {
        IndexWriter indexWriter = LuceneUtils.createIndexWriter(indexPath, overwriteIndex,
                LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
        for (Iterator<File> iterator = inputFiles.iterator(); iterator.hasNext();) {
            File inputFile = iterator.next();
            if (verbose)
                System.out.println("Processing " + inputFile.getPath() + ".");
            if (verbose)
                System.out.println("Counting images.");
            run = 0;
            readFile(indexWriter, inputFile);
            if (verbose)
                System.out.printf("%d images found in the data file.\n", docCount);
            int numberOfRepresentatives = 1000; // TODO: clever selection.
            // select a number of representative "fixed stars" randomly from file
            if (numberOfRepresentatives > Math.sqrt(docCount))
                numberOfRepresentatives = (int) Math.sqrt(docCount);
            if (verbose)
                System.out.printf("Selecting %d representative images for hashing.\n", numberOfRepresentatives);
            representativesID = new HashSet<Integer>(numberOfRepresentatives);
            while (representativesID.size() < numberOfRepresentatives) {
                representativesID.add((int) Math.floor(Math.random() * (docCount - 1)));
            }
            representatives = new ArrayList<LireFeature>(numberOfRepresentatives);
            docCount = 0;
            run = 1;
            if (verbose)
                System.out.println("Now getting representatives from the data file.");
            readFile(indexWriter, inputFile);
            docCount = 0;
            run = 2;
            if (verbose)
                System.out.println("Finally we start the indexing process, please wait ...");
            readFile(indexWriter, inputFile);
            if (verbose)
                System.out.println("Indexing finished.");
        }
        indexWriter.commit();
        indexWriter.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:net.semanticmetadata.lire.RuntimeTest.java

License:Open Source License

public void testCreateIndex() throws IOException {
    ChainedDocumentBuilder builder = new ChainedDocumentBuilder();
    builder.addBuilder(DocumentBuilderFactory.getColorLayoutBuilder());
    builder.addBuilder(DocumentBuilderFactory.getEdgeHistogramBuilder());
    builder.addBuilder(DocumentBuilderFactory.getScalableColorBuilder());

    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true);
    for (String identifier : testFiles) {
        System.out.println("Indexing file " + identifier);
        Document doc = builder.createDocument(new FileInputStream(testFilesPath + identifier), identifier);
        iw.addDocument(doc);
    }
    iw.commit();
    iw.close();
}

From source file:net.semanticmetadata.lire.RuntimeTest.java

License:Open Source License

public void testCreateCorrelogramIndex() throws IOException {
    String[] testFiles = new String[] { "img01.jpg", "img02.jpg", "img03.jpg", "img04.jpg", "img05.jpg",
            "img06.jpg", "img07.jpg", "img08.jpg", "img09.jpg", "img10.jpg" };
    String testFilesPath = "./lire/src/test/resources/small/";

    DocumentBuilder builder = DocumentBuilderFactory.getAutoColorCorrelogramDocumentBuilder();
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-small", true);
    long ms = System.currentTimeMillis();
    for (String identifier : testFiles) {
        Document doc = builder.createDocument(new FileInputStream(testFilesPath + identifier), identifier);
        iw.addDocument(doc);
    }
    System.out.println("Time taken: " + ((System.currentTimeMillis() - ms) / testFiles.length) + " ms");
    iw.commit();
    iw.close();
}

From source file:net.semanticmetadata.lire.RuntimeTest.java

License:Open Source License

public void testCreateCEDDIndex() throws IOException {
    ArrayList<String> images = FileUtils.getAllImages(new File("E:/temp/flickrphotos/flickrphotos"), true);

    ChainedDocumentBuilder builder = new ChainedDocumentBuilder();
    builder.addBuilder(DocumentBuilderFactory.getCEDDDocumentBuilder());
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-cedd", true);
    int count = 0;
    long ms = System.currentTimeMillis();
    for (String identifier : images) {
        try {
            Document doc = builder.createDocument(new FileInputStream(identifier), identifier);
            iw.addDocument(doc);
        } catch (Exception e) {
            System.err.print("\n ;-( ");//e.printStackTrace();
        }
        count++;
        if (count % 1000 == 0)
            System.out.println((100 * count) / images.size() + "% ");
    }
    System.out.println("Time taken: " + ((System.currentTimeMillis() - ms) / testFiles.length) + " ms");
    iw.commit();
    iw.close();
}

From source file:net.sf.jtmt.summarizers.LuceneSummarizer.java

License:Apache License

/**
 * Builds the index.
 *
 * @param ramdir the ramdir
 * @param text the text
 * @throws Exception the exception
 */
private void buildIndex(Directory ramdir, String text) throws Exception {
    if (paragraphTokenizer == null || sentenceTokenizer == null) {
        throw new IllegalArgumentException("Please call init() to instantiate tokenizers");
    }
    IndexWriter writer = new IndexWriter(ramdir, analyzer, MaxFieldLength.UNLIMITED);
    paragraphTokenizer.setText(text);
    String paragraph = null;
    int pno = 0;
    while ((paragraph = paragraphTokenizer.nextParagraph()) != null) {
        sentenceTokenizer.setText(paragraph);
        String sentence = null;
        int sno = 0;
        while ((sentence = sentenceTokenizer.nextSentence()) != null) {
            Document doc = new Document();
            doc.add(new Field("text", sentence, Store.YES, Index.ANALYZED));
            doc.setBoost(computeDeboost(pno, sno));
            writer.addDocument(doc);
            sno++;
        }
        pno++;
    }
    writer.commit();
    writer.close();
}

From source file:net.sf.logsaw.index.internal.LuceneIndexServiceImpl.java

License:Open Source License

private void truncate(ILogResource log, IndexWriter writer) throws CoreException {
    try {
        writer.deleteAll();
        writer.commit();
    } catch (Exception e) {
        // Unexpected exception; wrap with CoreException
        throw new CoreException(new Status(IStatus.ERROR, IndexPlugin.PLUGIN_ID,
                NLS.bind(Messages.LuceneIndexService_error_failedToTruncateIndex,
                        new Object[] { log.getName(), e.getLocalizedMessage() }),
                e));
    }
}

From source file:net.sf.lucis.core.impl.DefaultWriter.java

License:Apache License

public <T, P> IndexStatus write(Store<T> store, Batch<T, P> batch) throws InterruptedException {
    Preconditions.checkNotNull(store, "A destination store must be provided.");
    if (batch == null) {
        return null;
    }
    try {
        final IndexWriterConfig config = config();
        final T oldCP = store.getCheckpoint();
        final T newCP = batch.getCheckpoint();
        if (Objects.equal(oldCP, newCP)) {
            return null;
        }
        throwIfInterrupted();
        if (!batch.isEmpty()) {
            final Analyzer analyzer = config.getAnalyzer();
            // Check whether the index must be created
            final Directory directory = store.getDirectory();
            config.setOpenMode(batch.isRecreate() ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
            final IndexWriter writer = new IndexWriter(directory, config);
            boolean ok = false;
            try {
                // Deletions
                if (!batch.isRecreate()) {
                    for (Term term : batch.getDeletions()) {
                        throwIfInterrupted();
                        writer.deleteDocuments(term);
                    }
                }
                // Additions
                for (Addition addition : batch.getAdditions()) {
                    throwIfInterrupted();
                    final Analyzer aa = addition.getAnalyzer();
                    writer.addDocument(addition.getDocument(), aa != null ? aa : analyzer);
                }
                // Commit
                throwIfInterrupted();
                writer.commit();
                ok = true;
                // No optimize until policy is defined.
                // writer.optimize();
            } finally {
                if (!ok) {
                    rollback(writer);
                }
                writer.close();
            }
        }
        store.setCheckpoint(newCP);
        return IndexStatus.OK;
    } catch (InterruptedException ie) {
        throw ie;
    } catch (LockObtainFailedException le) {
        log().error(le, "Unable to lock index");
        return IndexStatus.LOCKED;
    } catch (CorruptIndexException ce) {
        log().error(ce, "Corrupt index");
        return IndexStatus.CORRUPT;
    } catch (IOException ioe) {
        log().error(ioe, "I/O Error while writing");
        return IndexStatus.IOERROR;
    } catch (Exception e) {
        log().error(e, "Exception while writing");
        return IndexStatus.ERROR;
    }
}