Example usage for org.apache.lucene.index IndexWriter deleteDocuments

List of usage examples for org.apache.lucene.index IndexWriter deleteDocuments

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriter#deleteDocuments.

Prototype

public long deleteDocuments(Query... queries) throws IOException 

Source Link

Document

Deletes the document(s) matching any of the provided queries.

Usage

From source file:org.apache.nifi.provenance.lucene.DeleteIndexAction.java

License:Apache License

@Override
public File execute(final File expiredFile) throws IOException {
    // Determine the max event ID stored in the expired journal so that, once its
    // index entries are removed, the minimum indexed ID can be advanced past it.
    long maxEventId = -1L;
    try (final RecordReader reader = RecordReaders.newRecordReader(expiredFile, repository.getAllLogFiles(),
            Integer.MAX_VALUE)) {
        maxEventId = reader.getMaxEventId();
    } catch (final IOException ioe) {
        // Best-effort: if the journal is unreadable we still expire its index entries.
        logger.warn("Failed to obtain max ID present in journal file {}", expiredFile.getAbsolutePath());
    }

    // Remove the records from every index that covers this journal file.
    final List<File> indexDirs = indexConfiguration.getIndexDirectories(expiredFile);
    for (final File indexingDirectory : indexDirs) {
        // Index documents are keyed by the journal's base filename (extension stripped).
        final Term term = new Term(FieldNames.STORAGE_FILENAME,
                LuceneUtil.substringBefore(expiredFile.getName(), "."));

        boolean deleteDir = false;
        final EventIndexWriter writer = indexManager.borrowIndexWriter(indexingDirectory);
        try {
            final IndexWriter indexWriter = writer.getIndexWriter();
            indexWriter.deleteDocuments(term);
            indexWriter.commit();
            final int docsLeft = indexWriter.numDocs();
            deleteDir = docsLeft <= 0;
            logger.debug("After expiring {}, there are {} docs left for index {}", expiredFile, docsLeft,
                    indexingDirectory);
        } finally {
            // Always return the writer to the manager, even if delete/commit failed.
            indexManager.returnIndexWriter(writer);
        }

        // We've confirmed that all documents have been removed; delete the index directory.
        if (deleteDir) {
            indexManager.removeIndex(indexingDirectory);
            indexConfiguration.removeIndexDirectory(indexingDirectory);

            deleteDirectory(indexingDirectory);
            logger.info("Removed empty index directory {}", indexingDirectory);
        }
    }

    // Advance the minimum indexed ID to 1 more than the max event ID in this file.
    if (maxEventId > -1L) {
        indexConfiguration.setMinIdIndexed(maxEventId + 1L);
    }

    // Fix: the previous version logged a "documents removed" count from a dead,
    // never-incremented local (always 0); the misleading figure has been dropped.
    logger.info("Deleted Indices for Expired Provenance File {} from {} index files", expiredFile,
            indexDirs.size());
    return expiredFile;
}

From source file:org.apache.ofbiz.content.search.DocumentIndexer.java

License:Apache License

@Override
public void run() {
    // Consumes LuceneDocuments from the queue until interrupted. The writer is opened
    // lazily on first use and closed whenever the queue drains or the thread stops,
    // so the index lock is not held while the queue is idle.
    IndexWriter indexWriter = null;
    int uncommittedDocs = 0;
    while (true) {
        LuceneDocument ofbizDocument;
        try {
            // Execution will pause here until the queue receives a LuceneDocument for indexing
            ofbizDocument = documentIndexQueue.take();
        } catch (InterruptedException e) {
            Debug.logError(e, module);
            // Fix: restore the interrupt flag so the owner of this thread can still
            // observe that an interruption occurred after we shut down.
            Thread.currentThread().interrupt();
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            break;
        }
        Term documentIdentifier = ofbizDocument.getDocumentIdentifier();
        Document document = ofbizDocument.prepareDocument(this.delegator);
        if (indexWriter == null) {
            try {
                StandardAnalyzer analyzer = new StandardAnalyzer();
                analyzer.setVersion(SearchWorker.getLuceneVersion());
                indexWriter = new IndexWriter(this.indexDirectory, new IndexWriterConfig(analyzer));
            } catch (CorruptIndexException e) {
                Debug.logError("Corrupted lucene index: " + e.getMessage(), module);
                break;
            } catch (LockObtainFailedException e) {
                Debug.logError("Could not obtain Lock on lucene index " + e.getMessage(), module);
                // TODO: put the thread to sleep waiting for the locked to be released
                break;
            } catch (IOException e) {
                Debug.logError(e.getMessage(), module);
                break;
            }
        }
        try {
            // A null prepared document means "remove this identifier from the index".
            if (document == null) {
                indexWriter.deleteDocuments(documentIdentifier);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": deleted Lucene document: " + ofbizDocument, module);
            } else {
                indexWriter.updateDocument(documentIdentifier, document);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": indexed Lucene document: " + ofbizDocument, module);
            }
        } catch (Exception e) {
            Debug.logError(e, getName() + ": error processing Lucene document: " + ofbizDocument, module);
            // Release the writer if nothing else is pending, then keep consuming.
            if (documentIndexQueue.peek() == null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            continue;
        }
        uncommittedDocs++;
        if (uncommittedDocs == UNCOMMITTED_DOC_LIMIT || documentIndexQueue.peek() == null) {
            // limit reached or queue empty, time to commit
            try {
                indexWriter.commit();
            } catch (IOException e) {
                Debug.logError(e, module);
            }
            uncommittedDocs = 0;
        }
        // Queue drained: close the writer so the index lock is released while idle.
        if (documentIndexQueue.peek() == null) {
            try {
                indexWriter.close();
                indexWriter = null;
            } catch (IOException e) {
                Debug.logError(e, module);
            }
        }
    }
}

From source file:org.apache.oodt.cas.filemgr.catalog.LuceneCatalog.java

License:Apache License

/**
 * Removes the index document for the given product from the Lucene catalog.
 *
 * @param product the product whose document should be removed; its id keys the index entry
 * @throws CatalogException if the index cannot be updated
 */
private synchronized void removeProductDocument(Product product) throws CatalogException {

    // NOTE(review): the reader field is refreshed here but not consulted before the
    // delete below; confirm it is needed elsewhere before removing this open/close pair.
    try {
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        // Fix: log instead of printStackTrace() so the failure reaches the catalog log.
        LOG.log(Level.WARNING, "Unable to open index reader: " + e.getMessage());
    }
    try {
        LOG.log(Level.FINE,
                "LuceneCatalog: remove document from index for product: [" + product.getProductId() + "]");
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        // Fix: try-with-resources guarantees the writer is closed even when
        // deleteDocuments throws (the old code leaked the writer in that case).
        try (IndexWriter writer = new IndexWriter(indexDir, config)) {
            writer.deleteDocuments(new Term("product_id", product.getProductId()));
        }
    } catch (IOException e) {
        LOG.log(Level.WARNING, "Exception removing product: [" + product.getProductName()
                + "] from index: Message: " + e.getMessage());
        throw new CatalogException(e.getMessage(), e);
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
                // Best-effort close; nothing useful to do on failure.
            }

        }

    }
}

From source file:org.apache.oodt.cas.workflow.instrepo.LuceneWorkflowInstanceRepository.java

License:Apache License

/**
 * Removes the index document for the given workflow instance from the Lucene repository.
 *
 * @param inst the workflow instance whose document should be removed; its id keys the entry
 * @throws InstanceRepositoryException if the index cannot be updated
 */
private synchronized void removeWorkflowInstanceDocument(WorkflowInstance inst)
        throws InstanceRepositoryException {
    IndexReader reader = null;
    try {
        // Fix: the reader was previously opened twice in a row, leaking the first
        // instance; it is now opened exactly once.
        reader = DirectoryReader.open(indexDir);
    } catch (IOException e) {
        // Fix: log instead of printStackTrace() so the failure reaches the repo log.
        LOG.log(Level.WARNING, "Unable to open index reader: " + e.getMessage());
    }
    try {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        LogMergePolicy lmp = new LogDocMergePolicy();
        lmp.setMergeFactor(mergeFactor);
        config.setMergePolicy(lmp);

        LOG.log(Level.FINE, "LuceneWorkflowEngine: remove document from index for workflow instance: ["
                + inst.getId() + "]");
        // Fix: try-with-resources guarantees the writer is closed even when
        // deleteDocuments throws (the old code leaked the writer in that case).
        try (IndexWriter writer = new IndexWriter(indexDir, config)) {
            writer.deleteDocuments(new Term("workflow_inst_id", inst.getId()));
        }
    } catch (IOException e) {
        LOG.log(Level.SEVERE, e.getMessage());
        LOG.log(Level.WARNING, "Exception removing workflow instance: [" + inst.getId()
                + "] from index: Message: " + e.getMessage());
        // NOTE(review): the cause is not chained; pass e through if
        // InstanceRepositoryException offers a (String, Throwable) constructor.
        throw new InstanceRepositoryException(e.getMessage());
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (Exception ignore) {
                // Best-effort close; nothing useful to do on failure.
            }

        }

    }
}

From source file:org.apache.roller.weblogger.business.search.operations.RebuildWebsiteIndexOperation.java

License:Apache License

/**
 * Rebuilds the search index for a single weblog, or for the entire site when no
 * weblog was supplied: deletes the existing documents, then re-indexes every
 * published entry.
 */
public void doRun() {

    Date start = new Date();

    // since this operation can be run on a separate thread we must treat
    // the weblog object passed in as a detached object which is proned to
    // lazy initialization problems, so requery for the object now
    if (this.website != null) {
        mLogger.debug("Reindexining weblog " + website.getHandle());
        try {
            this.website = roller.getWeblogManager().getWeblog(this.website.getId());
        } catch (WebloggerException ex) {
            mLogger.error("Error getting website object", ex);
            return;
        }
    } else {
        mLogger.debug("Reindexining entire site");
    }

    IndexWriter writer = beginWriting();

    try {
        if (writer != null) {

            // Delete docs: either just this weblog's, or (site rebuild) every document.
            Term tWebsite = null;
            if (website != null) {
                tWebsite = IndexUtil.getTerm(FieldConstants.WEBSITE_HANDLE, website.getHandle());
            }
            if (tWebsite != null) {
                writer.deleteDocuments(tWebsite);
            } else {
                Term all = IndexUtil.getTerm(FieldConstants.CONSTANT, FieldConstants.CONSTANT_V);
                writer.deleteDocuments(all);
            }

            // Add a document for every published entry in scope.
            WeblogEntryManager weblogManager = roller.getWeblogEntryManager();
            WeblogEntrySearchCriteria wesc = new WeblogEntrySearchCriteria();
            wesc.setWeblog(website);
            wesc.setStatus(PubStatus.PUBLISHED);
            List<WeblogEntry> entries = weblogManager.getWeblogEntries(wesc);

            mLogger.debug("Entries to index: " + entries.size());

            for (WeblogEntry entry : entries) {
                writer.addDocument(getDocument(entry));
                mLogger.debug(
                        MessageFormat.format("Indexed entry {0}: {1}", entry.getPubTime(), entry.getAnchor()));
            }

            // Fix: the database connection was released both here and in the finally
            // block below; the redundant in-try release has been removed so the
            // connection is released exactly once, on every path, in finally.
        }
    } catch (Exception e) {
        mLogger.error("ERROR adding/deleting doc to index", e);
    } finally {
        endWriting();
        if (roller != null) {
            roller.release();
        }
    }

    Date end = new Date();
    double length = (end.getTime() - start.getTime()) / (double) RollerConstants.SEC_IN_MS;

    if (website == null) {
        mLogger.info("Completed rebuilding index for all users in '" + length + "' secs");
    } else {
        mLogger.info("Completed rebuilding index for website handle: '" + website.getHandle() + "' in '"
                + length + "' seconds");
    }
}

From source file:org.apache.roller.weblogger.business.search.operations.ReIndexEntryOperation.java

License:Apache License

/**
 * Re-indexes a single weblog entry: removes any existing index document for the
 * entry's ID and adds a freshly-built one.
 */
public void doRun() {

    // The entry handed to this operation may be detached (the operation can run on
    // a separate thread), so re-fetch a managed copy before touching lazy fields.
    try {
        WeblogEntryManager entryManager = roller.getWeblogEntryManager();
        this.data = entryManager.getWeblogEntry(this.data.getId());
    } catch (WebloggerException ex) {
        mLogger.error("Error getting weblogentry object", ex);
        return;
    }

    IndexWriter writer = beginWriting();
    try {
        if (writer == null) {
            return; // finally still runs: release + endWriting
        }

        // Replace the entry in the index: delete by ID, then add the new document.
        writer.deleteDocuments(new Term(FieldConstants.ID, data.getId()));
        writer.addDocument(getDocument(data));
    } catch (IOException e) {
        mLogger.error("Problems adding/deleting doc to index", e);
    } finally {
        if (roller != null) {
            roller.release();
        }
        endWriting();
    }
}

From source file:org.apache.roller.weblogger.business.search.operations.RemoveEntryOperation.java

License:Apache License

/**
 * Removes a single weblog entry's document from the search index.
 */
public void doRun() {

    // Re-fetch the (possibly detached) entry so lazy initialization works; bail
    // out if it cannot be loaded.
    try {
        this.data = roller.getWeblogEntryManager().getWeblogEntry(this.data.getId());
    } catch (WebloggerException ex) {
        mLogger.error("Error getting weblogentry object", ex);
        return;
    }

    IndexWriter writer = beginWriting();
    try {
        if (writer != null) {
            // Delete every index document keyed by this entry's ID.
            writer.deleteDocuments(new Term(FieldConstants.ID, data.getId()));
        }
    } catch (IOException e) {
        mLogger.error("Error deleting doc from index", e);
    } finally {
        endWriting();
    }
}

From source file:org.apache.roller.weblogger.business.search.operations.RemoveWebsiteIndexOperation.java

License:Apache License

/**
 * Removes every index document belonging to a weblog (keyed by its handle) and
 * logs how long the deletion took.
 */
public void doRun() {
    Date start = new Date();

    // The weblog may be detached (this operation can run on its own thread), so
    // re-fetch a managed instance before using it.
    try {
        this.website = roller.getWeblogManager().getWeblog(this.website.getId());
    } catch (WebloggerException ex) {
        mLogger.error("Error getting website object", ex);
        return;
    }

    IndexWriter writer = beginWriting();
    try {
        if (writer != null) {
            // getWeblog() may have returned null; a null handle yields a null term.
            String handle = (website == null) ? null : website.getHandle();
            Term handleTerm = IndexUtil.getTerm(FieldConstants.WEBSITE_HANDLE, handle);
            if (handleTerm != null) {
                writer.deleteDocuments(handleTerm);
            }
        }
    } catch (IOException e) {
        mLogger.info("Problems deleting doc from index", e);
    } finally {
        endWriting();
    }

    Date end = new Date();
    double length = (end.getTime() - start.getTime()) / (double) RollerConstants.SEC_IN_MS;

    if (website != null) {
        mLogger.info("Completed deleting indices for website '" + website.getName() + "' in '" + length
                + "' seconds");
    }
}

From source file:org.apache.servicemix.nmr.audit.lucene.LuceneIndexer.java

License:Apache License

/**
 * Drop objects from Lucene index/*w  w  w  .j a  v a 2 s  .  c  o m*/
 */
protected void remove(String[] ids) throws IOException {
    synchronized (directory) {
        if (ids != null && ids.length > 0) {
            IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(),
                    IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (int i = 0; i < ids.length; i++) {
                    writer.deleteDocuments(new Term(LuceneAuditor.FIELD_ID, ids[i]));
                }
                writer.commit();
            } finally {
                writer.close();
            }
        }
    }
}

From source file:org.apache.solr.codecs.test.testDeleteDocs.java

License:Apache License

/**
 * Smoke test for the ONSQL codec: deletes all documents whose title matches
 * the query word "fourth", then re-runs the search to verify they are gone.
 */
public static void main(String[] args) {
    try {
        plaintextDir = assureDirectoryExists(new File(INDEX_ROOT_FOLDER));

        // ----------- configure the index writer -------
        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_10_0);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_4_10_0, analyzer);
        // Append to an existing index (or create one) on each execution.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        Properties props = new Properties();
        // Fix: try-with-resources closes the stream even if props.load() throws
        // (the old code leaked the FileInputStream on a load failure).
        // NOTE(review): hard-coded Windows path; consider taking it from args.
        try (FileInputStream fstream = new FileInputStream(
                "C:\\work\\search_engine\\codec\\solr410\\solr_codectest\\collection1\\conf\\kvstore.properties")) {
            props.load(fstream);
        }
        ONSQLKVstoreHandler.getInstance().setKVStore("omega", props);
        ONSQLCodec codec = new ONSQLCodec();
        config.setCodec(codec);
        config.setUseCompoundFile(false);
        Directory luceneDir = new ONSQLWrapperDirectory(new File(INDEX_ROOT_FOLDER));
        IndexWriter writer = new IndexWriter(luceneDir, config);
        QueryParser queryParser = new QueryParser(Version.LUCENE_4_10_0, "title", analyzer);
        String search_word = "fourth";
        Query query = queryParser.parse(search_word);
        // Delete every document matching the query, then persist and verify.
        writer.deleteDocuments(query);
        writer.commit();
        writer.close();
        searchIndex("title", search_word);
    } catch (Throwable te) {
        te.printStackTrace();
    }
}