List of usage examples for org.apache.lucene.index IndexWriter deleteDocuments
public long deleteDocuments(Term... terms) throws IOException
public long deleteDocuments(Query... queries) throws IOException
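Before the project examples below, here is a minimal, self-contained sketch of both overloads: deleteDocuments(Term...) for exact-match deletes (used by most of the examples on this page) and deleteDocuments(Query...) for query-based deletes. It assumes a recent Lucene (5.x or later) API; the index path and the "id" field are illustrative assumptions, not taken from any source file below.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.FSDirectory;

public class DeleteDocumentsExample {
    public static void main(String[] args) throws IOException {
        // "/tmp/example-index" and the "id" field are made-up names for illustration
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {

            Document doc = new Document();
            doc.add(new StringField("id", "42", Field.Store.YES));
            writer.addDocument(doc);

            // delete by exact term: matches documents whose "id" field is "42"
            writer.deleteDocuments(new Term("id", "42"));

            // equivalently, delete by query (any Query is accepted)
            writer.deleteDocuments(new TermQuery(new Term("id", "42")));

            // deletes are buffered; they become visible to newly opened readers
            // only after a commit() or close()
            writer.commit();
        }
    }
}

Note that several of the examples below follow the same pattern: the delete is buffered by the writer and only made visible by a subsequent commit() or close().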
From source file:org.netbeans.modules.jackpot30.indexing.index.Indexer.java
License:Open Source License
@Override
public void delete(@NonNull Indexable indexable) {
    IndexWriter luceneWriter = access.getIndexWriter(root, cacheRoot, INDEX_NAME);
    String relative = access.getRelativePath(indexable);
    try {
        luceneWriter.deleteDocuments(new Term("languagePath", relative));
    } catch (CorruptIndexException ex) {
        Logger.getLogger(Indexer.class.getName()).log(Level.WARNING, null, ex);
    } catch (IOException ex) {
        Logger.getLogger(Indexer.class.getName()).log(Level.WARNING, null, ex);
    }
}
From source file:org.niord.core.message.MessageLuceneIndex.java
License:Apache License
/**
 * Deletes the given message from the index
 *
 * @param message the message to delete
 */
private void deleteMessageFromIndex(IndexWriter writer, Message message) {
    try {
        Term idTerm = new Term(LUCENE_ID_FIELD, message.getId().toString());
        writer.deleteDocuments(idTerm);
    } catch (IOException e) {
        log.debug("Error deleting message " + message.getId());
    }
}
From source file:org.ofbiz.content.search.DocumentIndexer.java
License:Apache License
@Override
public void run() {
    IndexWriter indexWriter = null;
    int uncommittedDocs = 0;
    while (true) {
        LuceneDocument ofbizDocument;
        try {
            // Execution will pause here until the queue receives a LuceneDocument for indexing
            ofbizDocument = documentIndexQueue.take();
        } catch (InterruptedException e) {
            Debug.logError(e, module);
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            break;
        }
        Term documentIdentifier = ofbizDocument.getDocumentIdentifier();
        Document document = ofbizDocument.prepareDocument(this.delegator);
        if (indexWriter == null) {
            try {
                indexWriter = new IndexWriter(this.indexDirectory,
                        new IndexWriterConfig(SearchWorker.LUCENE_VERSION,
                                new StandardAnalyzer(SearchWorker.LUCENE_VERSION)));
            } catch (CorruptIndexException e) {
                Debug.logError("Corrupted lucene index: " + e.getMessage(), module);
                break;
            } catch (LockObtainFailedException e) {
                Debug.logError("Could not obtain Lock on lucene index " + e.getMessage(), module);
                // TODO: put the thread to sleep waiting for the lock to be released
                break;
            } catch (IOException e) {
                Debug.logError(e.getMessage(), module);
                break;
            }
        }
        try {
            if (document == null) {
                indexWriter.deleteDocuments(documentIdentifier);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": deleted Lucene document: " + ofbizDocument, module);
            } else {
                indexWriter.updateDocument(documentIdentifier, document);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": indexed Lucene document: " + ofbizDocument, module);
            }
        } catch (Exception e) {
            Debug.logError(e, getName() + ": error processing Lucene document: " + ofbizDocument, module);
            if (documentIndexQueue.peek() == null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            continue;
        }
        uncommittedDocs++;
        if (uncommittedDocs == UNCOMMITTED_DOC_LIMIT || documentIndexQueue.peek() == null) {
            // limit reached or queue empty, time to commit
            try {
                indexWriter.commit();
            } catch (IOException e) {
                Debug.logError(e, module);
            }
            uncommittedDocs = 0;
        }
        if (documentIndexQueue.peek() == null) {
            try {
                indexWriter.close();
                indexWriter = null;
            } catch (IOException e) {
                Debug.logError(e, module);
            }
        }
    }
}
From source file:org.olat.search.service.indexer.JmsIndexer.java
License:Apache License
@Override
public void deleteDocument(String resourceUrl) {
    IndexWriter writer = null;
    try {
        Term uuidTerm = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
        writer = permanentIndexWriter.getAndLock();
        writer.deleteDocuments(uuidTerm);
    } catch (IOException e) {
        log.error("", e);
    } finally {
        permanentIndexWriter.release(writer);
    }
}
From source file:org.opengrok.indexer.index.IndexAnalysisSettingsAccessor.java
License:Open Source License
/**
 * Writes a document to contain the serialized version of {@code settings},
 * with a {@link QueryBuilder#OBJUID} value set to
 * {@link #INDEX_ANALYSIS_SETTINGS_OBJUID}. An existing version of the
 * document is first deleted.
 * @param writer a defined, target instance
 * @param settings a defined instance
 * @throws IOException if I/O error occurs while writing Lucene
 */
public void write(IndexWriter writer, IndexAnalysisSettings settings) throws IOException {
    byte[] objser = settings.serialize();
    writer.deleteDocuments(new Term(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID));

    Document doc = new Document();
    StringField uidfield = new StringField(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID,
            Field.Store.NO);
    doc.add(uidfield);
    doc.add(new StoredField(QueryBuilder.OBJSER, objser));
    doc.add(new StoredField(QueryBuilder.OBJVER, INDEX_ANALYSIS_SETTINGS_OBJVER));
    writer.addDocument(doc);
}
From source file:org.openrdf.sail.lucene.LuceneIndex.java
License:BSD License
public synchronized void removeStatement(Statement statement) throws IOException {
    Value object = statement.getObject();
    if (!(object instanceof Literal)) {
        return;
    }

    IndexWriter writer = null;
    boolean updated = false;

    // fetch the Document representing this Resource
    String resourceId = getResourceID(statement.getSubject());
    String contextId = getContextID(statement.getContext());
    String id = formIdString(resourceId, contextId);
    Term idTerm = new Term(ID_FIELD_NAME, id);
    Document document = getDocument(idTerm);

    if (document != null) {
        // determine the values used in the index for this triple
        String fieldName = statement.getPredicate().toString();
        String text = ((Literal) object).getLabel();

        // see if this triple occurs in this Document
        if (hasProperty(fieldName, text, document)) {
            // if the Document only has one predicate field, we can remove the document
            int nrProperties = numberOfPropertyFields(document);
            if (nrProperties == 0) {
                logger.info("encountered document with zero properties, should have been deleted: {}",
                        resourceId);
            } else if (nrProperties == 1) {
                writer = getIndexWriter();
                writer.deleteDocuments(idTerm);
                updated = true;
            } else {
                // there are more triples encoded in this Document: remove the
                // document and add a new Document without this triple
                Document newDocument = new Document();
                addID(id, newDocument);
                addResourceID(resourceId, newDocument);
                addContext(contextId, newDocument);

                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    String oldFieldName = oldField.name();
                    String oldValue = oldField.stringValue();
                    if (isPropertyField(oldFieldName)
                            && !(fieldName.equals(oldFieldName) && text.equals(oldValue))) {
                        addProperty(oldFieldName, oldValue, newDocument);
                    }
                }

                writer = getIndexWriter();
                writer.updateDocument(idTerm, newDocument);
                updated = true;
            }
        }
    }

    if (updated) {
        // make sure that these updates are visible for new IndexReaders/Searchers
        writer.commit();

        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }
}
From source file:org.openrdf.sail.lucene.LuceneIndex.java
License:BSD License
/**
 * Add many statements at the same time, remove many statements at the same
 * time. Ordering by resource has to be done inside this method. The passed
 * added/removed sets are disjunct, no statement can be in both.
 *
 * @param added
 *        all added statements, can have multiple subjects
 * @param removed
 *        all removed statements, can have multiple subjects
 */
public synchronized void addRemoveStatements(Collection<Statement> added, Collection<Statement> removed)
        throws Exception {
    // Buffer per resource
    MapOfListMaps<Resource, String, Statement> rsAdded = new MapOfListMaps<Resource, String, Statement>();
    MapOfListMaps<Resource, String, Statement> rsRemoved = new MapOfListMaps<Resource, String, Statement>();
    HashSet<Resource> resources = new HashSet<Resource>();
    for (Statement s : added) {
        rsAdded.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }
    for (Statement s : removed) {
        rsRemoved.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }

    logger.debug("Removing " + removed.size() + " statements, adding " + added.size() + " statements");

    IndexWriter writer = getIndexWriter();

    // for each resource, add/remove
    for (Resource resource : resources) {
        Map<String, List<Statement>> stmtsToRemove = rsRemoved.get(resource);
        Map<String, List<Statement>> stmtsToAdd = rsAdded.get(resource);

        Set<String> contextsToUpdate = new HashSet<String>(stmtsToAdd.keySet());
        contextsToUpdate.addAll(stmtsToRemove.keySet());

        Map<String, Document> docsByContext = new HashMap<String, Document>();

        // is the resource in the store?
        // fetch the Document representing this Resource
        String resourceId = getResourceID(resource);
        Term uriTerm = new Term(URI_FIELD_NAME, resourceId);
        List<Document> documents = getDocuments(uriTerm);
        for (Document doc : documents) {
            docsByContext.put(this.getContextID(doc), doc);
        }

        for (String contextId : contextsToUpdate) {
            String id = formIdString(resourceId, contextId);
            Term idTerm = new Term(ID_FIELD_NAME, id);

            Document document = docsByContext.get(contextId);
            if (document == null) {
                // there are no such Documents: create one now
                document = new Document();
                addID(id, document);
                addResourceID(resourceId, document);
                addContext(contextId, document);
                // add all statements, remember the contexts
                // HashSet<Resource> contextsToAdd = new HashSet<Resource>();
                List<Statement> list = stmtsToAdd.get(contextId);
                if (list != null) {
                    for (Statement s : list) {
                        addProperty(s, document);
                    }
                }

                // add it to the index
                writer.addDocument(document);

                // THERE SHOULD BE NO DELETED TRIPLES ON A NEWLY ADDED RESOURCE
                if (stmtsToRemove.containsKey(contextId))
                    logger.info("Statements are marked to be removed that should not be in the store,"
                            + " for resource {} and context {}. Nothing done.", resource, contextId);
            } else {
                // update the Document

                // create a copy of the old document; updating the retrieved
                // Document instance works ok for stored properties but indexed
                // data gets lost when doing an IndexWriter.updateDocument with it
                Document newDocument = new Document();

                // buffer the removed literal statements
                ListMap<String, String> removedOfResource = null;
                {
                    List<Statement> removedStatements = stmtsToRemove.get(contextId);
                    if (removedStatements != null && !removedStatements.isEmpty()) {
                        removedOfResource = new ListMap<String, String>();
                        for (Statement r : removedStatements) {
                            if (r.getObject() instanceof Literal) {
                                // remove value from both property field and the
                                // corresponding text field
                                String label = ((Literal) r.getObject()).getLabel();
                                removedOfResource.put(r.getPredicate().toString(), label);
                                removedOfResource.put(TEXT_FIELD_NAME, label);
                            }
                        }
                    }
                }

                // add all existing fields (including id, uri, context, and text)
                // but without adding the removed ones;
                // keep the predicate/value pairs to ensure that the statement
                // cannot be added twice
                SetMap<String, String> copiedProperties = new SetMap<String, String>();
                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    // do not copy removed statements to the new version of the document
                    if (removedOfResource != null) {
                        // which fields were removed?
                        List<String> objectsRemoved = removedOfResource.get(oldField.name());
                        if ((objectsRemoved != null) && (objectsRemoved.contains(oldField.stringValue())))
                            continue;
                    }
                    newDocument.add(oldField);
                    copiedProperties.put(oldField.name(), oldField.stringValue());
                }

                // add all statements to this document, except for those which
                // are already there
                {
                    List<Statement> addedToResource = stmtsToAdd.get(contextId);
                    String val;
                    if (addedToResource != null && !addedToResource.isEmpty()) {
                        for (Statement s : addedToResource) {
                            val = getLiteralPropertyValueAsString(s);
                            if (val != null) {
                                if (!copiedProperties.containsKeyValuePair(s.getPredicate().stringValue(), val)) {
                                    addProperty(s, newDocument);
                                }
                            }
                        }
                    }
                }

                // update the index with the cloned document, if it contains any
                // meaningful non-system properties
                int nrProperties = numberOfPropertyFields(newDocument);
                if (nrProperties > 0) {
                    writer.updateDocument(idTerm, newDocument);
                } else {
                    writer.deleteDocuments(idTerm);
                }
            }
        }
    }

    // make sure that these updates are visible for new IndexReaders/Searchers
    writer.commit();

    // the old IndexReaders/Searchers are now outdated
    invalidateReaders();
}
From source file:org.opensolaris.opengrok.index.IndexAnalysisSettingsAccessor.java
License:Open Source License
/**
 * Writes a document to contain the serialized version of {@code settings},
 * with a {@link QueryBuilder#OBJUID} value set to
 * {@link #INDEX_ANALYSIS_SETTINGS_OBJUID}. An existing version of the
 * document is first deleted.
 * @param writer a defined, target instance
 * @param settings a defined instance
 * @throws IOException if I/O error occurs while writing Lucene
 */
public void write(IndexWriter writer, IndexAnalysisSettings settings) throws IOException {
    byte[] objser = settings.serialize();
    writer.deleteDocuments(new Term(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID));

    Document doc = new Document();
    StringField uidfield = new StringField(QueryBuilder.OBJUID, INDEX_ANALYSIS_SETTINGS_OBJUID,
            Field.Store.NO);
    doc.add(uidfield);
    doc.add(new StoredField(QueryBuilder.OBJSER, objser));
    writer.addDocument(doc);
}
From source file:org.punksearch.crawler.IndexOperator.java
License:Open Source License
public static void deleteByHost(String dir, String host, String hostName) {
    try {
        IndexWriter iw = createIndexWriter(dir);
        final BooleanQuery delQuery = new BooleanQuery();
        delQuery.add(new TermQuery(new Term(IndexFields.HOST, host)), BooleanClause.Occur.SHOULD);
        delQuery.add(new TermQuery(new Term(IndexFields.HOST_NAME, hostName)), BooleanClause.Occur.SHOULD);
        iw.deleteDocuments(delQuery);
        iw.close();
    } catch (IOException ex) {
        log.error("Exception during deleting by host from index directory", ex);
        throw new RuntimeException(ex);
    }
}
From source file:org.punksearch.crawler.IndexOperator.java
License:Open Source License
public static void deleteByAge(String dirPath, float days) {
    try {
        final Directory dir = LuceneUtils.dir(dirPath);
        boolean indexExists = IndexReader.indexExists(dir);
        if (!indexExists) {
            return;
        }
        final IndexWriter iw = createIndexWriter(dirPath);

        final IndexReader ir = IndexReader.open(dir);
        IndexSearcher is = new IndexSearcher(ir);

        long min = 0;
        long max = System.currentTimeMillis() - Math.round(days * 1000 * 3600 * 24);
        final TermRangeQuery oldDocsQuery = new TermRangeQuery(IndexFields.INDEXED,
                DateTools.timeToString(min, DateTools.Resolution.MILLISECOND),
                DateTools.timeToString(max, DateTools.Resolution.MILLISECOND), true, false);

        final int docsInReader = ir.numDocs();
        final TopDocs topDocs = is.search(oldDocsQuery, Math.max(1, docsInReader));
        log.info("Deleting by age from index directory. Items to delete: " + topDocs.totalHits);

        iw.deleteDocuments(oldDocsQuery);
        iw.close();
    } catch (IOException ex) {
        log.error("Exception during deleting by age from index directory", ex);
        throw new RuntimeException(ex);
    }
}