List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
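Before the individual examples, a minimal self-contained sketch may help. The signature shown above is Lucene's internal overload; the public API is updateDocument(Term term, Iterable<? extends IndexableField> doc), which deletes all documents containing the given term and then adds the new document, so it also works when no matching document exists yet. The sketch below assumes the Lucene 4.x API used by most examples on this page; the index path "/tmp/demo-index" and the "id"/"body" field names are made up for illustration.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(new File("/tmp/demo-index"));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
                new StandardAnalyzer(Version.LUCENE_47));
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            // the untokenized "id" field serves as the primary key for updates
            doc.add(new StringField("id", "42", Field.Store.YES));
            doc.add(new TextField("body", "updated text for document 42", Field.Store.NO));
            // deletes any document matching the term, then adds doc;
            // behaves like addDocument when no match exists
            writer.updateDocument(new Term("id", "42"), doc);
            writer.commit();
        }
    }
}

The delete and add are atomic as seen by readers of the same index, so a searcher never observes a state where the old document is gone but the new one is not yet visible.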
From source file: org.neo4j.index.impl.lucene.CommitContext.java
License: Open Source License
private void applyDocuments(IndexWriter writer, IndexType type, Map<Long, DocumentContext> documents)
        throws IOException {
    for (Map.Entry<Long, DocumentContext> entry : documents.entrySet()) {
        DocumentContext context = entry.getValue();
        if (context.exists) {
            if (LuceneDataSource.documentIsEmpty(context.document)) {
                writer.deleteDocuments(type.idTerm(context.entityId));
            } else {
                writer.updateDocument(type.idTerm(context.entityId), context.document);
            }
        } else {
            writer.addDocument(context.document);
        }
    }
}
From source file: org.ofbiz.content.search.DocumentIndexer.java
License: Apache License
@Override
public void run() {
    IndexWriter indexWriter = null;
    int uncommittedDocs = 0;
    while (true) {
        LuceneDocument ofbizDocument;
        try {
            // Execution will pause here until the queue receives a LuceneDocument for indexing
            ofbizDocument = documentIndexQueue.take();
        } catch (InterruptedException e) {
            Debug.logError(e, module);
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            break;
        }
        Term documentIdentifier = ofbizDocument.getDocumentIdentifier();
        Document document = ofbizDocument.prepareDocument(this.delegator);
        if (indexWriter == null) {
            try {
                indexWriter = new IndexWriter(this.indexDirectory,
                        new IndexWriterConfig(SearchWorker.LUCENE_VERSION,
                                new StandardAnalyzer(SearchWorker.LUCENE_VERSION)));
            } catch (CorruptIndexException e) {
                Debug.logError("Corrupted lucene index: " + e.getMessage(), module);
                break;
            } catch (LockObtainFailedException e) {
                Debug.logError("Could not obtain Lock on lucene index " + e.getMessage(), module);
                // TODO: put the thread to sleep waiting for the lock to be released
                break;
            } catch (IOException e) {
                Debug.logError(e.getMessage(), module);
                break;
            }
        }
        try {
            if (document == null) {
                indexWriter.deleteDocuments(documentIdentifier);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": deleted Lucene document: " + ofbizDocument, module);
            } else {
                indexWriter.updateDocument(documentIdentifier, document);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": indexed Lucene document: " + ofbizDocument, module);
            }
        } catch (Exception e) {
            Debug.logError(e, getName() + ": error processing Lucene document: " + ofbizDocument, module);
            if (documentIndexQueue.peek() == null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            continue;
        }
        uncommittedDocs++;
        if (uncommittedDocs == UNCOMMITTED_DOC_LIMIT || documentIndexQueue.peek() == null) {
            // limit reached or queue empty, time to commit
            try {
                indexWriter.commit();
            } catch (IOException e) {
                Debug.logError(e, module);
            }
            uncommittedDocs = 0;
        }
        if (documentIndexQueue.peek() == null) {
            try {
                indexWriter.close();
                indexWriter = null;
            } catch (IOException e) {
                Debug.logError(e, module);
            }
        }
    }
}
From source file: org.olat.search.service.indexer.JmsIndexer.java
License: Apache License
/**
 * Add or update lucene documents in the permanent index.
 * @param documents the documents to add or update
 */
@Override
public void addDocuments(List<Document> documents) {
    if (documents == null || documents.isEmpty())
        return; // nothing to do
    IndexWriter writer = null;
    try {
        DirectoryReader currentReader = getReader();
        IndexSearcher searcher = new IndexSearcher(currentReader);
        writer = permanentIndexWriter.getAndLock();
        for (Document document : documents) {
            if (document != null) {
                String resourceUrl = document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
                Term uuidTerm = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
                TopDocs hits = searcher.search(new TermQuery(uuidTerm), 10);
                if (hits.totalHits > 0) {
                    writer.updateDocument(uuidTerm, document);
                } else {
                    writer.addDocument(document);
                }
            }
        }
    } catch (IOException e) {
        log.error("", e);
    } finally {
        permanentIndexWriter.release(writer);
    }
}
From source file: org.olat.search.service.indexer.JmsIndexer.java
License: Apache License
@Override
public void addDocument(Document document, IndexWriter writer) {
    try {
        String resourceUrl = document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
        Term uuidTerm = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
        DirectoryReader currentReader = getReader();
        IndexSearcher searcher = new IndexSearcher(currentReader);
        TopDocs hits = searcher.search(new TermQuery(uuidTerm), 10);
        if (hits.totalHits > 0) {
            writer.updateDocument(uuidTerm, document);
        } else {
            writer.addDocument(document);
        }
    } catch (IOException e) {
        log.error("", e);
    }
}
From source file: org.openeclass.lucene.demo.IndexCourses.java
License: Open Source License
private static void indexCourses(IndexWriter writer, Connection con) throws SQLException, IOException {
    PreparedStatement sql = con.prepareStatement(
            "SELECT id, title, keywords, code, public_code, prof_names, created FROM course");
    ResultSet rs = sql.executeQuery();
    int c = 0;
    while (rs.next()) {
        Long id = rs.getLong(1);
        String title = rs.getString(2);
        String keys = rs.getString(3);
        String code = rs.getString(4);
        String publicCode = rs.getString(5);
        String profNames = rs.getString(6);
        //Timestamp created = rs.getTimestamp(7);

        Document doc = new Document();
        Field idField = new Field("course_id", id.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        doc.add(idField);
        Field titleField = new Field("title", title, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(titleField);
        Field keysField = new Field("keywords", keys, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(keysField);
        Field codeField = new Field("code", code, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(codeField);
        Field publicCodeField = new Field("public_code", publicCode, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(publicCodeField);
        Field profsField = new Field("prof_names", profNames, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(profsField);

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            writer.addDocument(doc);
        } else {
            writer.updateDocument(new Term("course_id", id.toString()), doc);
        }
        c++;
    }
    System.out.println("total db rows: " + c);
    rs.close();
    sql.close();
}
From source file: org.openerproject.targetproperties.svector.indexing.CustomLuceneIndexer.java
License: Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                log.warn(fnfe);
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file: org.openrdf.sail.lucene.LuceneIndex.java
License: BSD License
/**
 * Indexes the specified Statement.
 */
public synchronized void addStatement(Statement statement) throws IOException {
    // determine stuff to store
    Value object = statement.getObject();
    if (!(object instanceof Literal)) {
        return;
    }

    String field = statement.getPredicate().toString();
    String text = ((Literal) object).getLabel();
    String context = getContextID(statement.getContext());
    boolean updated = false;
    IndexWriter writer = null;

    // fetch the Document representing this Resource
    String resourceId = getResourceID(statement.getSubject());
    String contextId = getContextID(statement.getContext());
    String id = formIdString(resourceId, contextId);
    Term idTerm = new Term(ID_FIELD_NAME, id);
    Document document = getDocument(idTerm);

    if (document == null) {
        // there is no such Document: create one now
        document = new Document();
        addID(id, document);
        addResourceID(resourceId, document);
        // add context
        addContext(context, document);
        addProperty(field, text, document);

        // add it to the index
        writer = getIndexWriter();
        writer.addDocument(document);
        updated = true;
    } else {
        // update this Document when this triple has not been stored already
        if (!hasProperty(field, text, document)) {
            // create a copy of the old document; updating the retrieved
            // Document instance works ok for stored properties but indexed data
            // gets lost when doing an IndexWriter.updateDocument with it
            Document newDocument = new Document();

            // add all existing fields (including id, uri, context, and text)
            for (Object oldFieldObject : document.getFields()) {
                Field oldField = (Field) oldFieldObject;
                newDocument.add(oldField);
            }

            // add the new triple to the cloned document
            addProperty(field, text, newDocument);

            // update the index with the cloned document
            writer = getIndexWriter();
            writer.updateDocument(idTerm, newDocument);
            updated = true;
        }
    }

    if (updated) {
        // make sure that these updates are visible for new
        // IndexReaders/Searchers
        writer.commit();
        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }
}
From source file: org.openrdf.sail.lucene.LuceneIndex.java
License: BSD License
public synchronized void removeStatement(Statement statement) throws IOException {
    Value object = statement.getObject();
    if (!(object instanceof Literal)) {
        return;
    }

    IndexWriter writer = null;
    boolean updated = false;

    // fetch the Document representing this Resource
    String resourceId = getResourceID(statement.getSubject());
    String contextId = getContextID(statement.getContext());
    String id = formIdString(resourceId, contextId);
    Term idTerm = new Term(ID_FIELD_NAME, id);
    Document document = getDocument(idTerm);

    if (document != null) {
        // determine the values used in the index for this triple
        String fieldName = statement.getPredicate().toString();
        String text = ((Literal) object).getLabel();

        // see if this triple occurs in this Document
        if (hasProperty(fieldName, text, document)) {
            // if the Document only has one predicate field, we can remove the
            // document
            int nrProperties = numberOfPropertyFields(document);
            if (nrProperties == 0) {
                logger.info("encountered document with zero properties, should have been deleted: {}",
                        resourceId);
            } else if (nrProperties == 1) {
                writer = getIndexWriter();
                writer.deleteDocuments(idTerm);
                updated = true;
            } else {
                // there are more triples encoded in this Document: remove the
                // document and add a new Document without this triple
                Document newDocument = new Document();
                addID(id, newDocument);
                addResourceID(resourceId, newDocument);
                addContext(contextId, newDocument);

                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    String oldFieldName = oldField.name();
                    String oldValue = oldField.stringValue();
                    if (isPropertyField(oldFieldName)
                            && !(fieldName.equals(oldFieldName) && text.equals(oldValue))) {
                        addProperty(oldFieldName, oldValue, newDocument);
                    }
                }

                writer = getIndexWriter();
                writer.updateDocument(idTerm, newDocument);
                updated = true;
            }
        }
    }

    if (updated) {
        // make sure that these updates are visible for new
        // IndexReaders/Searchers
        writer.commit();
        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }
}
From source file: org.openrdf.sail.lucene.LuceneIndex.java
License: BSD License
/**
 * Add many statements at the same time, remove many statements at the same
 * time. Ordering by resource has to be done inside this method. The passed
 * added/removed sets are disjoint: no statement can be in both.
 *
 * @param added
 *        all added statements, can have multiple subjects
 * @param removed
 *        all removed statements, can have multiple subjects
 */
public synchronized void addRemoveStatements(Collection<Statement> added, Collection<Statement> removed)
        throws Exception {
    // Buffer per resource
    MapOfListMaps<Resource, String, Statement> rsAdded = new MapOfListMaps<Resource, String, Statement>();
    MapOfListMaps<Resource, String, Statement> rsRemoved = new MapOfListMaps<Resource, String, Statement>();
    HashSet<Resource> resources = new HashSet<Resource>();
    for (Statement s : added) {
        rsAdded.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }
    for (Statement s : removed) {
        rsRemoved.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }

    logger.debug("Removing " + removed.size() + " statements, adding " + added.size() + " statements");

    IndexWriter writer = getIndexWriter();

    // for each resource, add/remove
    for (Resource resource : resources) {
        Map<String, List<Statement>> stmtsToRemove = rsRemoved.get(resource);
        Map<String, List<Statement>> stmtsToAdd = rsAdded.get(resource);

        Set<String> contextsToUpdate = new HashSet<String>(stmtsToAdd.keySet());
        contextsToUpdate.addAll(stmtsToRemove.keySet());

        Map<String, Document> docsByContext = new HashMap<String, Document>();

        // is the resource in the store?
        // fetch the Document representing this Resource
        String resourceId = getResourceID(resource);
        Term uriTerm = new Term(URI_FIELD_NAME, resourceId);
        List<Document> documents = getDocuments(uriTerm);
        for (Document doc : documents) {
            docsByContext.put(this.getContextID(doc), doc);
        }

        for (String contextId : contextsToUpdate) {
            String id = formIdString(resourceId, contextId);
            Term idTerm = new Term(ID_FIELD_NAME, id);

            Document document = docsByContext.get(contextId);
            if (document == null) {
                // there are no such Documents: create one now
                document = new Document();
                addID(id, document);
                addResourceID(resourceId, document);
                addContext(contextId, document);
                // add all statements, remember the contexts
                // HashSet<Resource> contextsToAdd = new HashSet<Resource>();
                List<Statement> list = stmtsToAdd.get(contextId);
                if (list != null) {
                    for (Statement s : list) {
                        addProperty(s, document);
                    }
                }

                // add it to the index
                writer.addDocument(document);

                // THERE SHOULD BE NO DELETED TRIPLES ON A NEWLY ADDED RESOURCE
                if (stmtsToRemove.containsKey(contextId))
                    logger.info(
                            "Statements are marked to be removed that should not be in the store, for resource {} and context {}. Nothing done.",
                            resource, contextId);
            } else {
                // update the Document

                // create a copy of the old document; updating the retrieved
                // Document instance works ok for stored properties but indexed
                // data gets lost when doing an IndexWriter.updateDocument with it
                Document newDocument = new Document();

                // buffer the removed literal statements
                ListMap<String, String> removedOfResource = null;
                {
                    List<Statement> removedStatements = stmtsToRemove.get(contextId);
                    if (removedStatements != null && !removedStatements.isEmpty()) {
                        removedOfResource = new ListMap<String, String>();
                        for (Statement r : removedStatements) {
                            if (r.getObject() instanceof Literal) {
                                // remove value from both property field and the
                                // corresponding text field
                                String label = ((Literal) r.getObject()).getLabel();
                                removedOfResource.put(r.getPredicate().toString(), label);
                                removedOfResource.put(TEXT_FIELD_NAME, label);
                            }
                        }
                    }
                }

                // add all existing fields (including id, uri, context, and text)
                // but without adding the removed ones
                // keep the predicate/value pairs to ensure that the statement
                // cannot be added twice
                SetMap<String, String> copiedProperties = new SetMap<String, String>();
                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    // do not copy removed statements to the new version of the
                    // document
                    if (removedOfResource != null) {
                        // which fields were removed?
                        List<String> objectsRemoved = removedOfResource.get(oldField.name());
                        if ((objectsRemoved != null) && (objectsRemoved.contains(oldField.stringValue())))
                            continue;
                    }
                    newDocument.add(oldField);
                    copiedProperties.put(oldField.name(), oldField.stringValue());
                }

                // add all statements to this document, except for those which
                // are already there
                {
                    List<Statement> addedToResource = stmtsToAdd.get(contextId);
                    String val;
                    if (addedToResource != null && !addedToResource.isEmpty()) {
                        for (Statement s : addedToResource) {
                            val = getLiteralPropertyValueAsString(s);
                            if (val != null) {
                                if (!copiedProperties.containsKeyValuePair(s.getPredicate().stringValue(), val)) {
                                    addProperty(s, newDocument);
                                }
                            }
                        }
                    }
                }

                // update the index with the cloned document, if it contains any
                // meaningful non-system properties
                int nrProperties = numberOfPropertyFields(newDocument);
                if (nrProperties > 0) {
                    writer.updateDocument(idTerm, newDocument);
                } else {
                    writer.deleteDocuments(idTerm);
                }
            }
        }
    }

    // make sure that these updates are visible for new
    // IndexReaders/Searchers
    writer.commit();
    // the old IndexReaders/Searchers are now outdated
    invalidateReaders();
}
From source file: org.segrada.search.lucene.LuceneSearchEngine.java
License: Apache License
@Override
public synchronized boolean index(String id, String className, String title, String subTitles,
        String content, String[] tagIds, Integer color, String iconFileIdentifier, float weight) {
    try {
        // init index writer config
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, this.analyzer);

        // create new index writer
        IndexWriter iWriter = new IndexWriter(directory, indexWriterConfig);

        Document doc = new Document();

        doc.add(new Field("id", id, simpleIndexType));
        doc.add(new Field("className", className, simpleIndexType));

        Field field;
        if (title != null) {
            field = new Field("title", title, indexedTextType);
            field.setBoost(10f * weight);
            doc.add(field);
        }

        if (subTitles != null) {
            field = new Field("subTitles", subTitles, indexedTextType);
            field.setBoost(6f * weight);
            doc.add(field);
        }

        // add content
        if (content == null)
            content = "";
        field = new Field("content", content, indexedTextType);
        field.setBoost(weight);
        doc.add(field);

        // add tagIds
        if (tagIds != null)
            for (String tagId : tagIds) {
                field = new Field("tag", tagId, simpleIndexType);
                field.setBoost(weight);
                doc.add(field);
            }

        // add color - just stored
        if (color != null) {
            field = new IntField("color", color, IntField.TYPE_STORED);
            doc.add(field);
        }

        // add icon file identifier - just stored
        if (iconFileIdentifier != null) {
            field = new Field("iconFileIdentifier", iconFileIdentifier, TextField.TYPE_STORED);
            doc.add(field);
        }

        // create or update document
        iWriter.updateDocument(new Term("id", id), doc);

        iWriter.close();
    } catch (Exception e) {
        logger.error("Could not index document " + id, e);
        return false;
    }

    return true;
}