Example usage for org.apache.lucene.index IndexWriter updateDocument

Introduction

This page collects example usages of org.apache.lucene.index.IndexWriter.updateDocument, drawn from open-source projects.

Prototype

public long updateDocument(Term term, Iterable<? extends IndexableField> doc)
        throws IOException

Atomically deletes the document(s) containing term and then adds the new document. The examples below all call this public overload.

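Before the project examples, here is a minimal, self-contained sketch of the upsert pattern this method enables. It assumes a modern Lucene API (roughly 5.x or later); the index path and the "id"/"contents" field names are illustrative assumptions, not taken from any project on this page.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentExample {
    public static void main(String[] args) throws Exception {
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // StringField is indexed but not tokenized, so the Term below
            // matches the stored key exactly.
            doc.add(new StringField("id", "42", Field.Store.YES));
            doc.add(new TextField("contents", "updated text for document 42", Field.Store.NO));

            // Atomically deletes any document(s) whose "id" term equals "42",
            // then adds the new document.
            writer.updateDocument(new Term("id", "42"), doc);
            writer.commit();
        }
    }
}

Because the delete-then-add is atomic, updateDocument behaves like addDocument when no existing document matches the term, which is why the examples below use it both for creating and for replacing entries.
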
Usage

From source file:org.neo4j.index.impl.lucene.CommitContext.java

License:Open Source License

private void applyDocuments(IndexWriter writer, IndexType type, Map<Long, DocumentContext> documents)
        throws IOException {
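    // Upsert per entry: update existing documents (or delete them once they
    // have become empty), and add documents for entities not yet indexed.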
    for (Map.Entry<Long, DocumentContext> entry : documents.entrySet()) {
        DocumentContext context = entry.getValue();
        if (context.exists) {
            if (LuceneDataSource.documentIsEmpty(context.document)) {
                writer.deleteDocuments(type.idTerm(context.entityId));
            } else {
                writer.updateDocument(type.idTerm(context.entityId), context.document);
            }
        } else {
            writer.addDocument(context.document);
        }
    }
}

From source file:org.ofbiz.content.search.DocumentIndexer.java

License:Apache License

@Override
public void run() {
    IndexWriter indexWriter = null;
    int uncommittedDocs = 0;
    while (true) {
        LuceneDocument ofbizDocument;
        try {
            // Execution will pause here until the queue receives a LuceneDocument for indexing
            ofbizDocument = documentIndexQueue.take();
        } catch (InterruptedException e) {
            Debug.logError(e, module);
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            break;
        }
        Term documentIdentifier = ofbizDocument.getDocumentIdentifier();
        Document document = ofbizDocument.prepareDocument(this.delegator);
        if (indexWriter == null) {
            try {
                indexWriter = new IndexWriter(this.indexDirectory, new IndexWriterConfig(
                        SearchWorker.LUCENE_VERSION, new StandardAnalyzer(SearchWorker.LUCENE_VERSION)));
            } catch (CorruptIndexException e) {
                Debug.logError("Corrupted lucene index: " + e.getMessage(), module);
                break;
            } catch (LockObtainFailedException e) {
                Debug.logError("Could not obtain Lock on lucene index " + e.getMessage(), module);
                // TODO: put the thread to sleep waiting for the lock to be released
                break;
            } catch (IOException e) {
                Debug.logError(e.getMessage(), module);
                break;
            }
        }
        try {
            if (document == null) {
                indexWriter.deleteDocuments(documentIdentifier);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": deleted Lucene document: " + ofbizDocument, module);
            } else {
                indexWriter.updateDocument(documentIdentifier, document);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": indexed Lucene document: " + ofbizDocument, module);
            }
        } catch (Exception e) {
            Debug.logError(e, getName() + ": error processing Lucene document: " + ofbizDocument, module);
            if (documentIndexQueue.peek() == null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            continue;
        }
        uncommittedDocs++;
        if (uncommittedDocs == UNCOMMITTED_DOC_LIMIT || documentIndexQueue.peek() == null) {
            // limit reached or queue empty, time to commit
            try {
                indexWriter.commit();
            } catch (IOException e) {
                Debug.logError(e, module);
            }
            uncommittedDocs = 0;
        }
        if (documentIndexQueue.peek() == null) {
            try {
                indexWriter.close();
                indexWriter = null;
            } catch (IOException e) {
                Debug.logError(e, module);
            }
        }
    }
}

From source file:org.olat.search.service.indexer.JmsIndexer.java

License:Apache License

/**
 * Add or update lucene documents in the permanent index.
 * @param documents
 */
@Override
public void addDocuments(List<Document> documents) {
    if (documents == null || documents.isEmpty())
        return; // nothing to do

    IndexWriter writer = null;
    try {
        DirectoryReader currentReader = getReader();
        IndexSearcher searcher = new IndexSearcher(currentReader);
        writer = permanentIndexWriter.getAndLock();

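        // Probe the index for each document's resource URL: update in place
        // when a hit is found, otherwise add a new document.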
        for (Document document : documents) {
            if (document != null) {
                String resourceUrl = document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
                Term uuidTerm = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
                TopDocs hits = searcher.search(new TermQuery(uuidTerm), 10);
                if (hits.totalHits > 0) {
                    writer.updateDocument(uuidTerm, document);
                } else {
                    writer.addDocument(document);
                }
            }
        }
    } catch (IOException e) {
        log.error("", e);
    } finally {
        permanentIndexWriter.release(writer);
    }
}

From source file:org.olat.search.service.indexer.JmsIndexer.java

License:Apache License

@Override
public void addDocument(Document document, IndexWriter writer) {
    try {
        String resourceUrl = document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
        Term uuidTerm = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
        DirectoryReader currentReader = getReader();
        IndexSearcher searcher = new IndexSearcher(currentReader);
        TopDocs hits = searcher.search(new TermQuery(uuidTerm), 10);
        if (hits.totalHits > 0) {
            writer.updateDocument(uuidTerm, document);
        } else {
            writer.addDocument(document);
        }
    } catch (IOException e) {
        log.error("", e);
    }
}

From source file:org.openeclass.lucene.demo.IndexCourses.java

License:Open Source License

private static void indexCourses(IndexWriter writer, Connection con) throws SQLException, IOException {

    PreparedStatement sql = con
            .prepareStatement("SELECT id, title, keywords, code, public_code, prof_names, created FROM course");
    ResultSet rs = sql.executeQuery();
    int c = 0;

    while (rs.next()) {

        Long id = rs.getLong(1);
        String title = rs.getString(2);
        String keys = rs.getString(3);
        String code = rs.getString(4);
        String publicCode = rs.getString(5);
        String profNames = rs.getString(6);
        //Timestamp created = rs.getTimestamp(7);

        Document doc = new Document();

        Field idField = new Field("course_id", id.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        doc.add(idField);

        Field titleField = new Field("title", title, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(titleField);

        Field keysField = new Field("keywords", keys, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(keysField);

        Field codeField = new Field("code", code, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(codeField);

        Field publicCodeField = new Field("public_code", publicCode, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(publicCodeField);

        Field profsField = new Field("prof_names", profNames, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(profsField);

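        // A CREATE-mode writer starts from an empty index, so a plain add is
        // enough; otherwise update (atomic delete-then-add) keyed on course_id.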
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            writer.addDocument(doc);
        } else {
            writer.updateDocument(new Term("course_id", id.toString()), doc);
        }

        c++;
    }

    System.out.println("total db rows: " + c);
    rs.close();
    sql.close();
}

From source file:org.openerproject.targetproperties.svector.indexing.CustomLuceneIndexer.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                log.warn(fnfe);
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // The reader is created with UTF-8 encoding; if the file is not
                // actually UTF-8, searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:org.openrdf.sail.lucene.LuceneIndex.java

License:BSD License

/**
 * Indexes the specified Statement.
 */
public synchronized void addStatement(Statement statement) throws IOException {
    // determine stuff to store
    Value object = statement.getObject();
    if (!(object instanceof Literal)) {
        return;
    }

    String field = statement.getPredicate().toString();
    String text = ((Literal) object).getLabel();
    String context = getContextID(statement.getContext());
    boolean updated = false;
    IndexWriter writer = null;

    // fetch the Document representing this Resource
    String resourceId = getResourceID(statement.getSubject());
    String contextId = getContextID(statement.getContext());

    String id = formIdString(resourceId, contextId);
    Term idTerm = new Term(ID_FIELD_NAME, id);
    Document document = getDocument(idTerm);

    if (document == null) {
        // there is no such Document: create one now
        document = new Document();
        addID(id, document);
        addResourceID(resourceId, document);
        // add context
        addContext(context, document);

        addProperty(field, text, document);

        // add it to the index
        writer = getIndexWriter();
        writer.addDocument(document);
        updated = true;
    } else {
        // update this Document when this triple has not been stored already
        if (!hasProperty(field, text, document)) {
            // create a copy of the old document; updating the retrieved
            // Document instance works ok for stored properties but indexed data
            // gets lost when doing an IndexWriter.updateDocument with it
            Document newDocument = new Document();

            // add all existing fields (including id, uri, context, and text)
            for (Object oldFieldObject : document.getFields()) {
                Field oldField = (Field) oldFieldObject;
                newDocument.add(oldField);
            }

            // add the new triple to the cloned document
            addProperty(field, text, newDocument);

            // update the index with the cloned document
            writer = getIndexWriter();
            writer.updateDocument(idTerm, newDocument);
            updated = true;
        }
    }

    if (updated) {
        // make sure that these updates are visible for new
        // IndexReaders/Searchers
        writer.commit();

        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }
}

From source file:org.openrdf.sail.lucene.LuceneIndex.java

License:BSD License

public synchronized void removeStatement(Statement statement) throws IOException {
    Value object = statement.getObject();
    if (!(object instanceof Literal)) {
        return;
    }

    IndexWriter writer = null;
    boolean updated = false;

    // fetch the Document representing this Resource
    String resourceId = getResourceID(statement.getSubject());
    String contextId = getContextID(statement.getContext());
    String id = formIdString(resourceId, contextId);
    Term idTerm = new Term(ID_FIELD_NAME, id);

    Document document = getDocument(idTerm);

    if (document != null) {
        // determine the values used in the index for this triple
        String fieldName = statement.getPredicate().toString();
        String text = ((Literal) object).getLabel();

        // see if this triple occurs in this Document
        if (hasProperty(fieldName, text, document)) {
            // if the Document only has one predicate field, we can remove the
            // document
            int nrProperties = numberOfPropertyFields(document);
            if (nrProperties == 0) {
                logger.info("encountered document with zero properties, should have been deleted: {}",
                        resourceId);
            } else if (nrProperties == 1) {
                writer = getIndexWriter();
                writer.deleteDocuments(idTerm);
                updated = true;
            } else {
                // there are more triples encoded in this Document: remove the
                // document and add a new Document without this triple
                Document newDocument = new Document();
                addID(id, newDocument);
                addResourceID(resourceId, newDocument);
                addContext(contextId, newDocument);

                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    String oldFieldName = oldField.name();
                    String oldValue = oldField.stringValue();

                    if (isPropertyField(oldFieldName)
                            && !(fieldName.equals(oldFieldName) && text.equals(oldValue))) {
                        addProperty(oldFieldName, oldValue, newDocument);
                    }
                }

                writer = getIndexWriter();
                writer.updateDocument(idTerm, newDocument);
                updated = true;
            }
        }
    }

    if (updated) {
        // make sure that these updates are visible for new
        // IndexReaders/Searchers
        writer.commit();

        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }
}

From source file:org.openrdf.sail.lucene.LuceneIndex.java

License:BSD License

/**
 * Add many statements at the same time, remove many statements at the same
 * time. Ordering by resource has to be done inside this method. The passed
 * added/removed sets are disjoint; no statement can be in both.
 *
 * @param added
 *        all added statements, can have multiple subjects
 * @param removed
 *        all removed statements, can have multiple subjects
 */
public synchronized void addRemoveStatements(Collection<Statement> added, Collection<Statement> removed)
        throws Exception {
    // Buffer per resource
    MapOfListMaps<Resource, String, Statement> rsAdded = new MapOfListMaps<Resource, String, Statement>();
    MapOfListMaps<Resource, String, Statement> rsRemoved = new MapOfListMaps<Resource, String, Statement>();

    HashSet<Resource> resources = new HashSet<Resource>();
    for (Statement s : added) {
        rsAdded.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }
    for (Statement s : removed) {
        rsRemoved.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }

    logger.debug("Removing " + removed.size() + " statements, adding " + added.size() + " statements");

    IndexWriter writer = getIndexWriter();

    // for each resource, add/remove
    for (Resource resource : resources) {
        Map<String, List<Statement>> stmtsToRemove = rsRemoved.get(resource);
        Map<String, List<Statement>> stmtsToAdd = rsAdded.get(resource);

        Set<String> contextsToUpdate = new HashSet<String>(stmtsToAdd.keySet());
        contextsToUpdate.addAll(stmtsToRemove.keySet());

        Map<String, Document> docsByContext = new HashMap<String, Document>();
        // is the resource in the store?
        // fetch the Document representing this Resource
        String resourceId = getResourceID(resource);
        Term uriTerm = new Term(URI_FIELD_NAME, resourceId);
        List<Document> documents = getDocuments(uriTerm);

        for (Document doc : documents) {
            docsByContext.put(this.getContextID(doc), doc);
        }

        for (String contextId : contextsToUpdate) {
            String id = formIdString(resourceId, contextId);

            Term idTerm = new Term(ID_FIELD_NAME, id);
            Document document = docsByContext.get(contextId);
            if (document == null) {
                // there are no such Documents: create one now
                document = new Document();
                addID(id, document);
                addResourceID(resourceId, document);
                addContext(contextId, document);
                // add all statements, remember the contexts
                // HashSet<Resource> contextsToAdd = new HashSet<Resource>();
                List<Statement> list = stmtsToAdd.get(contextId);
                if (list != null) {
                    for (Statement s : list) {
                        addProperty(s, document);
                    }
                }

                // add it to the index
                writer.addDocument(document);

                // THERE SHOULD BE NO DELETED TRIPLES ON A NEWLY ADDED RESOURCE
                if (stmtsToRemove.containsKey(contextId))
                    logger.info(
                            "Statements are marked to be removed that should not be in the store, for resource {} and context {}. Nothing done.",
                            resource, contextId);
            } else {
                // update the Document

                // create a copy of the old document; updating the retrieved
                // Document instance works ok for stored properties but indexed
                // data gets lost when doing an IndexWriter.updateDocument with it
                Document newDocument = new Document();

                // buffer the removed literal statements
                ListMap<String, String> removedOfResource = null;
                {
                    List<Statement> removedStatements = stmtsToRemove.get(contextId);
                    if (removedStatements != null && !removedStatements.isEmpty()) {
                        removedOfResource = new ListMap<String, String>();
                        for (Statement r : removedStatements) {
                            if (r.getObject() instanceof Literal) {
                                // remove value from both property field and the
                                // corresponding text field
                                String label = ((Literal) r.getObject()).getLabel();
                                removedOfResource.put(r.getPredicate().toString(), label);
                                removedOfResource.put(TEXT_FIELD_NAME, label);
                            }
                        }
                    }
                }

                // add all existing fields (including id, uri, context, and text)
                // but without adding the removed ones
                // keep the predicate/value pairs to ensure that the statement
                // cannot be added twice
                SetMap<String, String> copiedProperties = new SetMap<String, String>();
                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    // do not copy removed statements to the new version of the
                    // document
                    if (removedOfResource != null) {
                        // which fields were removed?
                        List<String> objectsRemoved = removedOfResource.get(oldField.name());
                        if ((objectsRemoved != null) && (objectsRemoved.contains(oldField.stringValue())))
                            continue;
                    }
                    newDocument.add(oldField);
                    copiedProperties.put(oldField.name(), oldField.stringValue());
                }

                // add all statements to this document, except for those which
                // are already there
                {
                    List<Statement> addedToResource = stmtsToAdd.get(contextId);
                    String val;
                    if (addedToResource != null && !addedToResource.isEmpty()) {
                        for (Statement s : addedToResource) {
                            val = getLiteralPropertyValueAsString(s);
                            if (val != null) {
                                if (!copiedProperties.containsKeyValuePair(s.getPredicate().stringValue(),
                                        val)) {
                                    addProperty(s, newDocument);
                                }
                            }
                        }
                    }
                }

                // update the index with the cloned document, if it contains any
                // meaningful non-system properties
                int nrProperties = numberOfPropertyFields(newDocument);
                if (nrProperties > 0) {
                    writer.updateDocument(idTerm, newDocument);
                } else {
                    writer.deleteDocuments(idTerm);
                }
            }
        }
    }
    // make sure that these updates are visible for new
    // IndexReaders/Searchers
    writer.commit();

    // the old IndexReaders/Searchers are now outdated
    invalidateReaders();

}

From source file:org.segrada.search.lucene.LuceneSearchEngine.java

License:Apache License

@Override
public synchronized boolean index(String id, String className, String title, String subTitles, String content,
        String[] tagIds, Integer color, String iconFileIdentifier, float weight) {
    try {
        // init index writer config
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, this.analyzer);

        // create new index writer
        IndexWriter iWriter = new IndexWriter(directory, indexWriterConfig);
        Document doc = new Document();

        doc.add(new Field("id", id, simpleIndexType));
        doc.add(new Field("className", className, simpleIndexType));

        Field field;
        if (title != null) {
            field = new Field("title", title, indexedTextType);
            field.setBoost(10f * weight);
            doc.add(field);
        }

        if (subTitles != null) {
            field = new Field("subTitles", subTitles, indexedTextType);
            field.setBoost(6f * weight);
            doc.add(field);
        }

        // add content
        if (content == null)
            content = "";
        field = new Field("content", content, indexedTextType);
        field.setBoost(weight);
        doc.add(field);

        // add tagIds
        if (tagIds != null)
            for (String tagId : tagIds) {
                field = new Field("tag", tagId, simpleIndexType);
                field.setBoost(weight);
                doc.add(field);
            }

        // add color and icon - just stored
        if (color != null) {
            field = new IntField("color", color, IntField.TYPE_STORED);
            doc.add(field);
        }

        // add color and icon - just stored
        if (iconFileIdentifier != null) {
            field = new Field("iconFileIdentifier", iconFileIdentifier, TextField.TYPE_STORED);
            doc.add(field);
        }

        // create or update document
        iWriter.updateDocument(new Term("id", id), doc);
        iWriter.close();
    } catch (Exception e) {
        logger.error("Could not index document " + id, e);
        return false;
    }

    return true;
}