List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
From source file:org.explore3.searchengine.indexCreator.ImageIndex.java
License:Apache License
static void indexDocument(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocument(writer, new File(file, files[i])); }/*from w w w .ja v a 2s . c o m*/ } } else { try { Map<String, String> infoWithField = HtmlImageParsing.parse(file); // make a new, empty document Document doc = new Document(); FieldType type = new FieldType(); type.setIndexed(true); type.setStored(true); type.setStoreTermVectors(true); type.setTokenized(true); type.setStoreTermVectorOffsets(true); if (!infoWithField.isEmpty()) { Field image = new TextField("image", infoWithField.get("image").toString(), Field.Store.YES); System.out.println(infoWithField.get("image").toString()); doc.add(image); Field title = new StringField("title", infoWithField.get("title"), Field.Store.YES); doc.add(title); Field path = new StringField("path", file.getPath(), Field.Store.YES); doc.add(path); } if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { //System.out.println("adding " + file); writer.addDocument(doc); } else { System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { } } } }
From source file:org.explore3.searchengine.indexCreator.Indexer.java
License:Apache License
static void indexDocument(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocument(writer, new File(file, files[i])); }//from w ww . j a va 2s . c o m } } else { try { Map<String, String> infoWithField = HtmlParsing.parse(file); // make a new, empty document Document doc = new Document(); Field title = new TextField("title", infoWithField.get("title"), Field.Store.YES); doc.add(title); Field path = new StringField("path", file.getPath(), Field.Store.YES); doc.add(path); FieldType type = new FieldType(); type.setIndexed(true); type.setStored(true); type.setStoreTermVectors(true); type.setTokenized(true); type.setStoreTermVectorOffsets(true); Field highlighter = new Field("highlighterWords", infoWithField.get("text"), type); doc.add(highlighter); Field contents = new TextField("words", infoWithField.get("text"), Field.Store.YES); doc.add(contents); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { System.out.println("adding " + file); writer.addDocument(doc); } else { System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { } } } }
From source file:org.fnlp.app.lucene.demo.BuildIndex.java
License:Open Source License
/** * @param args//w w w . j av a2 s . c o m * @throws IOException * @throws LoadModelException */ public static void main(String[] args) throws IOException, LoadModelException { String indexPath = "../tmp/lucene"; System.out.println("Indexing to directory '" + indexPath + "'..."); Date start = new Date(); Directory dir = FSDirectory.open(new File(indexPath));//Dirctory dir-->FSDirectory //?? CNFactory CNFactory factory = CNFactory.getInstance("../models", Models.SEG_TAG); Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_47); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); String[] strs = new String[] { "?", "?????????", "????", "" }; //Date start = new Date(); for (int i = 0; i < strs.length; i++) { Document doc = new Document(); Field field = new TextField("content", strs[i], Field.Store.YES); doc.add(field); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { writer.updateDocument(new Term("content", strs[i]), doc); } } writer.close(); //?????? //dir.close(); //?????? Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:org.frontcache.cache.impl.LuceneIndexManager.java
License:Apache License
/** * Writes webResponse to index//from ww w . j a v a2s . com * @param response * @throws IOException */ void indexDoc(WebResponse response) throws IOException { IndexWriter iWriter = getIndexWriter(); Document doc = new Document(); String url = response.getUrl(); if (null == url) { logger.error("URL can't be null during index time for " + response); return; } doc.add(new StringField(URL_FIELD, url, Field.Store.YES)); doc.add(new StringField(DOMAIN_FIELD, response.getDomain(), Field.Store.YES)); if (null != response.getContent()) doc.add(new StoredField(BIN_FIELD, response.getContent())); // doc.add(new NumericDocValuesField(EXPIRE_DATE_FIELD, response.getExpireTimeMillis())); // TODO: store map ? doc.add(new StoredField(JSON_FIELD, gson.toJson(response), JSON_TYPE)); for (String tag : response.getTags()) doc.add(new StringField(TAGS_FIELD, tag, Field.Store.NO)); // tag is StringField to exact match try { iWriter.updateDocument(new Term(URL_FIELD, url), doc); } catch (IOException e) { logger.error("Error while in Lucene index operation: {}", e.getMessage(), e); } finally { try { iWriter.commit(); } catch (IOException ioEx) { logger.error("Error while commiting changes to Lucene index: {}", ioEx.getMessage(), ioEx); } } }
From source file:org.imixs.workflow.plugins.jee.extended.LucenePlugin.java
License:Open Source License
/**
 * Updates the search index for a collection of workitems.
 *
 * For each workitem the method tests whether it matches the conditions to be
 * added into the search index. If the workitem does not match the conditions
 * it is removed from the index instead.
 *
 * The method loads the lucene index properties from the imixs.properties
 * file on the classpath. If no properties are defined the method terminates
 * without touching the index.
 *
 * @param worklist collection of ItemCollections to be indexed
 * @return true if the update was successful, false if no index properties
 *         were found
 * @throws PluginException if the index cannot be updated or closed
 */
public static boolean updateWorklist(Collection<ItemCollection> worklist) throws PluginException {
    IndexWriter awriter = null;
    // try loading imixs-search properties; an empty result means indexing is
    // not configured, so there is nothing to do.
    Properties prop = loadProperties();
    if (prop.isEmpty())
        return false;
    try {
        awriter = createIndexWriter(prop);
        // add or remove each workitem in the search index....
        for (ItemCollection workitem : worklist) {
            // the $uniqueid term identifies the workitem's document
            Term term = new Term("$uniqueid", workitem.getItemValueString("$uniqueid"));
            // index the workitem if it matches the conditions, otherwise make
            // sure any stale document for it is deleted
            if (matchConditions(prop, workitem))
                awriter.updateDocument(term, createDocument(prop, workitem));
            else
                awriter.deleteDocuments(term);
        }
    } catch (IOException luceneEx) {
        // wrap the low-level failure; the writer is still closed below
        logger.warning(" Lucene Exception : " + luceneEx.getMessage());
        throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                "Unable to update search index", luceneEx);
    } finally {
        // always close the writer to release the index lock; close failures
        // are surfaced as PluginExceptions as well
        if (awriter != null) {
            logger.fine(" close writer");
            try {
                awriter.close();
            } catch (CorruptIndexException e) {
                throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                        "Unable to update search index", e);
            } catch (IOException e) {
                throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                        "Unable to update search index", e);
            }
        }
    }
    logger.fine(" update worklist successfull");
    return true;
}
From source file:org.infoglue.cms.controllers.kernel.impl.simple.LuceneController.java
License:Open Source License
private void indexInformation(NotificationMessage notificationMessage) { try {/* ww w .ja v a 2s . co m*/ //Object objectIdentity = getObjectIdentity(object); IndexWriter writer = null; try { String index = CmsPropertyHandler.getContextRootPath() + File.separator + "lucene" + File.separator + "index"; File INDEX_DIR = new File(index); writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(new String[] {})); writer.setMaxMergeDocs(500000); if (logger.isInfoEnabled()) logger.info("Indexing to directory '" + INDEX_DIR + "'..."); if (notificationMessage.getType() == NotificationMessage.TRANS_CREATE) { List<Document> documents = getDocuments(notificationMessage); Iterator<Document> documentsIterator = documents.iterator(); while (documentsIterator.hasNext()) { Document indexingDocument = documentsIterator.next(); String uid = indexingDocument.get("uid"); if (logger.isInfoEnabled()) logger.info("Adding document with uid:" + uid + " - " + indexingDocument); if (indexingDocument != null) writer.addDocument(indexingDocument); } } else if (notificationMessage.getType() == NotificationMessage.TRANS_UPDATE) { List<Document> documents = getDocuments(notificationMessage); Iterator<Document> documentsIterator = documents.iterator(); while (documentsIterator.hasNext()) { Document indexingDocument = documentsIterator.next(); String uid = indexingDocument.get("uid"); if (logger.isInfoEnabled()) logger.info("Updating document with uid:" + uid + " - " + indexingDocument); if (indexingDocument != null) writer.updateDocument(new Term("uid", "" + uid), indexingDocument); } } else if (notificationMessage.getType() == NotificationMessage.TRANS_DELETE) { String uid = ""; if (notificationMessage.getClassName().equals(ContentImpl.class.getName())) { uid = "contentId_" + notificationMessage.getObjectId(); } else if (notificationMessage.getClassName().equals(ContentVersionImpl.class.getName())) { uid = "contentVersionId_" + notificationMessage.getObjectId(); } else if 
(notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName())) { uid = "digitalAssetId_" + notificationMessage.getObjectId(); } if (logger.isInfoEnabled()) logger.info("Deleting documents:" + "uid=" + uid); writer.deleteDocuments(new Term("uid", "" + uid)); } } catch (Exception e) { logger.error("Error indexing:" + e.getMessage(), e); } finally { indexedDocumentsSinceLastOptimize++; if (indexedDocumentsSinceLastOptimize > 250) { logger.info("Optimizing..."); writer.optimize(); indexedDocumentsSinceLastOptimize = 0; } writer.close(); } } catch (Exception e) { logger.error("Error indexing:" + e.getMessage(), e); } }
From source file:org.infoglue.cms.controllers.kernel.impl.simple.SearchController.java
License:Open Source License
/**
 * Searches the Lucene content index and returns the matching content
 * versions.
 *
 * If the index does not exist yet it is bootstrapped with a single
 * "initializer" document. The search string is wildcard-suffixed, combined
 * with optional repository / language / content-type / modifier / state
 * filters into a MultiFieldQueryParser query, and hits are resolved back to
 * ContentVersionVOs. Hits that no longer resolve are treated as stale and
 * removed from the index.
 *
 * @param repositoryIdAsIntegerToSearch repositories to restrict the search to (OR-ed), or null/empty for all
 * @param searchString                  free-text query; a trailing "*" is appended if missing
 * @param maxRows                       page size for iterating hits
 * @param userName                      restrict to this last modifier, or null/empty for any
 * @param languageId                    restrict to this language, or null/<= 0 for any
 * @param contentTypeDefinitionIds      restrict to these content types (OR-ed), or null/empty for any
 * @param caseSensitive                 currently unused
 * @param stateId                       restrict to this state, or null for any
 * @param includeAssets                 when false, documents flagged isAsset=true are excluded
 * @return the resolved content versions; empty on error (errors are logged)
 */
private List<ContentVersionVO> getContentVersionVOListFromLucene(Integer[] repositoryIdAsIntegerToSearch,
        String searchString, int maxRows, String userName, Integer languageId,
        Integer[] contentTypeDefinitionIds, Integer caseSensitive, Integer stateId, boolean includeAssets)
        throws SystemException, Bug {
    List<ContentVersionVO> contentVersionVOList = new ArrayList<ContentVersionVO>();
    try {
        String index = CmsPropertyHandler.getContextRootPath() + File.separator + "lucene" + File.separator
                + "index";
        boolean indexExists = IndexReader.indexExists(new File(index));
        if (!indexExists) {
            // Bootstrap an empty index with one "initializer" document so
            // subsequent searches do not fail on a missing directory.
            try {
                File INDEX_DIR = new File(index);
                IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer());
                logger.info("Indexing to directory '" + INDEX_DIR + "'...");
                writer.updateDocument(new Term("initializer", "true"), getDocument("initializer"));
                logger.info("Optimizing...");
                writer.optimize();
                writer.close();
            } catch (Exception e) {
                logger.error("An error creating index:" + e.getMessage(), e);
            }
        }
        // Always search with a trailing wildcard so prefixes match.
        if (!searchString.endsWith("*"))
            searchString = searchString + "*";
        // Parallel lists of field name / query string / occur flag consumed
        // by MultiFieldQueryParser below.
        List<String> fieldNames = new ArrayList<String>();
        List<String> queryStrings = new ArrayList<String>();
        List<BooleanClause.Occur> booleanList = new ArrayList<BooleanClause.Occur>();
        if (repositoryIdAsIntegerToSearch != null && repositoryIdAsIntegerToSearch.length > 0) {
            // OR together all requested repository ids into one sub-query.
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < repositoryIdAsIntegerToSearch.length; i++) {
                if (i > 0)
                    sb.append(" OR ");
                sb.append("" + repositoryIdAsIntegerToSearch[i]);
            }
            if (sb.length() > 0) {
                fieldNames.add("repositoryId");
                queryStrings.add("" + sb.toString());
                booleanList.add(BooleanClause.Occur.MUST);
            }
        }
        if (languageId != null && languageId.intValue() > 0) {
            fieldNames.add("languageId");
            queryStrings.add("" + languageId);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (contentTypeDefinitionIds != null && contentTypeDefinitionIds.length > 0) {
            // OR together all requested content type definition ids.
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < contentTypeDefinitionIds.length; i++) {
                Integer contentTypeDefinitionId = contentTypeDefinitionIds[i];
                if (contentTypeDefinitionId != null) {
                    if (i > 0)
                        sb.append(" OR ");
                    sb.append("" + contentTypeDefinitionId);
                }
            }
            if (sb.length() > 0) {
                fieldNames.add("contentTypeDefinitionId");
                queryStrings.add("" + sb.toString());
                booleanList.add(BooleanClause.Occur.MUST);
            }
        }
        if (userName != null && !userName.equals("")) {
            fieldNames.add("lastModifier");
            queryStrings.add("" + userName);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (stateId != null && !stateId.equals("")) {
            fieldNames.add("stateId");
            queryStrings.add("" + stateId);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (!includeAssets) {
            // Exclude asset documents unless the caller asked for them.
            fieldNames.add("isAsset");
            queryStrings.add("true");
            booleanList.add(BooleanClause.Occur.MUST_NOT);
        }
        if (searchString != null && searchString.length() > 0) {
            fieldNames.add("contents");
            queryStrings.add(searchString);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        String[] fields = new String[fieldNames.size()];
        fields = (String[]) fieldNames.toArray(fields);
        String[] queries = new String[fieldNames.size()];
        queries = (String[]) queryStrings.toArray(queries);
        BooleanClause.Occur[] flags = new BooleanClause.Occur[fieldNames.size()];
        flags = (BooleanClause.Occur[]) booleanList.toArray(flags);
        IndexReader reader = IndexReader.open(index);
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(new String[] {});
        Query query = MultiFieldQueryParser.parse(queries, fields, flags, analyzer);
        logger.info("Searching for: " + query.toString());
        Hits hits = searcher.search(query);
        logger.info(hits.length() + " total matching documents");
        final int HITS_PER_PAGE = new Integer(maxRows);
        for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
            int end = Math.min(hits.length(), start + HITS_PER_PAGE);
            for (int i = start; i < end; i++) {
                org.apache.lucene.document.Document doc = hits.doc(i);
                String contentVersionId = doc.get("contentVersionId");
                String contentId = doc.get("contentId");
                logger.info("doc:" + doc);
                logger.info("contentVersionId:" + contentVersionId);
                logger.info("contentId:" + contentId);
                if (contentVersionId == null && contentId != null) {
                    // Content-level hit (no version id stored on the doc).
                    try {
                        ContentVO cvo = ContentController.getContentController()
                                .getContentVOWithId(new Integer(contentId));
                        logger.info("cvo:" + cvo);
                        String path = doc.get("path");
                        if (path != null) {
                            logger.info((i + 1) + ". " + path);
                            String title = doc.get("title");
                            if (title != null) {
                                logger.info(" Title: " + doc.get("title"));
                            }
                        } else {
                            logger.info((i + 1) + ". " + "No path for this document");
                        }
                    } catch (Exception e) {
                        // Hit no longer resolves: remove stale index entry.
                        logger.error("ContentVersion with id:" + contentVersionId
                                + " was not valid - skipping but how did the index become corrupt?");
                        deleteVersionFromIndex(contentVersionId);
                    }
                } else {
                    // Version-level hit: load the version and add it to the result.
                    try {
                        ContentVersionVO cvvo = ContentVersionController.getContentVersionController()
                                .getFullContentVersionVOWithId(new Integer(contentVersionId));
                        logger.info("cvvo:" + cvvo);
                        contentVersionVOList.add(cvvo);
                        String path = doc.get("path");
                        if (path != null) {
                            logger.info((i + 1) + ". " + path);
                            String title = doc.get("title");
                            if (title != null) {
                                logger.info(" Title: " + doc.get("title"));
                            }
                        } else {
                            logger.info((i + 1) + ". " + "No path for this document");
                        }
                    } catch (Exception e) {
                        // Hit no longer resolves: remove stale index entry.
                        logger.error("ContentVersion with id:" + contentVersionId
                                + " was not valid - skipping but how did the index become corrupt?");
                        deleteVersionFromIndex(contentVersionId);
                    }
                }
            }
            if (queries != null) // non-interactive
                break;
        }
        reader.close();
    } catch (Exception e) {
        logger.error("Error searching:" + e.getMessage(), e);
    }
    return contentVersionVOList;
}
From source file:org.janusgraph.diskstorage.lucene.LuceneExample.java
License:Apache License
void indexDocs(IndexWriter writer, String documentId, Map<String, Object> docMap) throws IOException { Document doc = new Document(); Field documentIdField = new StringField("docid", documentId, Field.Store.YES); doc.add(documentIdField);//from w w w. jav a 2 s .c o m for (Map.Entry<String, Object> kv : docMap.entrySet()) { String key = kv.getKey(); Object value = kv.getValue(); if (value instanceof Number) { final Field field; if (value instanceof Integer || value instanceof Long) { field = new LongPoint(key, ((Number) value).longValue()); } else { //double or float field = new DoublePoint(key, ((Number) value).doubleValue()); } doc.add(field); } else if (value instanceof String) { String str = (String) value; Field field = new TextField(key + TXT_SUFFIX, str, Field.Store.NO); doc.add(field); if (str.length() < 256) field = new StringField(key + STR_SUFFIX, str, Field.Store.NO); doc.add(field); } else if (value instanceof Geoshape) { Shape shape = ((Geoshape) value).getShape(); for (IndexableField f : getSpatialStrategy(key).createIndexableFields(shape)) { doc.add(f); } } else throw new IllegalArgumentException("Unsupported type: " + value); } writer.updateDocument(new Term("docid", documentId), doc); }
From source file:org.janusgraph.diskstorage.lucene.LuceneIndex.java
License:Apache License
/**
 * Applies all mutations for one store to its Lucene index: deleted documents
 * are removed outright; for the rest, the existing document is fetched (or a
 * new one created), the mutation's deleted fields are stripped, its added
 * entries merged in, and the result upserted by its document id. The writer
 * is committed once at the end; the near-real-time reader is always closed.
 *
 * @param stores      store name mapped to its per-document mutations
 * @param information key metadata used to pick the writer and build fields
 * @throws IOException      if reading or writing the index fails
 * @throws BackendException declared for API compatibility with callers
 */
private void mutateStores(Map.Entry<String, Map<String, IndexMutation>> stores,
        KeyInformation.IndexRetriever information) throws IOException, BackendException {
    IndexReader reader = null;
    try {
        final String storeName = stores.getKey();
        final IndexWriter writer = getWriter(storeName, information);
        // Near-real-time reader over the writer so uncommitted changes are
        // visible to retrieveOrCreate() below.
        reader = DirectoryReader.open(writer, true, true);
        final IndexSearcher searcher = new IndexSearcher(reader);
        for (final Map.Entry<String, IndexMutation> entry : stores.getValue().entrySet()) {
            final String documentId = entry.getKey();
            final IndexMutation mutation = entry.getValue();
            if (mutation.isDeleted()) {
                if (log.isTraceEnabled())
                    log.trace("Deleted entire document [{}]", documentId);
                writer.deleteDocuments(new Term(DOCID, documentId));
                continue;
            }
            // Fetch the current document (or an empty one) plus its geo fields.
            final Pair<Document, Map<String, Shape>> docAndGeo = retrieveOrCreate(documentId, searcher);
            final Document doc = docAndGeo.getKey();
            final Map<String, Shape> geoFields = docAndGeo.getValue();
            Preconditions.checkNotNull(doc);
            // Strip every field this mutation deletes before applying additions.
            for (final IndexEntry del : mutation.getDeletions()) {
                Preconditions.checkArgument(!del.hasMetaData(),
                        "Lucene index does not support indexing meta data: %s", del);
                final String key = del.field;
                if (doc.getField(key) != null) {
                    if (log.isTraceEnabled())
                        log.trace("Removing field [{}] on document [{}]", key, documentId);
                    doc.removeFields(key);
                    geoFields.remove(key);
                }
            }
            addToDocument(storeName, documentId, doc, mutation.getAdditions(), geoFields, information);
            //write the old document to the index with the modifications
            writer.updateDocument(new Term(DOCID, documentId), doc);
        }
        writer.commit();
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
From source file:org.janusgraph.diskstorage.lucene.LuceneIndex.java
License:Apache License
@Override public void restore(Map<String, Map<String, List<IndexEntry>>> documents, KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException { writerLock.lock();//from ww w . ja v a2 s. com try { for (final Map.Entry<String, Map<String, List<IndexEntry>>> stores : documents.entrySet()) { final String store = stores.getKey(); final IndexWriter writer = getWriter(store, information); final IndexReader reader = DirectoryReader.open(writer, true, true); final IndexSearcher searcher = new IndexSearcher(reader); for (final Map.Entry<String, List<IndexEntry>> entry : stores.getValue().entrySet()) { final String docID = entry.getKey(); final List<IndexEntry> content = entry.getValue(); if (content == null || content.isEmpty()) { if (log.isTraceEnabled()) log.trace("Deleting document [{}]", docID); writer.deleteDocuments(new Term(DOCID, docID)); continue; } final Pair<Document, Map<String, Shape>> docAndGeo = retrieveOrCreate(docID, searcher); addToDocument(store, docID, docAndGeo.getKey(), content, docAndGeo.getValue(), information); //write the old document to the index with the modifications writer.updateDocument(new Term(DOCID, docID), docAndGeo.getKey()); } writer.commit(); } tx.commit(); } catch (final IOException e) { throw new TemporaryBackendException("Could not update Lucene index", e); } finally { writerLock.unlock(); } }