Example usage for org.apache.lucene.index IndexWriter updateDocument

List of usage examples for org.apache.lucene.index IndexWriter updateDocument

Introduction

On this page you can find example usage of org.apache.lucene.index IndexWriter updateDocument.

Prototype

private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode,
            Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Usage

From source file:org.explore3.searchengine.indexCreator.ImageIndex.java

License:Apache License

/**
 * Recursively indexes the given file or directory with {@code writer}.
 *
 * Directories are walked depth-first. Regular files are parsed with
 * HtmlImageParsing and indexed with "image", "title" and "path" fields.
 * In CREATE mode the document is added; otherwise it replaces any
 * existing document keyed by its unique "path" term.
 *
 * @param writer the open IndexWriter to add/update documents with
 * @param file   a readable file or directory; unreadable entries are skipped
 * @throws IOException if parsing the file or writing to the index fails
 */
static void indexDocument(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        // File.list() returns null when an I/O error occurs
        String[] children = file.list();
        if (children != null) {
            for (String child : children) {
                indexDocument(writer, new File(file, child));
            }
        }
        return;
    }

    Map<String, String> infoWithField = HtmlImageParsing.parse(file);

    // make a new, empty document
    Document doc = new Document();

    if (!infoWithField.isEmpty()) {
        // NOTE(review): assumes parse() always supplies "image" and "title"
        // entries when the map is non-empty -- a missing key would fail here;
        // confirm against HtmlImageParsing.
        Field image = new TextField("image", infoWithField.get("image"), Field.Store.YES);
        doc.add(image);
        Field title = new StringField("title", infoWithField.get("title"), Field.Store.YES);
        doc.add(title);

        // "path" is the unique key used for incremental updates below
        Field path = new StringField("path", file.getPath(), Field.Store.YES);
        doc.add(path);
    }

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        //System.out.println("adding " + file);
        writer.addDocument(doc);
    } else {
        System.out.println("updating " + file);
        writer.updateDocument(new Term("path", file.getPath()), doc);
    }
}

From source file:org.explore3.searchengine.indexCreator.Indexer.java

License:Apache License

/**
 * Recursively indexes the given file or directory with {@code writer}.
 *
 * Directories are walked depth-first. Regular files are parsed with
 * HtmlParsing and indexed with "title", "path", a stored/tokenized
 * "highlighterWords" field carrying term vectors with offsets (needed by
 * the highlighter), and a "words" full-text field. In CREATE mode the
 * document is added; otherwise it replaces any existing document keyed
 * by its unique "path" term.
 *
 * @param writer the open IndexWriter to add/update documents with
 * @param file   a readable file or directory; unreadable entries are skipped
 * @throws IOException if parsing the file or writing to the index fails
 */
static void indexDocument(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        // File.list() returns null when an I/O error occurs
        String[] children = file.list();
        if (children != null) {
            for (String child : children) {
                indexDocument(writer, new File(file, child));
            }
        }
        return;
    }

    Map<String, String> infoWithField = HtmlParsing.parse(file);

    // make a new, empty document
    Document doc = new Document();

    doc.add(new TextField("title", infoWithField.get("title"), Field.Store.YES));
    // "path" is the unique key used for incremental updates below
    doc.add(new StringField("path", file.getPath(), Field.Store.YES));

    // custom field type: stored + tokenized + term vectors with offsets,
    // so the highlighter can rebuild snippets from this field
    FieldType type = new FieldType();
    type.setIndexed(true);
    type.setStored(true);
    type.setStoreTermVectors(true);
    type.setTokenized(true);
    type.setStoreTermVectorOffsets(true);
    doc.add(new Field("highlighterWords", infoWithField.get("text"), type));
    doc.add(new TextField("words", infoWithField.get("text"), Field.Store.YES));

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        System.out.println("adding " + file);
        writer.addDocument(doc);
    } else {
        System.out.println("updating " + file);
        writer.updateDocument(new Term("path", file.getPath()), doc);
    }
}

From source file:org.fnlp.app.lucene.demo.BuildIndex.java

License:Open Source License

/**
 * Builds a small Lucene index over a few hard-coded strings using the
 * FNLP analyzer, then prints the elapsed time.
 *
 * @param args unused
 * @throws IOException        if the index directory cannot be opened or written
 * @throws LoadModelException if the FNLP segmentation models fail to load
 */
public static void main(String[] args) throws IOException, LoadModelException {
    String indexPath = "../tmp/lucene";
    System.out.println("Indexing to directory '" + indexPath + "'...");
    Date start = new Date();
    Directory dir = FSDirectory.open(new File(indexPath));
    // Load the FNLP segmentation/tagging models; the factory is kept for its
    // side effect -- the FNLPAnalyzer below relies on the initialized singleton.
    CNFactory.getInstance("../models", Models.SEG_TAG);
    Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_47);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    IndexWriter writer = new IndexWriter(dir, iwc);

    String[] strs = new String[] { "?",
            "?????????",
            "????", "" };
    try {
        for (int i = 0; i < strs.length; i++) {
            Document doc = new Document();
            doc.add(new TextField("content", strs[i], Field.Store.YES));
            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                writer.addDocument(doc);
            } else {
                // the content string itself doubles as the unique update key
                writer.updateDocument(new Term("content", strs[i]), doc);
            }
        }
    } finally {
        // close even if indexing fails, so the index is not left write-locked
        writer.close();
    }

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");
}

From source file:org.frontcache.cache.impl.LuceneIndexManager.java

License:Apache License

/**
 * Writes the given web response to the Lucene index, replacing any
 * document previously indexed under the same URL.
 *
 * @param response the cached web response to index; must have a non-null URL
 * @throws IOException if obtaining the index writer fails (failures of the
 *         update/commit themselves are caught and logged below)
 */
void indexDoc(WebResponse response) throws IOException {

    IndexWriter iWriter = getIndexWriter();

    Document doc = new Document();

    String url = response.getUrl();

    // the URL is the unique key of the document; refuse to index without it
    if (null == url) {
        logger.error("URL can't be null during index time for " + response);
        return;
    }

    doc.add(new StringField(URL_FIELD, url, Field.Store.YES));

    doc.add(new StringField(DOMAIN_FIELD, response.getDomain(), Field.Store.YES));

    // raw body bytes are stored (not indexed) so the response can be replayed
    if (null != response.getContent())
        doc.add(new StoredField(BIN_FIELD, response.getContent()));

    //      doc.add(new NumericDocValuesField(EXPIRE_DATE_FIELD, response.getExpireTimeMillis())); // TODO: store map ?

    // full response serialized as JSON so metadata can be restored from the index
    doc.add(new StoredField(JSON_FIELD, gson.toJson(response), JSON_TYPE));

    for (String tag : response.getTags())
        doc.add(new StringField(TAGS_FIELD, tag, Field.Store.NO)); // tag is StringField to exact match

    try {
        iWriter.updateDocument(new Term(URL_FIELD, url), doc);
    } catch (IOException e) {
        // NOTE(review): update failures are swallowed after logging even though
        // the method declares IOException -- confirm callers expect best-effort
        logger.error("Error while in Lucene index operation: {}", e.getMessage(), e);

    } finally {
        // always attempt to commit so earlier successful updates are persisted
        try {
            iWriter.commit();
        } catch (IOException ioEx) {
            logger.error("Error while commiting changes to Lucene index: {}", ioEx.getMessage(), ioEx);
        }
    }

}

From source file:org.imixs.workflow.plugins.jee.extended.LucenePlugin.java

License:Open Source License

/**
 * This method updates the search index for a collection of workitems.
 *
 * For each workitem the method tests if it matches the conditions to be
 * added into the search index. If the workitem does not match the
 * conditions it is removed from the index instead.
 *
 * The method loads the lucene index properties from the imixs.properties
 * file on the classpath. If no properties are defined the method
 * terminates.
 *
 * @param worklist
 *            of ItemCollections to be indexed
 * @return - true if the update was successful, false if no index
 *         properties could be loaded
 * @throws PluginException if the index cannot be updated or closed
 */
public static boolean updateWorklist(Collection<ItemCollection> worklist) throws PluginException {

    IndexWriter awriter = null;
    // try loading imixs-search properties
    Properties prop = loadProperties();
    if (prop.isEmpty())
        return false;

    try {
        awriter = createIndexWriter(prop);

        // add workitem to search index....
        for (ItemCollection workitem : worklist) {
            // $uniqueid is the term used both to replace and to delete documents
            Term term = new Term("$uniqueid", workitem.getItemValueString("$uniqueid"));
            // test if document should be indexed or not
            if (matchConditions(prop, workitem))
                awriter.updateDocument(term, createDocument(prop, workitem));
            else
                awriter.deleteDocuments(term);
        }
    } catch (IOException luceneEx) {
        // close writer!
        logger.warning(" Lucene Exception : " + luceneEx.getMessage());

        throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                "Unable to update search index", luceneEx);

    } finally {

        if (awriter != null) {
            logger.fine(" close writer");
            try {
                awriter.close();
            } catch (IOException e) {
                // CorruptIndexException is a subclass of IOException, so one
                // catch covers both failure modes the original handled with
                // two identical blocks
                throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                        "Unable to update search index", e);
            }

        }
    }

    logger.fine(" update worklist successfull");
    return true;
}

From source file:org.infoglue.cms.controllers.kernel.impl.simple.LuceneController.java

License:Open Source License

/**
 * Applies a notification message to the Lucene index: adds documents for
 * create events, replaces them (keyed by the "uid" term) for updates, and
 * deletes them (by a class-specific uid) for delete events. Every ~250
 * indexed documents the index is optimized.
 *
 * @param notificationMessage the change event to apply to the index
 */
private void indexInformation(NotificationMessage notificationMessage) {
    try {
        IndexWriter writer = null;
        try {
            String index = CmsPropertyHandler.getContextRootPath() + File.separator + "lucene" + File.separator
                    + "index";

            File INDEX_DIR = new File(index);
            writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(new String[] {}));
            writer.setMaxMergeDocs(500000);
            if (logger.isInfoEnabled())
                logger.info("Indexing to directory '" + INDEX_DIR + "'...");

            if (notificationMessage.getType() == NotificationMessage.TRANS_CREATE) {
                List<Document> documents = getDocuments(notificationMessage);
                Iterator<Document> documentsIterator = documents.iterator();
                while (documentsIterator.hasNext()) {
                    Document indexingDocument = documentsIterator.next();
                    // check for null BEFORE dereferencing (the original read
                    // the uid first, which would have thrown an NPE)
                    if (indexingDocument == null)
                        continue;
                    String uid = indexingDocument.get("uid");
                    if (logger.isInfoEnabled())
                        logger.info("Adding document with uid:" + uid + " - " + indexingDocument);
                    writer.addDocument(indexingDocument);
                }
            } else if (notificationMessage.getType() == NotificationMessage.TRANS_UPDATE) {
                List<Document> documents = getDocuments(notificationMessage);
                Iterator<Document> documentsIterator = documents.iterator();
                while (documentsIterator.hasNext()) {
                    Document indexingDocument = documentsIterator.next();
                    if (indexingDocument == null)
                        continue;
                    String uid = indexingDocument.get("uid");
                    if (logger.isInfoEnabled())
                        logger.info("Updating document with uid:" + uid + " - " + indexingDocument);
                    writer.updateDocument(new Term("uid", "" + uid), indexingDocument);
                }
            } else if (notificationMessage.getType() == NotificationMessage.TRANS_DELETE) {
                String uid = "";
                if (notificationMessage.getClassName().equals(ContentImpl.class.getName())) {
                    uid = "contentId_" + notificationMessage.getObjectId();
                } else if (notificationMessage.getClassName().equals(ContentVersionImpl.class.getName())) {
                    uid = "contentVersionId_" + notificationMessage.getObjectId();
                } else if (notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName())) {
                    uid = "digitalAssetId_" + notificationMessage.getObjectId();
                }

                if (logger.isInfoEnabled())
                    logger.info("Deleting documents:" + "uid=" + uid);
                writer.deleteDocuments(new Term("uid", "" + uid));
            }
        } catch (Exception e) {
            logger.error("Error indexing:" + e.getMessage(), e);
        } finally {
            // writer stays null when its construction fails above; guard so
            // cleanup does not mask the original error with a NullPointerException
            if (writer != null) {
                indexedDocumentsSinceLastOptimize++;
                if (indexedDocumentsSinceLastOptimize > 250) {
                    logger.info("Optimizing...");
                    writer.optimize();
                    indexedDocumentsSinceLastOptimize = 0;
                }
                writer.close();
            }
        }
    } catch (Exception e) {
        logger.error("Error indexing:" + e.getMessage(), e);
    }
}

From source file:org.infoglue.cms.controllers.kernel.impl.simple.SearchController.java

License:Open Source License

/**
 * Searches the Lucene index and returns the matching content versions.
 *
 * Builds a multi-field boolean query from the given filters (repositories,
 * language, content types, last modifier, state, asset inclusion) plus the
 * free-text search string, runs it against the on-disk index (creating the
 * index with a placeholder document first if it does not exist yet), and
 * resolves each hit to a ContentVersionVO. Hits that cannot be resolved are
 * logged and removed from the index.
 *
 * @param repositoryIdAsIntegerToSearch repositories to restrict the search to (OR-ed)
 * @param searchString free-text query; a trailing wildcard is appended if absent
 * @param maxRows page size used when iterating hits
 * @param userName if non-empty, restricts to this last modifier
 * @param languageId if positive, restricts to this language
 * @param contentTypeDefinitionIds content types to restrict to (OR-ed)
 * @param caseSensitive currently unused -- NOTE(review): confirm whether this
 *        parameter was meant to influence the analyzer/query
 * @param stateId if set, restricts to this state
 * @param includeAssets when false, documents flagged isAsset=true are excluded
 * @return list of matching ContentVersionVOs (empty on error; errors are logged)
 */
private List<ContentVersionVO> getContentVersionVOListFromLucene(Integer[] repositoryIdAsIntegerToSearch,
        String searchString, int maxRows, String userName, Integer languageId,
        Integer[] contentTypeDefinitionIds, Integer caseSensitive, Integer stateId, boolean includeAssets)
        throws SystemException, Bug {
    List<ContentVersionVO> contentVersionVOList = new ArrayList<ContentVersionVO>();

    try {
        String index = CmsPropertyHandler.getContextRootPath() + File.separator + "lucene" + File.separator
                + "index";

        // bootstrap an empty index with a placeholder document so that the
        // IndexReader.open below does not fail on a missing directory
        boolean indexExists = IndexReader.indexExists(new File(index));
        if (!indexExists) {
            try {
                File INDEX_DIR = new File(index);
                IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer());
                logger.info("Indexing to directory '" + INDEX_DIR + "'...");
                writer.updateDocument(new Term("initializer", "true"), getDocument("initializer"));
                logger.info("Optimizing...");
                writer.optimize();
                writer.close();
            } catch (Exception e) {
                logger.error("An error creating index:" + e.getMessage(), e);
            }
        }

        // default to prefix matching unless the caller already supplied a wildcard
        /*if(searchString.indexOf(" ") > -1)
           searchString = "\"" + searchString + "\"";
        else*/ if (!searchString.endsWith("*"))
            searchString = searchString + "*";

        // the three parallel lists below feed MultiFieldQueryParser.parse:
        // field name, query string and boolean occurrence per clause
        List<String> fieldNames = new ArrayList<String>();
        List<String> queryStrings = new ArrayList<String>();
        List<BooleanClause.Occur> booleanList = new ArrayList<BooleanClause.Occur>();

        if (repositoryIdAsIntegerToSearch != null && repositoryIdAsIntegerToSearch.length > 0) {
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < repositoryIdAsIntegerToSearch.length; i++) {
                if (i > 0)
                    sb.append(" OR ");
                sb.append("" + repositoryIdAsIntegerToSearch[i]);
            }

            if (sb.length() > 0) {
                fieldNames.add("repositoryId");
                queryStrings.add("" + sb.toString());
                booleanList.add(BooleanClause.Occur.MUST);
            }
        }
        if (languageId != null && languageId.intValue() > 0) {
            fieldNames.add("languageId");
            queryStrings.add("" + languageId);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (contentTypeDefinitionIds != null && contentTypeDefinitionIds.length > 0) {
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < contentTypeDefinitionIds.length; i++) {
                Integer contentTypeDefinitionId = contentTypeDefinitionIds[i];
                if (contentTypeDefinitionId != null) {
                    if (i > 0)
                        sb.append(" OR ");
                    sb.append("" + contentTypeDefinitionId);
                }
            }

            if (sb.length() > 0) {
                fieldNames.add("contentTypeDefinitionId");
                queryStrings.add("" + sb.toString());
                booleanList.add(BooleanClause.Occur.MUST);
            }
        }
        if (userName != null && !userName.equals("")) {
            fieldNames.add("lastModifier");
            queryStrings.add("" + userName);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (stateId != null && !stateId.equals("")) {
            fieldNames.add("stateId");
            queryStrings.add("" + stateId);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (!includeAssets) {
            // exclude assets with a MUST_NOT clause instead of filtering hits
            fieldNames.add("isAsset");
            queryStrings.add("true");
            booleanList.add(BooleanClause.Occur.MUST_NOT);
        }

        if (searchString != null && searchString.length() > 0) {
            fieldNames.add("contents");
            queryStrings.add(searchString);
            booleanList.add(BooleanClause.Occur.MUST);
        }

        String[] fields = new String[fieldNames.size()];
        fields = (String[]) fieldNames.toArray(fields);

        String[] queries = new String[fieldNames.size()];
        queries = (String[]) queryStrings.toArray(queries);

        BooleanClause.Occur[] flags = new BooleanClause.Occur[fieldNames.size()];
        flags = (BooleanClause.Occur[]) booleanList.toArray(flags);

        IndexReader reader = IndexReader.open(index);

        // NOTE(review): the searcher is never closed explicitly; only the
        // reader is -- confirm whether the searcher owns extra resources here
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(new String[] {});

        Query query = MultiFieldQueryParser.parse(queries, fields, flags, analyzer);
        logger.info("Searching for: " + query.toString());
        //System.out.println("Searching for: " + query.toString());

        Hits hits = searcher.search(query);

        logger.info(hits.length() + " total matching documents");

        final int HITS_PER_PAGE = new Integer(maxRows);
        for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
            int end = Math.min(hits.length(), start + HITS_PER_PAGE);
            for (int i = start; i < end; i++) {
                org.apache.lucene.document.Document doc = hits.doc(i);
                String contentVersionId = doc.get("contentVersionId");
                String contentId = doc.get("contentId");
                logger.info("doc:" + doc);
                logger.info("contentVersionId:" + contentVersionId);
                logger.info("contentId:" + contentId);

                // content-only hits (no version id) are logged but contribute
                // nothing to the result list; only version hits are collected
                if (contentVersionId == null && contentId != null) {
                    try {
                        ContentVO cvo = ContentController.getContentController()
                                .getContentVOWithId(new Integer(contentId));
                        logger.info("cvo:" + cvo);

                        String path = doc.get("path");
                        if (path != null) {
                            logger.info((i + 1) + ". " + path);
                            String title = doc.get("title");
                            if (title != null) {
                                logger.info("   Title: " + doc.get("title"));
                            }
                        } else {
                            logger.info((i + 1) + ". " + "No path for this document");
                        }
                    } catch (Exception e) {
                        // stale index entry: drop it so the corruption heals itself
                        logger.error("ContentVersion with id:" + contentVersionId
                                + " was not valid - skipping but how did the index become corrupt?");
                        deleteVersionFromIndex(contentVersionId);
                    }
                } else {
                    try {
                        ContentVersionVO cvvo = ContentVersionController.getContentVersionController()
                                .getFullContentVersionVOWithId(new Integer(contentVersionId));
                        logger.info("cvvo:" + cvvo);
                        contentVersionVOList.add(cvvo);

                        String path = doc.get("path");
                        if (path != null) {
                            logger.info((i + 1) + ". " + path);
                            String title = doc.get("title");
                            if (title != null) {
                                logger.info("   Title: " + doc.get("title"));
                            }
                        } else {
                            logger.info((i + 1) + ". " + "No path for this document");
                        }
                    } catch (Exception e) {
                        // stale index entry: drop it so the corruption heals itself
                        logger.error("ContentVersion with id:" + contentVersionId
                                + " was not valid - skipping but how did the index become corrupt?");
                        deleteVersionFromIndex(contentVersionId);
                    }
                }
            }

            // NOTE(review): 'queries' is never null at this point, so this
            // breaks after the first page unconditionally -- confirm intent
            if (queries != null) // non-interactive
                break;
        }

        reader.close();
    } catch (Exception e) {
        logger.error("Error searching:" + e.getMessage(), e);
    }

    return contentVersionVOList;
}

From source file:org.janusgraph.diskstorage.lucene.LuceneExample.java

License:Apache License

/**
 * Builds a Lucene document from {@code docMap} and writes it to the index,
 * replacing any existing document with the same id.
 *
 * Numbers are indexed as point fields (long for Integer/Long, double
 * otherwise), strings as a tokenized text field plus -- for values shorter
 * than 256 chars -- an exact-match string field, and geoshapes via the
 * spatial strategy registered for the key.
 *
 * @param writer     index writer to update
 * @param documentId unique id, stored in the "docid" field and used as the
 *                   update term
 * @param docMap     field name to value map; values must be Number, String
 *                   or Geoshape
 * @throws IOException              if the index update fails
 * @throws IllegalArgumentException if a value has an unsupported type
 */
void indexDocs(IndexWriter writer, String documentId, Map<String, Object> docMap) throws IOException {
    Document doc = new Document();

    doc.add(new StringField("docid", documentId, Field.Store.YES));

    for (Map.Entry<String, Object> kv : docMap.entrySet()) {
        String key = kv.getKey();
        Object value = kv.getValue();

        if (value instanceof Number) {
            final Field field;
            if (value instanceof Integer || value instanceof Long) {
                field = new LongPoint(key, ((Number) value).longValue());
            } else { //double or float
                field = new DoublePoint(key, ((Number) value).doubleValue());
            }
            doc.add(field);
        } else if (value instanceof String) {
            String str = (String) value;
            doc.add(new TextField(key + TXT_SUFFIX, str, Field.Store.NO));
            // Fix: the original unconditionally added 'field' a second time,
            // so strings >= 256 chars got the same TextField indexed twice;
            // the exact-match StringField is only meant for short values.
            if (str.length() < 256) {
                doc.add(new StringField(key + STR_SUFFIX, str, Field.Store.NO));
            }
        } else if (value instanceof Geoshape) {
            Shape shape = ((Geoshape) value).getShape();
            for (IndexableField f : getSpatialStrategy(key).createIndexableFields(shape)) {
                doc.add(f);
            }
        } else {
            throw new IllegalArgumentException("Unsupported type: " + value);
        }
    }

    writer.updateDocument(new Term("docid", documentId), doc);

}

From source file:org.janusgraph.diskstorage.lucene.LuceneIndex.java

License:Apache License

/**
 * Applies the mutations for a single store to its Lucene index: deleted
 * documents are removed outright; for the rest the existing document is
 * loaded (or created), deleted fields are stripped, additions are applied,
 * and the result is written back via updateDocument, then committed.
 *
 * @param stores      store name mapped to its per-document mutations
 * @param information key information used when (re)building index fields
 * @throws IOException      if reading or writing the index fails
 * @throws BackendException if a mutation cannot be applied
 */
private void mutateStores(Map.Entry<String, Map<String, IndexMutation>> stores,
        KeyInformation.IndexRetriever information) throws IOException, BackendException {
    IndexReader reader = null;
    try {
        final String storeName = stores.getKey();
        final IndexWriter writer = getWriter(storeName, information);
        // near-real-time reader on top of the writer so uncommitted changes
        // made earlier in this batch are visible to retrieveOrCreate below
        reader = DirectoryReader.open(writer, true, true);
        final IndexSearcher searcher = new IndexSearcher(reader);
        for (final Map.Entry<String, IndexMutation> entry : stores.getValue().entrySet()) {
            final String documentId = entry.getKey();
            final IndexMutation mutation = entry.getValue();

            if (mutation.isDeleted()) {
                if (log.isTraceEnabled())
                    log.trace("Deleted entire document [{}]", documentId);

                writer.deleteDocuments(new Term(DOCID, documentId));
                continue;
            }

            // fetch the current document (or a fresh one) plus its geo fields
            final Pair<Document, Map<String, Shape>> docAndGeo = retrieveOrCreate(documentId, searcher);
            final Document doc = docAndGeo.getKey();
            final Map<String, Shape> geoFields = docAndGeo.getValue();

            Preconditions.checkNotNull(doc);
            for (final IndexEntry del : mutation.getDeletions()) {
                Preconditions.checkArgument(!del.hasMetaData(),
                        "Lucene index does not support indexing meta data: %s", del);
                final String key = del.field;
                if (doc.getField(key) != null) {
                    if (log.isTraceEnabled())
                        log.trace("Removing field [{}] on document [{}]", key, documentId);

                    doc.removeFields(key);
                    geoFields.remove(key);
                }
            }

            addToDocument(storeName, documentId, doc, mutation.getAdditions(), geoFields, information);

            //write the old document to the index with the modifications
            writer.updateDocument(new Term(DOCID, documentId), doc);
        }
        writer.commit();
    } finally {
        // always release the NRT reader, even when a mutation failed
        IOUtils.closeQuietly(reader);
    }
}

From source file:org.janusgraph.diskstorage.lucene.LuceneIndex.java

License:Apache License

/**
 * Restores the given documents into the index, store by store: empty or
 * null content deletes the document, otherwise the existing document is
 * loaded (or created), the entries are applied, and the result replaces
 * the old document. Each store's writer is committed before moving on,
 * and the transaction is committed at the end.
 *
 * @param documents   store name -> (document id -> index entries)
 * @param information key information used when (re)building index fields
 * @param tx          transaction to commit once all stores are written
 * @throws BackendException if any index I/O fails
 */
@Override
public void restore(Map<String, Map<String, List<IndexEntry>>> documents,
        KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException {
    writerLock.lock();
    try {
        for (final Map.Entry<String, Map<String, List<IndexEntry>>> stores : documents.entrySet()) {
            final String store = stores.getKey();
            final IndexWriter writer = getWriter(store, information);
            // near-real-time reader on top of the writer so earlier writes in
            // this batch are visible to retrieveOrCreate below
            final IndexReader reader = DirectoryReader.open(writer, true, true);
            try {
                final IndexSearcher searcher = new IndexSearcher(reader);

                for (final Map.Entry<String, List<IndexEntry>> entry : stores.getValue().entrySet()) {
                    final String docID = entry.getKey();
                    final List<IndexEntry> content = entry.getValue();

                    if (content == null || content.isEmpty()) {
                        if (log.isTraceEnabled())
                            log.trace("Deleting document [{}]", docID);

                        writer.deleteDocuments(new Term(DOCID, docID));
                        continue;
                    }

                    final Pair<Document, Map<String, Shape>> docAndGeo = retrieveOrCreate(docID, searcher);
                    addToDocument(store, docID, docAndGeo.getKey(), content, docAndGeo.getValue(), information);

                    //write the old document to the index with the modifications
                    writer.updateDocument(new Term(DOCID, docID), docAndGeo.getKey());
                }
            } finally {
                // Fix: the reader was previously never closed (leaked once per
                // store); release it as mutateStores does
                IOUtils.closeQuietly(reader);
            }
            writer.commit();
        }
        tx.commit();
    } catch (final IOException e) {
        throw new TemporaryBackendException("Could not update Lucene index", e);
    } finally {
        writerLock.unlock();
    }
}