List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
From source file:org.explore3.searchengine.indexCreator.ImageIndex.java
License:Apache License
static void indexDocument(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocument(writer, new File(file, files[i])); }/*from w w w .ja v a 2s . c o m*/ } } else { try { Map<String, String> infoWithField = HtmlImageParsing.parse(file); // make a new, empty document Document doc = new Document(); FieldType type = new FieldType(); type.setIndexed(true); type.setStored(true); type.setStoreTermVectors(true); type.setTokenized(true); type.setStoreTermVectorOffsets(true); if (!infoWithField.isEmpty()) { Field image = new TextField("image", infoWithField.get("image").toString(), Field.Store.YES); System.out.println(infoWithField.get("image").toString()); doc.add(image); Field title = new StringField("title", infoWithField.get("title"), Field.Store.YES); doc.add(title); Field path = new StringField("path", file.getPath(), Field.Store.YES); doc.add(path); } if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { //System.out.println("adding " + file); writer.addDocument(doc); } else { System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { } } } }
From source file:org.explore3.searchengine.indexCreator.Indexer.java
License:Apache License
static void indexDocument(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocument(writer, new File(file, files[i])); }//from w ww . j a va 2s . c o m } } else { try { Map<String, String> infoWithField = HtmlParsing.parse(file); // make a new, empty document Document doc = new Document(); Field title = new TextField("title", infoWithField.get("title"), Field.Store.YES); doc.add(title); Field path = new StringField("path", file.getPath(), Field.Store.YES); doc.add(path); FieldType type = new FieldType(); type.setIndexed(true); type.setStored(true); type.setStoreTermVectors(true); type.setTokenized(true); type.setStoreTermVectorOffsets(true); Field highlighter = new Field("highlighterWords", infoWithField.get("text"), type); doc.add(highlighter); Field contents = new TextField("words", infoWithField.get("text"), Field.Store.YES); doc.add(contents); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { System.out.println("adding " + file); writer.addDocument(doc); } else { System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { } } } }
From source file:org.fnlp.app.lucene.demo.BuildIndex.java
License:Open Source License
/** * @param args//w w w . j av a2 s . c o m * @throws IOException * @throws LoadModelException */ public static void main(String[] args) throws IOException, LoadModelException { String indexPath = "../tmp/lucene"; System.out.println("Indexing to directory '" + indexPath + "'..."); Date start = new Date(); Directory dir = FSDirectory.open(new File(indexPath));//Dirctory dir-->FSDirectory //?? CNFactory CNFactory factory = CNFactory.getInstance("../models", Models.SEG_TAG); Analyzer analyzer = new FNLPAnalyzer(Version.LUCENE_47); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); String[] strs = new String[] { "?", "?????????", "????", "" }; //Date start = new Date(); for (int i = 0; i < strs.length; i++) { Document doc = new Document(); Field field = new TextField("content", strs[i], Field.Store.YES); doc.add(field); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { writer.addDocument(doc); } else { writer.updateDocument(new Term("content", strs[i]), doc); } } writer.close(); //?????? //dir.close(); //?????? Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:org.frontcache.cache.impl.LuceneIndexManager.java
License:Apache License
/** * Writes webResponse to index//from ww w . j a v a2s . com * @param response * @throws IOException */ void indexDoc(WebResponse response) throws IOException { IndexWriter iWriter = getIndexWriter(); Document doc = new Document(); String url = response.getUrl(); if (null == url) { logger.error("URL can't be null during index time for " + response); return; } doc.add(new StringField(URL_FIELD, url, Field.Store.YES)); doc.add(new StringField(DOMAIN_FIELD, response.getDomain(), Field.Store.YES)); if (null != response.getContent()) doc.add(new StoredField(BIN_FIELD, response.getContent())); // doc.add(new NumericDocValuesField(EXPIRE_DATE_FIELD, response.getExpireTimeMillis())); // TODO: store map ? doc.add(new StoredField(JSON_FIELD, gson.toJson(response), JSON_TYPE)); for (String tag : response.getTags()) doc.add(new StringField(TAGS_FIELD, tag, Field.Store.NO)); // tag is StringField to exact match try { iWriter.updateDocument(new Term(URL_FIELD, url), doc); } catch (IOException e) { logger.error("Error while in Lucene index operation: {}", e.getMessage(), e); } finally { try { iWriter.commit(); } catch (IOException ioEx) { logger.error("Error while commiting changes to Lucene index: {}", ioEx.getMessage(), ioEx); } } }
From source file:org.imixs.workflow.plugins.jee.extended.LucenePlugin.java
License:Open Source License
/**
 * Updates the search index for a collection of workitems.
 *
 * For each workitem the method tests whether it matches the conditions to be
 * added into the search index. If the workitem does not match the conditions
 * it is removed from the index instead.
 *
 * The method loads the lucene index properties from the imixs.properties
 * file on the classpath. If no properties are defined the method terminates
 * without touching the index.
 *
 * @param worklist collection of ItemCollections to be indexed
 * @return true if the update was successful, false if no index properties
 *         were found
 * @throws PluginException if the index cannot be updated or closed
 */
public static boolean updateWorklist(Collection<ItemCollection> worklist) throws PluginException {
    IndexWriter awriter = null;
    // try loading imixs-search properties; an empty result means indexing is
    // not configured, so there is nothing to do.
    Properties prop = loadProperties();
    if (prop.isEmpty())
        return false;
    try {
        awriter = createIndexWriter(prop);
        // add or remove each workitem in the search index....
        for (ItemCollection workitem : worklist) {
            // the $uniqueid term identifies the workitem's document
            Term term = new Term("$uniqueid", workitem.getItemValueString("$uniqueid"));
            // index the workitem if it matches the conditions, otherwise make
            // sure any stale document for it is deleted
            if (matchConditions(prop, workitem))
                awriter.updateDocument(term, createDocument(prop, workitem));
            else
                awriter.deleteDocuments(term);
        }
    } catch (IOException luceneEx) {
        // wrap the low-level failure; the writer is still closed below
        logger.warning(" Lucene Exception : " + luceneEx.getMessage());
        throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                "Unable to update search index", luceneEx);
    } finally {
        // always close the writer to release the index lock; close failures
        // are surfaced as PluginExceptions as well
        if (awriter != null) {
            logger.fine(" close writer");
            try {
                awriter.close();
            } catch (CorruptIndexException e) {
                throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                        "Unable to update search index", e);
            } catch (IOException e) {
                throw new PluginException(LucenePlugin.class.getSimpleName(), INVALID_INDEX,
                        "Unable to update search index", e);
            }
        }
    }
    logger.fine(" update worklist successfull");
    return true;
}
From source file:org.infoglue.cms.controllers.kernel.impl.simple.LuceneController.java
License:Open Source License
private void indexInformation(NotificationMessage notificationMessage) { try {/* ww w .ja v a 2s . co m*/ //Object objectIdentity = getObjectIdentity(object); IndexWriter writer = null; try { String index = CmsPropertyHandler.getContextRootPath() + File.separator + "lucene" + File.separator + "index"; File INDEX_DIR = new File(index); writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(new String[] {})); writer.setMaxMergeDocs(500000); if (logger.isInfoEnabled()) logger.info("Indexing to directory '" + INDEX_DIR + "'..."); if (notificationMessage.getType() == NotificationMessage.TRANS_CREATE) { List<Document> documents = getDocuments(notificationMessage); Iterator<Document> documentsIterator = documents.iterator(); while (documentsIterator.hasNext()) { Document indexingDocument = documentsIterator.next(); String uid = indexingDocument.get("uid"); if (logger.isInfoEnabled()) logger.info("Adding document with uid:" + uid + " - " + indexingDocument); if (indexingDocument != null) writer.addDocument(indexingDocument); } } else if (notificationMessage.getType() == NotificationMessage.TRANS_UPDATE) { List<Document> documents = getDocuments(notificationMessage); Iterator<Document> documentsIterator = documents.iterator(); while (documentsIterator.hasNext()) { Document indexingDocument = documentsIterator.next(); String uid = indexingDocument.get("uid"); if (logger.isInfoEnabled()) logger.info("Updating document with uid:" + uid + " - " + indexingDocument); if (indexingDocument != null) writer.updateDocument(new Term("uid", "" + uid), indexingDocument); } } else if (notificationMessage.getType() == NotificationMessage.TRANS_DELETE) { String uid = ""; if (notificationMessage.getClassName().equals(ContentImpl.class.getName())) { uid = "contentId_" + notificationMessage.getObjectId(); } else if (notificationMessage.getClassName().equals(ContentVersionImpl.class.getName())) { uid = "contentVersionId_" + notificationMessage.getObjectId(); } else if 
(notificationMessage.getClassName().equals(DigitalAssetImpl.class.getName())) { uid = "digitalAssetId_" + notificationMessage.getObjectId(); } if (logger.isInfoEnabled()) logger.info("Deleting documents:" + "uid=" + uid); writer.deleteDocuments(new Term("uid", "" + uid)); } } catch (Exception e) { logger.error("Error indexing:" + e.getMessage(), e); } finally { indexedDocumentsSinceLastOptimize++; if (indexedDocumentsSinceLastOptimize > 250) { logger.info("Optimizing..."); writer.optimize(); indexedDocumentsSinceLastOptimize = 0; } writer.close(); } } catch (Exception e) { logger.error("Error indexing:" + e.getMessage(), e); } }
From source file:org.infoglue.cms.controllers.kernel.impl.simple.SearchController.java
License:Open Source License
/**
 * Searches the Lucene content index and returns the matching content
 * versions.
 *
 * If the index does not exist yet it is bootstrapped with a single
 * "initializer" document. The search string is wildcard-suffixed, combined
 * with optional repository / language / content-type / modifier / state
 * filters into a MultiFieldQueryParser query, and hits are resolved back to
 * ContentVersionVOs. Hits that no longer resolve are treated as stale and
 * removed from the index.
 *
 * @param repositoryIdAsIntegerToSearch repositories to restrict the search to (OR-ed), or null/empty for all
 * @param searchString                  free-text query; a trailing "*" is appended if missing
 * @param maxRows                       page size for iterating hits
 * @param userName                      restrict to this last modifier, or null/empty for any
 * @param languageId                    restrict to this language, or null/<= 0 for any
 * @param contentTypeDefinitionIds      restrict to these content types (OR-ed), or null/empty for any
 * @param caseSensitive                 currently unused
 * @param stateId                       restrict to this state, or null for any
 * @param includeAssets                 when false, documents flagged isAsset=true are excluded
 * @return the resolved content versions; empty on error (errors are logged)
 */
private List<ContentVersionVO> getContentVersionVOListFromLucene(Integer[] repositoryIdAsIntegerToSearch,
        String searchString, int maxRows, String userName, Integer languageId,
        Integer[] contentTypeDefinitionIds, Integer caseSensitive, Integer stateId, boolean includeAssets)
        throws SystemException, Bug {
    List<ContentVersionVO> contentVersionVOList = new ArrayList<ContentVersionVO>();
    try {
        String index = CmsPropertyHandler.getContextRootPath() + File.separator + "lucene" + File.separator
                + "index";
        boolean indexExists = IndexReader.indexExists(new File(index));
        if (!indexExists) {
            // Bootstrap an empty index with one "initializer" document so
            // subsequent searches do not fail on a missing directory.
            try {
                File INDEX_DIR = new File(index);
                IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer());
                logger.info("Indexing to directory '" + INDEX_DIR + "'...");
                writer.updateDocument(new Term("initializer", "true"), getDocument("initializer"));
                logger.info("Optimizing...");
                writer.optimize();
                writer.close();
            } catch (Exception e) {
                logger.error("An error creating index:" + e.getMessage(), e);
            }
        }
        // Always search with a trailing wildcard so prefixes match.
        if (!searchString.endsWith("*"))
            searchString = searchString + "*";
        // Parallel lists of field name / query string / occur flag consumed
        // by MultiFieldQueryParser below.
        List<String> fieldNames = new ArrayList<String>();
        List<String> queryStrings = new ArrayList<String>();
        List<BooleanClause.Occur> booleanList = new ArrayList<BooleanClause.Occur>();
        if (repositoryIdAsIntegerToSearch != null && repositoryIdAsIntegerToSearch.length > 0) {
            // OR together all requested repository ids into one sub-query.
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < repositoryIdAsIntegerToSearch.length; i++) {
                if (i > 0)
                    sb.append(" OR ");
                sb.append("" + repositoryIdAsIntegerToSearch[i]);
            }
            if (sb.length() > 0) {
                fieldNames.add("repositoryId");
                queryStrings.add("" + sb.toString());
                booleanList.add(BooleanClause.Occur.MUST);
            }
        }
        if (languageId != null && languageId.intValue() > 0) {
            fieldNames.add("languageId");
            queryStrings.add("" + languageId);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (contentTypeDefinitionIds != null && contentTypeDefinitionIds.length > 0) {
            // OR together all requested content type definition ids.
            StringBuffer sb = new StringBuffer();
            for (int i = 0; i < contentTypeDefinitionIds.length; i++) {
                Integer contentTypeDefinitionId = contentTypeDefinitionIds[i];
                if (contentTypeDefinitionId != null) {
                    if (i > 0)
                        sb.append(" OR ");
                    sb.append("" + contentTypeDefinitionId);
                }
            }
            if (sb.length() > 0) {
                fieldNames.add("contentTypeDefinitionId");
                queryStrings.add("" + sb.toString());
                booleanList.add(BooleanClause.Occur.MUST);
            }
        }
        if (userName != null && !userName.equals("")) {
            fieldNames.add("lastModifier");
            queryStrings.add("" + userName);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (stateId != null && !stateId.equals("")) {
            fieldNames.add("stateId");
            queryStrings.add("" + stateId);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        if (!includeAssets) {
            // Exclude asset documents unless the caller asked for them.
            fieldNames.add("isAsset");
            queryStrings.add("true");
            booleanList.add(BooleanClause.Occur.MUST_NOT);
        }
        if (searchString != null && searchString.length() > 0) {
            fieldNames.add("contents");
            queryStrings.add(searchString);
            booleanList.add(BooleanClause.Occur.MUST);
        }
        String[] fields = new String[fieldNames.size()];
        fields = (String[]) fieldNames.toArray(fields);
        String[] queries = new String[fieldNames.size()];
        queries = (String[]) queryStrings.toArray(queries);
        BooleanClause.Occur[] flags = new BooleanClause.Occur[fieldNames.size()];
        flags = (BooleanClause.Occur[]) booleanList.toArray(flags);
        IndexReader reader = IndexReader.open(index);
        Searcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(new String[] {});
        Query query = MultiFieldQueryParser.parse(queries, fields, flags, analyzer);
        logger.info("Searching for: " + query.toString());
        Hits hits = searcher.search(query);
        logger.info(hits.length() + " total matching documents");
        final int HITS_PER_PAGE = new Integer(maxRows);
        for (int start = 0; start < hits.length(); start += HITS_PER_PAGE) {
            int end = Math.min(hits.length(), start + HITS_PER_PAGE);
            for (int i = start; i < end; i++) {
                org.apache.lucene.document.Document doc = hits.doc(i);
                String contentVersionId = doc.get("contentVersionId");
                String contentId = doc.get("contentId");
                logger.info("doc:" + doc);
                logger.info("contentVersionId:" + contentVersionId);
                logger.info("contentId:" + contentId);
                if (contentVersionId == null && contentId != null) {
                    // Content-level hit (no version id stored on the doc).
                    try {
                        ContentVO cvo = ContentController.getContentController()
                                .getContentVOWithId(new Integer(contentId));
                        logger.info("cvo:" + cvo);
                        String path = doc.get("path");
                        if (path != null) {
                            logger.info((i + 1) + ". " + path);
                            String title = doc.get("title");
                            if (title != null) {
                                logger.info(" Title: " + doc.get("title"));
                            }
                        } else {
                            logger.info((i + 1) + ". " + "No path for this document");
                        }
                    } catch (Exception e) {
                        // Hit no longer resolves: remove stale index entry.
                        logger.error("ContentVersion with id:" + contentVersionId
                                + " was not valid - skipping but how did the index become corrupt?");
                        deleteVersionFromIndex(contentVersionId);
                    }
                } else {
                    // Version-level hit: load the version and add it to the result.
                    try {
                        ContentVersionVO cvvo = ContentVersionController.getContentVersionController()
                                .getFullContentVersionVOWithId(new Integer(contentVersionId));
                        logger.info("cvvo:" + cvvo);
                        contentVersionVOList.add(cvvo);
                        String path = doc.get("path");
                        if (path != null) {
                            logger.info((i + 1) + ". " + path);
                            String title = doc.get("title");
                            if (title != null) {
                                logger.info(" Title: " + doc.get("title"));
                            }
                        } else {
                            logger.info((i + 1) + ". " + "No path for this document");
                        }
                    } catch (Exception e) {
                        // Hit no longer resolves: remove stale index entry.
                        logger.error("ContentVersion with id:" + contentVersionId
                                + " was not valid - skipping but how did the index become corrupt?");
                        deleteVersionFromIndex(contentVersionId);
                    }
                }
            }
            if (queries != null) // non-interactive
                break;
        }
        reader.close();
    } catch (Exception e) {
        logger.error("Error searching:" + e.getMessage(), e);
    }
    return contentVersionVOList;
}
From source file:org.janusgraph.diskstorage.lucene.LuceneExample.java
License:Apache License
void indexDocs(IndexWriter writer, String documentId, Map<String, Object> docMap) throws IOException { Document doc = new Document(); Field documentIdField = new StringField("docid", documentId, Field.Store.YES); doc.add(documentIdField);//from w w w. jav a 2 s .c o m for (Map.Entry<String, Object> kv : docMap.entrySet()) { String key = kv.getKey(); Object value = kv.getValue(); if (value instanceof Number) { final Field field; if (value instanceof Integer || value instanceof Long) { field = new LongPoint(key, ((Number) value).longValue()); } else { //double or float field = new DoublePoint(key, ((Number) value).doubleValue()); } doc.add(field); } else if (value instanceof String) { String str = (String) value; Field field = new TextField(key + TXT_SUFFIX, str, Field.Store.NO); doc.add(field); if (str.length() < 256) field = new StringField(key + STR_SUFFIX, str, Field.Store.NO); doc.add(field); } else if (value instanceof Geoshape) { Shape shape = ((Geoshape) value).getShape(); for (IndexableField f : getSpatialStrategy(key).createIndexableFields(shape)) { doc.add(f); } } else throw new IllegalArgumentException("Unsupported type: " + value); } writer.updateDocument(new Term("docid", documentId), doc); }
From source file:org.janusgraph.diskstorage.lucene.LuceneIndex.java
License:Apache License
/**
 * Applies all mutations for one store to its Lucene index: deleted documents
 * are removed outright; for the rest, the existing document is fetched (or a
 * new one created), the mutation's deleted fields are stripped, its added
 * entries merged in, and the result upserted by its document id. The writer
 * is committed once at the end; the near-real-time reader is always closed.
 *
 * @param stores      store name mapped to its per-document mutations
 * @param information key metadata used to pick the writer and build fields
 * @throws IOException      if reading or writing the index fails
 * @throws BackendException declared for API compatibility with callers
 */
private void mutateStores(Map.Entry<String, Map<String, IndexMutation>> stores,
        KeyInformation.IndexRetriever information) throws IOException, BackendException {
    IndexReader reader = null;
    try {
        final String storeName = stores.getKey();
        final IndexWriter writer = getWriter(storeName, information);
        // Near-real-time reader over the writer so uncommitted changes are
        // visible to retrieveOrCreate() below.
        reader = DirectoryReader.open(writer, true, true);
        final IndexSearcher searcher = new IndexSearcher(reader);
        for (final Map.Entry<String, IndexMutation> entry : stores.getValue().entrySet()) {
            final String documentId = entry.getKey();
            final IndexMutation mutation = entry.getValue();
            if (mutation.isDeleted()) {
                if (log.isTraceEnabled())
                    log.trace("Deleted entire document [{}]", documentId);
                writer.deleteDocuments(new Term(DOCID, documentId));
                continue;
            }
            // Fetch the current document (or an empty one) plus its geo fields.
            final Pair<Document, Map<String, Shape>> docAndGeo = retrieveOrCreate(documentId, searcher);
            final Document doc = docAndGeo.getKey();
            final Map<String, Shape> geoFields = docAndGeo.getValue();
            Preconditions.checkNotNull(doc);
            // Strip every field this mutation deletes before applying additions.
            for (final IndexEntry del : mutation.getDeletions()) {
                Preconditions.checkArgument(!del.hasMetaData(),
                        "Lucene index does not support indexing meta data: %s", del);
                final String key = del.field;
                if (doc.getField(key) != null) {
                    if (log.isTraceEnabled())
                        log.trace("Removing field [{}] on document [{}]", key, documentId);
                    doc.removeFields(key);
                    geoFields.remove(key);
                }
            }
            addToDocument(storeName, documentId, doc, mutation.getAdditions(), geoFields, information);
            //write the old document to the index with the modifications
            writer.updateDocument(new Term(DOCID, documentId), doc);
        }
        writer.commit();
    } finally {
        IOUtils.closeQuietly(reader);
    }
}
From source file:org.janusgraph.diskstorage.lucene.LuceneIndex.java
License:Apache License
@Override public void restore(Map<String, Map<String, List<IndexEntry>>> documents, KeyInformation.IndexRetriever information, BaseTransaction tx) throws BackendException { writerLock.lock();//from ww w . ja v a2 s. com try { for (final Map.Entry<String, Map<String, List<IndexEntry>>> stores : documents.entrySet()) { final String store = stores.getKey(); final IndexWriter writer = getWriter(store, information); final IndexReader reader = DirectoryReader.open(writer, true, true); final IndexSearcher searcher = new IndexSearcher(reader); for (final Map.Entry<String, List<IndexEntry>> entry : stores.getValue().entrySet()) { final String docID = entry.getKey(); final List<IndexEntry> content = entry.getValue(); if (content == null || content.isEmpty()) { if (log.isTraceEnabled()) log.trace("Deleting document [{}]", docID); writer.deleteDocuments(new Term(DOCID, docID)); continue; } final Pair<Document, Map<String, Shape>> docAndGeo = retrieveOrCreate(docID, searcher); addToDocument(store, docID, docAndGeo.getKey(), content, docAndGeo.getValue(), information); //write the old document to the index with the modifications writer.updateDocument(new Term(DOCID, docID), docAndGeo.getKey()); } writer.commit(); } tx.commit(); } catch (final IOException e) { throw new TemporaryBackendException("Could not update Lucene index", e); } finally { writerLock.unlock(); } }