List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
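Before the individual examples, a minimal self-contained sketch may help. The signature shown above is Lucene's internal overload; the public API is updateDocument(Term term, Iterable<? extends IndexableField> doc), which deletes all documents containing the given term and then adds the new document, so it also works when no matching document exists yet. The sketch below assumes the Lucene 4.x API used by most examples on this page; the index path "/tmp/demo-index" and the "id"/"body" field names are made up for illustration.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws IOException {
        Directory dir = FSDirectory.open(new File("/tmp/demo-index"));
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47,
                new StandardAnalyzer(Version.LUCENE_47));
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            // the untokenized "id" field serves as the primary key for updates
            doc.add(new StringField("id", "42", Field.Store.YES));
            doc.add(new TextField("body", "updated text for document 42", Field.Store.NO));
            // deletes any document matching the term, then adds doc;
            // behaves like addDocument when no match exists
            writer.updateDocument(new Term("id", "42"), doc);
            writer.commit();
        }
    }
}

The delete and add are atomic as seen by readers of the same index, so a searcher never observes a state where the old document is gone but the new one is not yet visible.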
From source file: org.neo4j.index.impl.lucene.CommitContext.java
License: Open Source License
private void applyDocuments(IndexWriter writer, IndexType type, Map<Long, DocumentContext> documents)
        throws IOException {
    for (Map.Entry<Long, DocumentContext> entry : documents.entrySet()) {
        DocumentContext context = entry.getValue();
        if (context.exists) {
            if (LuceneDataSource.documentIsEmpty(context.document)) {
                writer.deleteDocuments(type.idTerm(context.entityId));
            } else {
                writer.updateDocument(type.idTerm(context.entityId), context.document);
            }
        } else {
            writer.addDocument(context.document);
        }
    }
}
From source file: org.ofbiz.content.search.DocumentIndexer.java
License: Apache License
@Override
public void run() {
    IndexWriter indexWriter = null;
    int uncommittedDocs = 0;
    while (true) {
        LuceneDocument ofbizDocument;
        try {
            // Execution will pause here until the queue receives a LuceneDocument for indexing
            ofbizDocument = documentIndexQueue.take();
        } catch (InterruptedException e) {
            Debug.logError(e, module);
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            break;
        }
        Term documentIdentifier = ofbizDocument.getDocumentIdentifier();
        Document document = ofbizDocument.prepareDocument(this.delegator);
        if (indexWriter == null) {
            try {
                indexWriter = new IndexWriter(this.indexDirectory,
                        new IndexWriterConfig(SearchWorker.LUCENE_VERSION,
                                new StandardAnalyzer(SearchWorker.LUCENE_VERSION)));
            } catch (CorruptIndexException e) {
                Debug.logError("Corrupted lucene index: " + e.getMessage(), module);
                break;
            } catch (LockObtainFailedException e) {
                Debug.logError("Could not obtain Lock on lucene index " + e.getMessage(), module);
                // TODO: put the thread to sleep waiting for the lock to be released
                break;
            } catch (IOException e) {
                Debug.logError(e.getMessage(), module);
                break;
            }
        }
        try {
            if (document == null) {
                indexWriter.deleteDocuments(documentIdentifier);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": deleted Lucene document: " + ofbizDocument, module);
            } else {
                indexWriter.updateDocument(documentIdentifier, document);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": indexed Lucene document: " + ofbizDocument, module);
            }
        } catch (Exception e) {
            Debug.logError(e, getName() + ": error processing Lucene document: " + ofbizDocument, module);
            if (documentIndexQueue.peek() == null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            continue;
        }
        uncommittedDocs++;
        if (uncommittedDocs == UNCOMMITTED_DOC_LIMIT || documentIndexQueue.peek() == null) {
            // limit reached or queue empty, time to commit
            try {
                indexWriter.commit();
            } catch (IOException e) {
                Debug.logError(e, module);
            }
            uncommittedDocs = 0;
        }
        if (documentIndexQueue.peek() == null) {
            try {
                indexWriter.close();
                indexWriter = null;
            } catch (IOException e) {
                Debug.logError(e, module);
            }
        }
    }
}
From source file: org.olat.search.service.indexer.JmsIndexer.java
License: Apache License
/**
 * Add or update lucene documents in the permanent index.
 * @param documents the documents to add or update
 */
@Override
public void addDocuments(List<Document> documents) {
    if (documents == null || documents.isEmpty())
        return; // nothing to do
    IndexWriter writer = null;
    try {
        DirectoryReader currentReader = getReader();
        IndexSearcher searcher = new IndexSearcher(currentReader);
        writer = permanentIndexWriter.getAndLock();
        for (Document document : documents) {
            if (document != null) {
                String resourceUrl = document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
                Term uuidTerm = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
                TopDocs hits = searcher.search(new TermQuery(uuidTerm), 10);
                if (hits.totalHits > 0) {
                    writer.updateDocument(uuidTerm, document);
                } else {
                    writer.addDocument(document);
                }
            }
        }
    } catch (IOException e) {
        log.error("", e);
    } finally {
        permanentIndexWriter.release(writer);
    }
}
From source file: org.olat.search.service.indexer.JmsIndexer.java
License: Apache License
@Override
public void addDocument(Document document, IndexWriter writer) {
    try {
        String resourceUrl = document.get(AbstractOlatDocument.RESOURCEURL_FIELD_NAME);
        Term uuidTerm = new Term(AbstractOlatDocument.RESOURCEURL_FIELD_NAME, resourceUrl);
        DirectoryReader currentReader = getReader();
        IndexSearcher searcher = new IndexSearcher(currentReader);
        TopDocs hits = searcher.search(new TermQuery(uuidTerm), 10);
        if (hits.totalHits > 0) {
            writer.updateDocument(uuidTerm, document);
        } else {
            writer.addDocument(document);
        }
    } catch (IOException e) {
        log.error("", e);
    }
}
From source file: org.openeclass.lucene.demo.IndexCourses.java
License: Open Source License
private static void indexCourses(IndexWriter writer, Connection con) throws SQLException, IOException {
    PreparedStatement sql = con.prepareStatement(
            "SELECT id, title, keywords, code, public_code, prof_names, created FROM course");
    ResultSet rs = sql.executeQuery();
    int c = 0;
    while (rs.next()) {
        Long id = rs.getLong(1);
        String title = rs.getString(2);
        String keys = rs.getString(3);
        String code = rs.getString(4);
        String publicCode = rs.getString(5);
        String profNames = rs.getString(6);
        //Timestamp created = rs.getTimestamp(7);

        Document doc = new Document();
        Field idField = new Field("course_id", id.toString(), Field.Store.YES, Field.Index.NOT_ANALYZED);
        doc.add(idField);
        Field titleField = new Field("title", title, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(titleField);
        Field keysField = new Field("keywords", keys, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(keysField);
        Field codeField = new Field("code", code, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(codeField);
        Field publicCodeField = new Field("public_code", publicCode, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(publicCodeField);
        Field profsField = new Field("prof_names", profNames, Field.Store.YES, Field.Index.ANALYZED);
        doc.add(profsField);

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            writer.addDocument(doc);
        } else {
            writer.updateDocument(new Term("course_id", id.toString()), doc);
        }
        c++;
    }
    System.out.println("total db rows: " + c);
    rs.close();
    sql.close();
}
From source file: org.openerproject.targetproperties.svector.indexing.CustomLuceneIndexer.java
License: Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                log.warn(fnfe);
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file: org.openrdf.sail.lucene.LuceneIndex.java
License: BSD License
/**
 * Indexes the specified Statement.
 */
public synchronized void addStatement(Statement statement) throws IOException {
    // determine stuff to store
    Value object = statement.getObject();
    if (!(object instanceof Literal)) {
        return;
    }

    String field = statement.getPredicate().toString();
    String text = ((Literal) object).getLabel();
    String context = getContextID(statement.getContext());
    boolean updated = false;
    IndexWriter writer = null;

    // fetch the Document representing this Resource
    String resourceId = getResourceID(statement.getSubject());
    String contextId = getContextID(statement.getContext());
    String id = formIdString(resourceId, contextId);
    Term idTerm = new Term(ID_FIELD_NAME, id);
    Document document = getDocument(idTerm);

    if (document == null) {
        // there is no such Document: create one now
        document = new Document();
        addID(id, document);
        addResourceID(resourceId, document);
        // add context
        addContext(context, document);
        addProperty(field, text, document);

        // add it to the index
        writer = getIndexWriter();
        writer.addDocument(document);
        updated = true;
    } else {
        // update this Document when this triple has not been stored already
        if (!hasProperty(field, text, document)) {
            // create a copy of the old document; updating the retrieved
            // Document instance works ok for stored properties but indexed data
            // gets lost when doing an IndexWriter.updateDocument with it
            Document newDocument = new Document();

            // add all existing fields (including id, uri, context, and text)
            for (Object oldFieldObject : document.getFields()) {
                Field oldField = (Field) oldFieldObject;
                newDocument.add(oldField);
            }

            // add the new triple to the cloned document
            addProperty(field, text, newDocument);

            // update the index with the cloned document
            writer = getIndexWriter();
            writer.updateDocument(idTerm, newDocument);
            updated = true;
        }
    }

    if (updated) {
        // make sure that these updates are visible for new
        // IndexReaders/Searchers
        writer.commit();
        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }
}
From source file: org.openrdf.sail.lucene.LuceneIndex.java
License: BSD License
public synchronized void removeStatement(Statement statement) throws IOException {
    Value object = statement.getObject();
    if (!(object instanceof Literal)) {
        return;
    }

    IndexWriter writer = null;
    boolean updated = false;

    // fetch the Document representing this Resource
    String resourceId = getResourceID(statement.getSubject());
    String contextId = getContextID(statement.getContext());
    String id = formIdString(resourceId, contextId);
    Term idTerm = new Term(ID_FIELD_NAME, id);
    Document document = getDocument(idTerm);

    if (document != null) {
        // determine the values used in the index for this triple
        String fieldName = statement.getPredicate().toString();
        String text = ((Literal) object).getLabel();

        // see if this triple occurs in this Document
        if (hasProperty(fieldName, text, document)) {
            // if the Document only has one predicate field, we can remove the
            // document
            int nrProperties = numberOfPropertyFields(document);
            if (nrProperties == 0) {
                logger.info("encountered document with zero properties, should have been deleted: {}",
                        resourceId);
            } else if (nrProperties == 1) {
                writer = getIndexWriter();
                writer.deleteDocuments(idTerm);
                updated = true;
            } else {
                // there are more triples encoded in this Document: remove the
                // document and add a new Document without this triple
                Document newDocument = new Document();
                addID(id, newDocument);
                addResourceID(resourceId, newDocument);
                addContext(contextId, newDocument);

                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    String oldFieldName = oldField.name();
                    String oldValue = oldField.stringValue();
                    if (isPropertyField(oldFieldName)
                            && !(fieldName.equals(oldFieldName) && text.equals(oldValue))) {
                        addProperty(oldFieldName, oldValue, newDocument);
                    }
                }

                writer = getIndexWriter();
                writer.updateDocument(idTerm, newDocument);
                updated = true;
            }
        }
    }

    if (updated) {
        // make sure that these updates are visible for new
        // IndexReaders/Searchers
        writer.commit();
        // the old IndexReaders/Searchers are now outdated
        invalidateReaders();
    }
}
From source file: org.openrdf.sail.lucene.LuceneIndex.java
License: BSD License
/**
 * Add many statements at the same time, remove many statements at the same
 * time. Ordering by resource has to be done inside this method. The passed
 * added/removed sets are disjoint: no statement can be in both.
 *
 * @param added
 *        all added statements, can have multiple subjects
 * @param removed
 *        all removed statements, can have multiple subjects
 */
public synchronized void addRemoveStatements(Collection<Statement> added, Collection<Statement> removed)
        throws Exception {
    // Buffer per resource
    MapOfListMaps<Resource, String, Statement> rsAdded = new MapOfListMaps<Resource, String, Statement>();
    MapOfListMaps<Resource, String, Statement> rsRemoved = new MapOfListMaps<Resource, String, Statement>();
    HashSet<Resource> resources = new HashSet<Resource>();
    for (Statement s : added) {
        rsAdded.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }
    for (Statement s : removed) {
        rsRemoved.add(s.getSubject(), getContextID(s.getContext()), s);
        resources.add(s.getSubject());
    }

    logger.debug("Removing " + removed.size() + " statements, adding " + added.size() + " statements");

    IndexWriter writer = getIndexWriter();

    // for each resource, add/remove
    for (Resource resource : resources) {
        Map<String, List<Statement>> stmtsToRemove = rsRemoved.get(resource);
        Map<String, List<Statement>> stmtsToAdd = rsAdded.get(resource);

        Set<String> contextsToUpdate = new HashSet<String>(stmtsToAdd.keySet());
        contextsToUpdate.addAll(stmtsToRemove.keySet());

        Map<String, Document> docsByContext = new HashMap<String, Document>();

        // is the resource in the store?
        // fetch the Document representing this Resource
        String resourceId = getResourceID(resource);
        Term uriTerm = new Term(URI_FIELD_NAME, resourceId);
        List<Document> documents = getDocuments(uriTerm);
        for (Document doc : documents) {
            docsByContext.put(this.getContextID(doc), doc);
        }

        for (String contextId : contextsToUpdate) {
            String id = formIdString(resourceId, contextId);
            Term idTerm = new Term(ID_FIELD_NAME, id);

            Document document = docsByContext.get(contextId);
            if (document == null) {
                // there are no such Documents: create one now
                document = new Document();
                addID(id, document);
                addResourceID(resourceId, document);
                addContext(contextId, document);
                // add all statements, remember the contexts
                // HashSet<Resource> contextsToAdd = new HashSet<Resource>();
                List<Statement> list = stmtsToAdd.get(contextId);
                if (list != null) {
                    for (Statement s : list) {
                        addProperty(s, document);
                    }
                }

                // add it to the index
                writer.addDocument(document);

                // THERE SHOULD BE NO DELETED TRIPLES ON A NEWLY ADDED RESOURCE
                if (stmtsToRemove.containsKey(contextId))
                    logger.info(
                            "Statements are marked to be removed that should not be in the store, for resource {} and context {}. Nothing done.",
                            resource, contextId);
            } else {
                // update the Document

                // create a copy of the old document; updating the retrieved
                // Document instance works ok for stored properties but indexed
                // data gets lost when doing an IndexWriter.updateDocument with it
                Document newDocument = new Document();

                // buffer the removed literal statements
                ListMap<String, String> removedOfResource = null;
                {
                    List<Statement> removedStatements = stmtsToRemove.get(contextId);
                    if (removedStatements != null && !removedStatements.isEmpty()) {
                        removedOfResource = new ListMap<String, String>();
                        for (Statement r : removedStatements) {
                            if (r.getObject() instanceof Literal) {
                                // remove value from both property field and the
                                // corresponding text field
                                String label = ((Literal) r.getObject()).getLabel();
                                removedOfResource.put(r.getPredicate().toString(), label);
                                removedOfResource.put(TEXT_FIELD_NAME, label);
                            }
                        }
                    }
                }

                // add all existing fields (including id, uri, context, and text)
                // but without adding the removed ones
                // keep the predicate/value pairs to ensure that the statement
                // cannot be added twice
                SetMap<String, String> copiedProperties = new SetMap<String, String>();
                for (Object oldFieldObject : document.getFields()) {
                    Field oldField = (Field) oldFieldObject;
                    // do not copy removed statements to the new version of the
                    // document
                    if (removedOfResource != null) {
                        // which fields were removed?
                        List<String> objectsRemoved = removedOfResource.get(oldField.name());
                        if ((objectsRemoved != null) && (objectsRemoved.contains(oldField.stringValue())))
                            continue;
                    }
                    newDocument.add(oldField);
                    copiedProperties.put(oldField.name(), oldField.stringValue());
                }

                // add all statements to this document, except for those which
                // are already there
                {
                    List<Statement> addedToResource = stmtsToAdd.get(contextId);
                    String val;
                    if (addedToResource != null && !addedToResource.isEmpty()) {
                        for (Statement s : addedToResource) {
                            val = getLiteralPropertyValueAsString(s);
                            if (val != null) {
                                if (!copiedProperties.containsKeyValuePair(s.getPredicate().stringValue(), val)) {
                                    addProperty(s, newDocument);
                                }
                            }
                        }
                    }
                }

                // update the index with the cloned document, if it contains any
                // meaningful non-system properties
                int nrProperties = numberOfPropertyFields(newDocument);
                if (nrProperties > 0) {
                    writer.updateDocument(idTerm, newDocument);
                } else {
                    writer.deleteDocuments(idTerm);
                }
            }
        }
    }

    // make sure that these updates are visible for new
    // IndexReaders/Searchers
    writer.commit();
    // the old IndexReaders/Searchers are now outdated
    invalidateReaders();
}
From source file: org.segrada.search.lucene.LuceneSearchEngine.java
License: Apache License
@Override
public synchronized boolean index(String id, String className, String title, String subTitles,
        String content, String[] tagIds, Integer color, String iconFileIdentifier, float weight) {
    try {
        // init index writer config
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_47, this.analyzer);

        // create new index writer
        IndexWriter iWriter = new IndexWriter(directory, indexWriterConfig);

        Document doc = new Document();

        doc.add(new Field("id", id, simpleIndexType));
        doc.add(new Field("className", className, simpleIndexType));

        Field field;
        if (title != null) {
            field = new Field("title", title, indexedTextType);
            field.setBoost(10f * weight);
            doc.add(field);
        }

        if (subTitles != null) {
            field = new Field("subTitles", subTitles, indexedTextType);
            field.setBoost(6f * weight);
            doc.add(field);
        }

        // add content
        if (content == null)
            content = "";
        field = new Field("content", content, indexedTextType);
        field.setBoost(weight);
        doc.add(field);

        // add tagIds
        if (tagIds != null)
            for (String tagId : tagIds) {
                field = new Field("tag", tagId, simpleIndexType);
                field.setBoost(weight);
                doc.add(field);
            }

        // add color - just stored
        if (color != null) {
            field = new IntField("color", color, IntField.TYPE_STORED);
            doc.add(field);
        }

        // add icon file identifier - just stored
        if (iconFileIdentifier != null) {
            field = new Field("iconFileIdentifier", iconFileIdentifier, TextField.TYPE_STORED);
            doc.add(field);
        }

        // create or update document
        iWriter.updateDocument(new Term("id", id), doc);

        iWriter.close();
    } catch (Exception e) {
        logger.error("Could not index document " + id, e);
        return false;
    }

    return true;
}