List of usage examples for org.apache.lucene.index IndexWriter updateDocument
public long updateDocument(Term term, Iterable<? extends IndexableField> doc) throws IOException
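All of the examples below call this public overload, which atomically deletes every document containing the given term and then adds the new document; as seen by a reader on the same index, the delete and add are a single operation. Callers therefore key the update on a unique identifier field. Here is a minimal, self-contained sketch; the index path "/tmp/example-index" and the "id"/"contents" field names are illustrative assumptions, not taken from any example on this page:

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location; replace with your own path.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // "id" is the unique key field this sketch assumes; a StringField is not
            // tokenized, so the Term below matches it exactly.
            doc.add(new StringField("id", "42", Field.Store.YES));
            doc.add(new TextField("contents", "updated text for document 42", Field.Store.NO));
            // Deletes any document(s) whose "id" term equals "42", then adds doc, atomically.
            writer.updateDocument(new Term("id", "42"), doc);
            writer.commit();
        }
    }
}

Because updateDocument deletes by exact term match, the key field must be indexed without tokenization (e.g. a StringField), which is why the examples below all pass an untokenized identifier or path field to the Term.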
From source file:net.semanticmetadata.lire.imageanalysis.bovw.VLADBuilder.java
License:Open Source License
/**
 * Indexes all documents in the index that do not include the VLAD feature yet.
 *
 * @throws IOException
 */
public void indexMissing() throws IOException {
    init();
    // Read clusters from disk:
    clusters = Cluster.readClusters(clusterFile);
    // Create & store histograms:
    System.out.println("Creating histograms ...");
    LireFeature f = getFeatureInstance();
    IndexWriter iw = LuceneUtils.createIndexWriter(((DirectoryReader) reader).directory(), true,
            LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document d = reader.document(i);
        // Only update documents that have no VLAD values yet:
        if (d.getValues(vladFieldName) == null || d.getValues(vladFieldName).length == 0) {
            createVisualWords(d, f);
            // Replace the stored document, keyed on its identifier field:
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        }
    }
    iw.commit();
    iw.close();
    System.out.println("Finished.");
}
From source file:net.semanticmetadata.lire.indexing.IndexVisualWordsTest.java
License:Open Source License
public void testIndexMissingFiles() throws IOException {
    // first delete some of the existing ones ...
    System.out.println("Deleting visual words from docs ...");
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexWriter iw = LuceneUtils.createIndexWriter(index, false);
    int maxDocs = ir.maxDoc();
    for (int i = 0; i < maxDocs / 10; i++) {
        Document d = ir.document(i);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
        // d.removeFields(DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW_VECTOR);
        // d.removeFields(DocumentBuilder.FIELD_NAME_SURF);
        // Write the stripped document back, replacing the old version:
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
    }
    System.out.println("# of deleted docs: " + maxDocs / 10);
    System.out.println("Optimizing and closing ...");
    iw.close();
    ir.close();
    System.out.println("Creating new visual words ...");
    BOVWBuilder surfFeatureHistogramBuilder = new BOVWBuilder(
            DirectoryReader.open(FSDirectory.open(new File(index))), new SurfFeature(), numSamples, clusters);
    // surfFeatureHistogramBuilder.indexMissing();
    // System.out.println("Finished.");
}
From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java
License:Open Source License
/**
 * Creates a set of reference objects and stores it in a new index (hashFunctionsFileName "<indexPath>-ro").
 * Then creates ordered lists of reference object positions for each data item in the index with the given
 * feature. Finally a new index (hashFunctionsFileName "<indexPath>-ms") is created where all the original
 * documents as well as the new data are stored.
 *
 * @param indexPath the path to the original index
 * @throws IOException
 */
public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    if (numDocs < numReferenceObjects) {
        throw new UnsupportedOperationException("Too few documents in index.");
    }
    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);
    boolean hasDeletions = reader.hasDeletions();
    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);
    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
        referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;
    if (hasDeletions) {
        System.err.println("WARNING: There are deleted docs in your index. You should "
                + "optimize your index before using this method.");
    }
    // progress report
    progress.setCurrentState(State.RoIndexing);
    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
        count++;
        Document document = reader.document(i);
        document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
        iw.addDocument(document);
    }
    iw.commit();
    iw.close();
    // progress report
    progress.setCurrentState(State.Indexing);
    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);
    iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
    }
    iw.commit();
    iw.close();
    // progress report
    progress.setCurrentState(State.Idle);
}
From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java
License:Open Source License
/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate fileList. However, further documents were added and they
 * now need to get a ranked list of reference objects. So we (i) get all these new documents
 * missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);
    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether the document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            // System.out.println(sb.toString());
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }
        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);
        // debug:
        System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}
From source file:net.simpleframework.ado.lucene.AbstractLuceneManager.java
License:Apache License
@Override
public void doUpdateIndex(final Object... objects) {
    IndexWriter iWriter = null;
    try {
        iWriter = createIndexWriter();
        for (final Object obj : objects) {
            final String id = getId(obj);
            if (StringUtils.hasText(id)) {
                final LuceneDocument document = new LuceneDocument();
                if (objectToDocument(obj, document)) {
                    // Replace the indexed document keyed on its "id" field:
                    iWriter.updateDocument(new Term("id", id), document.doc);
                }
            }
        }
    } catch (final IOException e) {
        throw ADOException.of(e);
    } finally {
        closeWriter(iWriter);
    }
}
From source file:net.tooan.ynpay.third.mongodb.lucene.backend.IndexUpdateTask.java
License:Apache License
@Override
public void run() {
    Class<?> clazz = obj.getClass();
    String[] name = MapperUtil.getEntityName(clazz);
    IndexWriterCache cache = IndexWriterCache.getInstance();
    IndexWriter writer = cache.get(StringUtils.join(name, "."));
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    // The entity's id field serves as the unique key for the update:
    Term term = new Term(FieldsCache.getInstance().getIdFieldName(clazz), obj.getId());
    try {
        writer.updateDocument(term, doc);
    } catch (CorruptIndexException ex) {
        logger.error("IndexWriter can not update the document", ex);
    } catch (IOException ex) {
        logger.error("IndexWriter can not update the document", ex);
    }
}
From source file:net.ymate.platform.module.search.Searchs.java
License:Apache License
public static void indexUpdate(final ISearchable searchable, final ICallbackHandler handler) {
    __doCheckModuleInited();
    __executor.execute(new Runnable() {
        public void run() {
            IndexedMeta _meta = getIndexedMeta(searchable);
            IndexWriter _writer = getIndexWriter(_meta.getIndexName());
            Document _doc = __doIndexDocumentCreate(searchable);
            Term term = new Term(IndexedMeta.FIELD_ID, searchable.getId());
            try {
                _writer.updateDocument(term, _doc);
                if (handler != null) {
                    handler.onIndexUpdated(searchable);
                }
            } catch (IOException ex) {
                _LOG.error("IndexWriter Update Document Error:", ex);
            }
        }
    });
}
From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.IndexFiles.java
License:Apache License
private static void indexFields(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new LongPoint("modified", lastModified));
        // Read the file line by line (UTF-8) and parse "key: value" pairs into stored,
        // fully indexed fields; lines between "content" and "end_content" are collected
        // into a single "content" field.
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        String line = null;
        String[] splitLine;
        String content = "";
        boolean inContent = false;
        while ((line = br.readLine()) != null) {
            if (inContent && !line.contains(":")) {
                content += " " + line;
            }
            FieldType ft = new FieldType();
            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            ft.setStored(true);
            if (!line.isEmpty() && line.contains(":")) {
                splitLine = line.split(":");
                if (splitLine[0].equals("content")) {
                    inContent = true;
                    if (splitLine.length > 1) {
                        content += splitLine[1];
                    }
                } else if (splitLine[0].equals("end_content")) {
                    inContent = false;
                    doc.add(new Field("content", content.trim(), ft));
                } else if (splitLine.length > 1) { // guard against lines like "key:" with no value
                    doc.add(new Field(splitLine[0].trim(), splitLine[1].trim(), ft));
                    System.out.println(splitLine[0].trim() + " - " + splitLine[1].trim());
                }
            }
        }
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed), so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:oldClasses.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this
 * is in the benchmark module, which can create "line doc" files, one document
 * per line, using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html">WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();
                // Add the path of the file as a field named "path". Use a field that is
                // indexed (i.e. searchable), but don't tokenize the field into separate
                // words and don't index term frequency or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);
                // Add the last modified date of the file as a field named "modified". Use
                // a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to milli-second resolution, which is
                // often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
                // Add the contents of the file to a field named "contents". Specify a
                // Reader, so that the text of the file is tokenized and indexed, but not
                // stored. Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case, searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed),
                    // so we use updateDocument instead to replace the old one matching the
                    // exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.abondar.experimental.eventsearch.SearchData.java
public void indexDoc(IndexWriter iw, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        // Deserialize the JSON file into an Event and index its category:
        ObjectMapper mapper = new ObjectMapper();
        Event eb = mapper.readValue(new File(file.toString()), Event.class);
        doc.add(new TextField("category", eb.getCategory(), Field.Store.YES));
        if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
            iw.addDocument(doc);
            for (IndexableField ifd : doc.getFields()) {
                System.out.println(ifd.stringValue() + " " + ifd.name());
            }
            System.out.println("adding " + file);
        } else {
            // Replace any previously indexed version of this file, keyed on its path:
            iw.updateDocument(new Term("path", file.toString()), doc);
            System.out.println("updating " + file);
        }
    }
}