Example usage for org.apache.lucene.index IndexWriter updateDocument

Introduction

This page collects usage examples for org.apache.lucene.index.IndexWriter#updateDocument. The method updates a document by first deleting every document containing the given term and then adding the new document; the delete and the add are atomic, which makes this the standard way to replace ("upsert") a document in a Lucene index.

Prototype

public long updateDocument(Term term, Iterable<? extends IndexableField> doc)
        throws IOException

(In Lucene 4.x, which several of the examples below target, the method returns void rather than a sequence number.)

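Before the collected examples, here is a minimal, self-contained sketch of the call, written against the current Lucene API. It is illustrative only: the index path and the "id" and "contents" field names are assumptions, not taken from any of the sources below.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // A StringField is indexed untokenized, so the Term below can match it exactly.
            doc.add(new StringField("id", "doc-42", Field.Store.YES));
            doc.add(new TextField("contents", "updated text of the document", Field.Store.NO));
            // Atomically deletes any document(s) containing id:doc-42, then adds this one.
            writer.updateDocument(new Term("id", "doc-42"), doc);
            writer.commit();
        }
    }
}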
Usage

From source file:net.semanticmetadata.lire.imageanalysis.bovw.VLADBuilder.java

License:Open Source License

/**
 * Indexes all documents in the index that do not yet include the VLAD feature.
 *
 * @throws IOException
 */
public void indexMissing() throws IOException {
    init();
    // Reading clusters from disk:
    clusters = Cluster.readClusters(clusterFile);
    //  create & store histograms:
    System.out.println("Creating histograms ...");
    LireFeature f = getFeatureInstance();
    IndexWriter iw = LuceneUtils.createIndexWriter(((DirectoryReader) reader).directory(), true,
            LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
    for (int i = 0; i < reader.maxDoc(); i++) {
        //            if (!reader.isDeleted(i)) {
        Document d = reader.document(i);
        // Only if there are no values yet:
        if (d.getValues(vladFieldName) == null || d.getValues(vladFieldName).length == 0) {
            createVisualWords(d, f);
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        }
        //            }
    }
    iw.commit();
    iw.close();
    System.out.println("Finished.");
}
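A note on the pattern in this and the other LIRE examples below: updateDocument matches the Term exactly, byte for byte, so the identifier field passed as the term must have been indexed untokenized (e.g. as a StringField or an equivalent non-analyzed field). If the field were analyzed, the term would not match any document and the call would silently add a second copy instead of replacing the first.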

From source file:net.semanticmetadata.lire.indexing.IndexVisualWordsTest.java

License:Open Source License

public void testIndexMissingFiles() throws IOException {
    // first delete some of the existing ones ...
    System.out.println("Deleting visual words from docs ...");
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexWriter iw = LuceneUtils.createIndexWriter(index, false);
    int maxDocs = ir.maxDoc();
    for (int i = 0; i < maxDocs / 10; i++) {
        Document d = ir.document(i);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
        //            d.removeFields(DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW_VECTOR);
        //            d.removeFields(DocumentBuilder.FIELD_NAME_SURF);
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
    }
    System.out.println("# of deleted docs:  " + maxDocs / 10);
    System.out.println("Optimizing and closing ...");
    iw.close();
    ir.close();
    System.out.println("Creating new visual words ...");
    BOVWBuilder surfFeatureHistogramBuilder = new BOVWBuilder(
            DirectoryReader.open(FSDirectory.open(new File(index))), new SurfFeature(), numSamples, clusters);
    //        surfFeatureHistogramBuilder.indexMissing();
    //        System.out.println("Finished.");
}

From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java

License:Open Source License

/**
 * Creates a set of reference objects and stores it in a new index (named "<indexPath>-ro"). Then creates ordered
 * lists of reference object positions for each data item in the index with the given feature.
 * Finally a new index (named "<indexPath>-ms") is created where all the original documents as well as the new data
 * are stored.
 *
 * @param indexPath the path to the original index
 * @throws IOException
 */
public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
        throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
        referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
        System.err.println("WARNING: There are deleted docs in your index. You should "
                + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
        count++;
        Document document = reader.document(i);
        document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
        iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);

}

From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java

License:Open Source License

/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate fileList. However, further documents were added and they
 * now need to get a ranked list of reference objects. So we (i) get all these new documents
 * missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    // CREATE_OR_APPEND rather than CREATE: opening the writer with CREATE
                    // would truncate the index and drop every document that already has
                    // the "ro-order" field, contradicting the update intent above.
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            // System.out.println(sb.toString());
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

        // debug:
        System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}

From source file:net.simpleframework.ado.lucene.AbstractLuceneManager.java

License:Apache License

@Override
public void doUpdateIndex(final Object... objects) {
    IndexWriter iWriter = null;
    try {
        iWriter = createIndexWriter();
        for (final Object obj : objects) {
            final String id = getId(obj);
            if (StringUtils.hasText(id)) {
                final LuceneDocument document = new LuceneDocument();
                if (objectToDocument(obj, document)) {
                    iWriter.updateDocument(new Term("id", id), document.doc);
                }
            }
        }
    } catch (final IOException e) {
        throw ADOException.of(e);
    } finally {
        closeWriter(iWriter);
    }
}

From source file:net.tooan.ynpay.third.mongodb.lucene.backend.IndexUpdateTask.java

License:Apache License

@Override
public void run() {
    Class<?> clazz = obj.getClass();
    String[] name = MapperUtil.getEntityName(clazz);
    IndexWriterCache cache = IndexWriterCache.getInstance();
    IndexWriter writer = cache.get(StringUtils.join(name, "."));
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    Term term = new Term(FieldsCache.getInstance().getIdFieldName(clazz), obj.getId());
    try {
        writer.updateDocument(term, doc);
    } catch (IOException ex) {
        // CorruptIndexException extends IOException, so a single handler covers both.
        logger.error("IndexWriter can not update the document", ex);
    }
}
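A design note on this example: it reuses a long-lived IndexWriter from a cache keyed by entity name rather than opening a new writer per update. IndexWriter is thread-safe, and only one writer may hold an index's lock at a time, so sharing a single cached writer per index is the usual pattern for concurrent update tasks like this one.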

From source file:net.ymate.platform.module.search.Searchs.java

License:Apache License

public static void indexUpdate(final ISearchable searchable, final ICallbackHandler handler) {
    __doCheckModuleInited();
    __executor.execute(new Runnable() {

        public void run() {
            IndexedMeta _meta = getIndexedMeta(searchable);
            IndexWriter _writer = getIndexWriter(_meta.getIndexName());
            Document _doc = __doIndexDocumentCreate(searchable);
            Term term = new Term(IndexedMeta.FIELD_ID, searchable.getId());
            try {
                _writer.updateDocument(term, _doc);
                if (handler != null) {
                    handler.onIndexUpdated(searchable);
                }
            } catch (IOException ex) {
                _LOG.error("IndexWriter Update Document Error:", ex);
            }
        }
    });
}

From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.IndexFiles.java

License:Apache License

private static void indexFields(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a
        // Reader, so that the text of the file is tokenized and indexed, but
        // not stored. Note that FileReader expects the file to be in UTF-8
        // encoding; if that's not the case, searching for special characters
        // will fail.
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        String line = null;
        String[] splitLine;
        String content = "";
        boolean inContent = false;
        while ((line = br.readLine()) != null) {
            if (inContent && !line.contains(":")) {
                content += " " + line;
            }
            FieldType ft = new FieldType();
            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            ft.setStored(true);
            if (!line.isEmpty() && line.contains(":")) {
                //               System.out.println("line: " + line);
                splitLine = line.split(":");
                if (splitLine[0].equals("content")) {
                    inContent = true;
                    if (splitLine.length > 1) {
                        content += splitLine[1];
                    }
                } else if (splitLine[0].equals("end_content")) {
                    inContent = false;
                    //                  System.out.println("content - " + content.trim());
                    doc.add(new Field("content", content.trim(), ft));
                } else {
                    doc.add(new Field(splitLine[0].trim(), splitLine[1].trim(), ft));
                    System.out.println(splitLine[0].trim() + " - " + splitLine[1].trim());
                }
            }
        }
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can
            // be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed) so we use updateDocument instead to replace the old
            // one matching the exact path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:oldClasses.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 * 
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named
                // "modified". Use a LongField that is indexed (i.e. efficiently
                // filterable with NumericRangeFilter). This indexes to
                // millisecond resolution, which is often too fine. You could
                // instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution
                // you require. For example the long value 2011021714 would
                // mean February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".
                // Specify a Reader, so that the text of the file is tokenized
                // and indexed, but not stored. Note that FileReader expects
                // the file to be in UTF-8 encoding; if that's not the case,
                // searching for special characters will fail.

                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document
                    // can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have
                    // been indexed) so we use updateDocument instead to
                    // replace the old one matching the exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:org.abondar.experimental.eventsearch.SearchData.java

public void indexDoc(IndexWriter iw, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {

        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        ObjectMapper mapper = new ObjectMapper();
        Event eb = mapper.readValue(new File(file.toString()), Event.class);
        doc.add(new TextField("category", eb.getCategory(), Field.Store.YES));

        if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
            iw.addDocument(doc);
            for (IndexableField ifd : doc.getFields()) {
                System.out.println(ifd.stringValue() + "  " + ifd.name());
            }
            System.out.println("adding " + file);

        } else {

            iw.updateDocument(new Term("path", file.toString()), doc);
            System.out.println("updating " + file);
        }

    }
}