Example usage for org.apache.lucene.index IndexWriter updateDocument

Introduction

This page collects usage examples for org.apache.lucene.index.IndexWriter#updateDocument. The method updates a document by first deleting every document containing the given term and then adding the new document; the delete and the add are atomic, which makes this the standard way to replace ("upsert") a document in a Lucene index.

Prototype

public long updateDocument(Term term, Iterable<? extends IndexableField> doc)
        throws IOException

(In Lucene 4.x, which several of the examples below target, the method returns void rather than a sequence number.)

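Before the collected examples, here is a minimal, self-contained sketch of the call, written against the current Lucene API. It is illustrative only: the index path and the "id" and "contents" field names are assumptions, not taken from any of the sources below.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical index location.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // A StringField is indexed untokenized, so the Term below can match it exactly.
            doc.add(new StringField("id", "doc-42", Field.Store.YES));
            doc.add(new TextField("contents", "updated text of the document", Field.Store.NO));
            // Atomically deletes any document(s) containing id:doc-42, then adds this one.
            writer.updateDocument(new Term("id", "doc-42"), doc);
            writer.commit();
        }
    }
}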
Usage

From source file:net.semanticmetadata.lire.imageanalysis.bovw.VLADBuilder.java

License:Open Source License

/**
 * Indexes all documents in the index that do not yet include the VLAD feature.
 *
 * @throws IOException
 */
public void indexMissing() throws IOException {
    init();
    // Reading clusters from disk:
    clusters = Cluster.readClusters(clusterFile);
    //  create & store histograms:
    System.out.println("Creating histograms ...");
    LireFeature f = getFeatureInstance();
    IndexWriter iw = LuceneUtils.createIndexWriter(((DirectoryReader) reader).directory(), true,
            LuceneUtils.AnalyzerType.WhitespaceAnalyzer);
    for (int i = 0; i < reader.maxDoc(); i++) {
        //            if (!reader.isDeleted(i)) {
        Document d = reader.document(i);
        // Only if there are no values yet:
        if (d.getValues(vladFieldName) == null || d.getValues(vladFieldName).length == 0) {
            createVisualWords(d, f);
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
        }
        //            }
    }
    iw.commit();
    iw.close();
    System.out.println("Finished.");
}
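A note on the pattern in this and the other LIRE examples below: updateDocument matches the Term exactly, byte for byte, so the identifier field passed as the term must have been indexed untokenized (e.g. as a StringField or an equivalent non-analyzed field). If the field were analyzed, the term would not match any document and the call would silently add a second copy instead of replacing the first.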

From source file:net.semanticmetadata.lire.indexing.IndexVisualWordsTest.java

License:Open Source License

public void testIndexMissingFiles() throws IOException {
    // first delete some of the existing ones ...
    System.out.println("Deleting visual words from docs ...");
    IndexReader ir = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexWriter iw = LuceneUtils.createIndexWriter(index, false);
    int maxDocs = ir.maxDoc();
    for (int i = 0; i < maxDocs / 10; i++) {
        Document d = ir.document(i);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW);
        //            d.removeFields(DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM);
        d.removeFields(DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW_VECTOR);
        //            d.removeFields(DocumentBuilder.FIELD_NAME_SURF);
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                d.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), d);
    }
    System.out.println("# of deleted docs:  " + maxDocs / 10);
    System.out.println("Optimizing and closing ...");
    iw.close();
    ir.close();
    System.out.println("Creating new visual words ...");
    BOVWBuilder surfFeatureHistogramBuilder = new BOVWBuilder(
            DirectoryReader.open(FSDirectory.open(new File(index))), new SurfFeature(), numSamples, clusters);
    //        surfFeatureHistogramBuilder.indexMissing();
    //        System.out.println("Finished.");
}

From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java

License:Open Source License

/**
 * Creates a set of reference objects and stores it in a new index (named "<indexPath>-ro"). Then creates ordered
 * lists of reference object positions for each data item in the index with the given feature.
 * Finally a new index (named "<indexPath>-ms") is created where all the original documents as well as the new data
 * are stored.
 *
 * @param indexPath the path to the original index
 * @throws IOException
 */
public void createIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();

    if (numDocs < numReferenceObjects) {
        throw new UnsupportedOperationException("Too few documents in index.");
    }

    // progress report
    progress.setNumDocsAll(numDocs);
    progress.setCurrentState(State.RoSelection);

    boolean hasDeletions = reader.hasDeletions();

    // init reference objects:
    IndexWriter iw = LuceneUtils.createIndexWriter(indexPath + "-ro", true);
    HashSet<Integer> referenceObjsIds = new HashSet<Integer>(numReferenceObjects);

    double numDocsDouble = (double) numDocs;
    while (referenceObjsIds.size() < numReferenceObjects) {
        referenceObjsIds.add((int) (numDocsDouble * Math.random()));
    }
    int count = 0;

    if (hasDeletions) {
        System.err.println("WARNING: There are deleted docs in your index. You should "
                + "optimize your index before using this method.");
    }

    // progress report
    progress.setCurrentState(State.RoIndexing);

    // find them in the index and put them into a separate index:
    for (int i : referenceObjsIds) {
        count++;
        Document document = reader.document(i);
        document.add(new Field("ro-id", count + "", StringField.TYPE_STORED));
        iw.addDocument(document);
    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Indexing);

    // now find the reference objects for each entry ;)
    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> analyzerPerField = new HashMap<String, Analyzer>();
    analyzerPerField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), analyzerPerField);

    iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        ImageSearchHits hits = searcher.search(document, readerRo);
        sb.delete(0, sb.length());
        for (int j = 0; j < numReferenceObjectsUsed; j++) {
            sb.append(hits.doc(j).getValues("ro-id")[0]);
            sb.append(' ');
        }
        // System.out.println(sb.toString());
        document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
        iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

    }
    iw.commit();
    iw.close();

    // progress report
    progress.setCurrentState(State.Idle);

}

From source file:net.semanticmetadata.lire.indexing.MetricSpacesInvertedListIndexing.java

License:Open Source License

/**
 * We assume that the initial indexing has been done and a set of reference objects has been
 * found and indexed in the separate fileList. However, further documents were added and they
 * now need to get a ranked list of reference objects. So we (i) get all these new documents
 * missing the field "ro-order" and (ii) add this field.
 *
 * @param indexPath the index to update
 * @throws IOException
 */
public void updateIndex(String indexPath) throws IOException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    int numDocs = reader.numDocs();
    boolean hasDeletions = reader.hasDeletions();
    int countUpdated = 0;

    IndexReader readerRo = DirectoryReader.open(FSDirectory.open(new File(indexPath + "-ro")));
    ImageSearcher searcher = new GenericImageSearcher(numReferenceObjectsUsed, featureClass, featureFieldName);
    Map<String, Analyzer> perField = new HashMap<String, Analyzer>(1);
    perField.put("ro-order", new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION));
    PerFieldAnalyzerWrapper aWrapper = new PerFieldAnalyzerWrapper(
            new SimpleAnalyzer(LuceneUtils.LUCENE_VERSION), perField);

    IndexWriter iw = new IndexWriter(FSDirectory.open(new File(indexPath)),
            new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, aWrapper)
                    // CREATE_OR_APPEND rather than CREATE: opening the writer with CREATE
                    // would truncate the index and drop every document that already has
                    // the "ro-order" field, contradicting the update intent above.
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND));
    StringBuilder sb = new StringBuilder(256);
    // Needed to check whether a document is deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    for (int i = 0; i < numDocs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document document = reader.document(i);
        if (document.getField("ro-order") == null) { // if the field is not here we create it.
            ImageSearchHits hits = searcher.search(document, readerRo);
            sb.delete(0, sb.length());
            for (int j = 0; j < numReferenceObjectsUsed; j++) {
                sb.append(hits.doc(j).getValues("ro-id")[0]);
                sb.append(' ');
            }
            // System.out.println(sb.toString());
            document.add(new TextField("ro-order", sb.toString(), Field.Store.YES));
            iw.updateDocument(new Term(DocumentBuilder.FIELD_NAME_IDENTIFIER,
                    document.getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0]), document);
            countUpdated++;
        }

        // progress report
        progress.setNumDocsProcessed(progress.getNumDocsProcessed() + 1);

        // debug:
        System.out.println("countUpdated = " + countUpdated);
    }
    iw.commit();
    iw.close();
}

From source file:net.simpleframework.ado.lucene.AbstractLuceneManager.java

License:Apache License

@Override
public void doUpdateIndex(final Object... objects) {
    IndexWriter iWriter = null;
    try {
        iWriter = createIndexWriter();
        for (final Object obj : objects) {
            final String id = getId(obj);
            if (StringUtils.hasText(id)) {
                final LuceneDocument document = new LuceneDocument();
                if (objectToDocument(obj, document)) {
                    iWriter.updateDocument(new Term("id", id), document.doc);
                }
            }
        }
    } catch (final IOException e) {
        throw ADOException.of(e);
    } finally {
        closeWriter(iWriter);
    }
}

From source file:net.tooan.ynpay.third.mongodb.lucene.backend.IndexUpdateTask.java

License:Apache License

@Override
public void run() {
    Class<?> clazz = obj.getClass();
    String[] name = MapperUtil.getEntityName(clazz);
    IndexWriterCache cache = IndexWriterCache.getInstance();
    IndexWriter writer = cache.get(StringUtils.join(name, "."));
    Document doc = new Document();
    IndexCreator creator = new IndexCreator(obj, "");
    creator.create(doc);
    Term term = new Term(FieldsCache.getInstance().getIdFieldName(clazz), obj.getId());
    try {
        writer.updateDocument(term, doc);
    } catch (IOException ex) {
        // CorruptIndexException extends IOException, so a single handler covers both.
        logger.error("IndexWriter can not update the document", ex);
    }
}
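A design note on this example: it reuses a long-lived IndexWriter from a cache keyed by entity name rather than opening a new writer per update. IndexWriter is thread-safe, and only one writer may hold an index's lock at a time, so sharing a single cached writer per index is the usual pattern for concurrent update tasks like this one.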

From source file:net.ymate.platform.module.search.Searchs.java

License:Apache License

public static void indexUpdate(final ISearchable searchable, final ICallbackHandler handler) {
    __doCheckModuleInited();
    __executor.execute(new Runnable() {

        public void run() {
            IndexedMeta _meta = getIndexedMeta(searchable);
            IndexWriter _writer = getIndexWriter(_meta.getIndexName());
            Document _doc = __doIndexDocumentCreate(searchable);
            Term term = new Term(IndexedMeta.FIELD_ID, searchable.getId());
            try {
                _writer.updateDocument(term, _doc);
                if (handler != null) {
                    handler.onIndexUpdated(searchable);
                }
            } catch (IOException ex) {
                _LOG.error("IndexWriter Update Document Error:", ex);
            }
        }
    });
}

From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.IndexFiles.java

License:Apache License

private static void indexFields(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a
        // Reader, so that the text of the file is tokenized and indexed, but
        // not stored. Note that FileReader expects the file to be in UTF-8
        // encoding; if that's not the case, searching for special characters
        // will fail.
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        String line = null;
        String[] splitLine;
        String content = "";
        boolean inContent = false;
        while ((line = br.readLine()) != null) {
            if (inContent && !line.contains(":")) {
                content += " " + line;
            }
            FieldType ft = new FieldType();
            ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
            ft.setStored(true);
            if (!line.isEmpty() && line.contains(":")) {
                //               System.out.println("line: " + line);
                splitLine = line.split(":");
                if (splitLine[0].equals("content")) {
                    inContent = true;
                    if (splitLine.length > 1) {
                        content += splitLine[1];
                    }
                } else if (splitLine[0].equals("end_content")) {
                    inContent = false;
                    //                  System.out.println("content - " + content.trim());
                    doc.add(new Field("content", content.trim(), ft));
                } else {
                    doc.add(new Field(splitLine[0].trim(), splitLine[1].trim(), ft));
                    System.out.println(splitLine[0].trim() + " - " + splitLine[1].trim());
                }
            }
        }
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can
            // be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed) so we use updateDocument instead to replace the old
            // one matching the exact path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:oldClasses.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 * 
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named
                // "modified". Use a LongField that is indexed (i.e. efficiently
                // filterable with NumericRangeFilter). This indexes to
                // millisecond resolution, which is often too fine. You could
                // instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution
                // you require. For example the long value 2011021714 would
                // mean February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".
                // Specify a Reader, so that the text of the file is tokenized
                // and indexed, but not stored. Note that FileReader expects
                // the file to be in UTF-8 encoding; if that's not the case,
                // searching for special characters will fail.

                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document
                    // can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have
                    // been indexed) so we use updateDocument instead to
                    // replace the old one matching the exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:org.abondar.experimental.eventsearch.SearchData.java

public void indexDoc(IndexWriter iw, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {

        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        ObjectMapper mapper = new ObjectMapper();
        Event eb = mapper.readValue(new File(file.toString()), Event.class);
        doc.add(new TextField("category", eb.getCategory(), Field.Store.YES));

        if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
            iw.addDocument(doc);
            for (IndexableField ifd : doc.getFields()) {
                System.out.println(ifd.stringValue() + "  " + ifd.name());
            }
            System.out.println("adding " + file);

        } else {

            iw.updateDocument(new Term("path", file.toString()), doc);
            System.out.println("updating " + file);
        }

    }
}