Usage examples for org.apache.lucene.index.IndexWriter#updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
From source file:org.xcmis.search.lucene.index.PersistedIndex.java
License:Open Source License
/** * {@inheritDoc}//w w w .j av a 2s.co m */ public IndexTransactionModificationReport save(final IndexTransaction<Document> changes) throws IndexException { final Set<String> removedDocuments = new HashSet<String>(); final Set<String> updatedDocuments = new HashSet<String>(); try { // index already started synchronized (this.indexDirectiry) { final Set<String> removed = changes.getRemovedDocuments(); IndexWriter writer = null; IndexReader reader = null; Map<String, Document> updated = null; for (final String removedUuid : removed) { if (reader == null) { reader = this.getIndexReader(); } if (this.getDocument(removedUuid, reader) != null) { removedDocuments.add(removedUuid); } } if (removedDocuments.size() > 0 || changes.getAddedDocuments().size() > 0) { writer = new IndexWriter(this.indexDirectiry, new StandardAnalyzer(), MaxFieldLength.UNLIMITED); // removed for (final String uuid : removedDocuments) { writer.deleteDocuments(new Term(FieldNames.UUID, uuid)); } // updated for (final String uuid : updatedDocuments) { // TODO possible use only delete writer.updateDocument(new Term(FieldNames.UUID, uuid), updated.get(uuid)); } // added for (final Document document : changes.getAddedDocuments().values()) { writer.addDocument(document); } writer.commit(); writer.close(); this.lastModifedTime = System.currentTimeMillis(); } } } catch (final CorruptIndexException e) { throw new IndexException(e.getLocalizedMessage(), e); } catch (final IOException e) { throw new IndexException(e.getLocalizedMessage(), e); } return new IndexTransactionModificationReportImpl(changes.getAddedDocuments().keySet(), removedDocuments, updatedDocuments); }
From source file:org.xcmis.search.lucene.InMemoryLuceneQueryableIndexStorage.java
License:Open Source License
/** * @throws IndexException// www. j av a 2s . c o m * @see org.xcmis.search.lucene.AbstractLuceneQueryableIndexStorage#save(org.xcmis.search.lucene.index.LuceneIndexTransaction) */ @Override protected synchronized Object save(LuceneIndexTransaction indexTransaction) throws IndexException, IndexTransactionException { try { IndexWriter writer = new IndexWriter(ramDirectory, new StandardAnalyzer(), MaxFieldLength.UNLIMITED); // removed for (final String uuid : indexTransaction.getRemovedDocuments()) { writer.deleteDocuments(new Term(FieldNames.UUID, uuid)); } // added for (final Entry<String, Document> entry : indexTransaction.getAddedDocuments().entrySet()) { writer.updateDocument(new Term(FieldNames.UUID, entry.getKey()), entry.getValue()); } writer.commit(); writer.close(); } catch (CorruptIndexException e) { throw new IndexModificationException(e.getLocalizedMessage(), e); } catch (LockObtainFailedException e) { throw new IndexModificationException(e.getLocalizedMessage(), e); } catch (IOException e) { throw new IndexModificationException(e.getLocalizedMessage(), e); } return new Object(); }
From source file:part2.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * // w w w . j av a2 s.c om * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { if (!file.toString().toLowerCase().contains(".txt")) { return; } FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Make Book object which parses the file and finds Author and Title of the text file. 
Book book = new Book(); book.parse(fis); Field authorField = new TextField("author", book.getAuthor(), Field.Store.YES); Field titleField = new TextField("title", book.getTitle(), Field.Store.YES); Field releaseField = new TextField("release date", book.getReleaseDate(), Field.Store.YES); Field languageField = new TextField("language", book.getLanguage(), Field.Store.YES); authorField.setBoost(3.0f); titleField.setBoost(3.0f); doc.add(authorField); doc.add(titleField); doc.add(releaseField); doc.add(languageField); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:perf.TestBenchNRTPKLookup.java
License:Apache License
/**
 * Benchmarks near-real-time primary-key lookups against an index that is being
 * concurrently updated through IndexWriter#updateDocument.
 *
 * The loop runs 2.5M iterations: each iteration looks up a random zero-padded
 * id across all index segments, then upserts a document carrying that id.
 * Per-50k-iteration timings are printed, and the SearcherManager is refreshed
 * every 250k documents.
 *
 * @param args args[0] = path of the index directory
 * @throws IOException on any low-level index I/O error
 */
public static void main(String[] args) throws IOException {
    Directory dir = new MMapDirectory(new File(args[0]));
    //Directory dir = new NIOFSDirectory(new File(args[0]));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    iwc.setRAMBufferSizeMB(250);
    IndexWriter writer = new IndexWriter(dir, iwc);
    // NRT searcher manager over the writer; the boolean flag controls delete
    // visibility per the SearcherManager API.
    final SearcherManager manager = new SearcherManager(writer, true, new SearcherFactory() {
        @Override
        public IndexSearcher newSearcher(IndexReader r) {
            return new IndexSearcher(r);
        }
    });
    // "_id" field type: indexed as a single untokenized term, not stored.
    FieldType type = new FieldType();
    type.setIndexed(true);
    type.setTokenized(false);
    type.setStored(false);
    type.freeze();
    // Per-segment TermsEnum cache, keyed by the segment's core+deletes key.
    // Left unused by default -- see the commented put() below.
    HashMap<Object, TermsEnum> cachedTermsEnum = new HashMap<Object, TermsEnum>();
    long time = System.currentTimeMillis();
    long lastTime = time;
    int num = 2500000;
    // Fixed seed so successive benchmark runs are comparable.
    Random r = new Random(16);
    for (int i = 0; i < num; i++) {
        //Term t = new Term("_id", Integer.toString(i));
        String id = String.format("%010d", r.nextInt(Integer.MAX_VALUE));
        Term t = new Term("_id", id);
        IndexSearcher acquire = manager.acquire();
        try {
            // Probe each segment for the id, stopping at the first segment
            // where a live posting for the term exists.
            IndexReader indexReader = acquire.getIndexReader();
            List<AtomicReaderContext> leaves = indexReader.leaves();
            for (AtomicReaderContext atomicReaderContext : leaves) {
                AtomicReader reader = atomicReaderContext.reader();
                TermsEnum termsEnum = cachedTermsEnum.get(reader.getCombinedCoreAndDeletesKey());
                if (termsEnum == null) {
                    termsEnum = reader.fields().terms("_id").iterator(null);
                    //cachedTermsEnum.put(reader.getCombinedCoreAndDeletesKey(), termsEnum); // uncomment this line to see improvements
                }
                //System.out.println("\nlookup seg=: " + reader + " term=" + t);
                if (termsEnum.seekExact(t.bytes())) {
                    DocsEnum termDocsEnum = termsEnum.docs(reader.getLiveDocs(), null);
                    if (termDocsEnum != null) {
                        break;
                    }
                }
            }
        } finally {
            // Always hand the searcher back to the manager.
            manager.release(acquire);
        }
        // Upsert: replaces any previously indexed document with this id.
        Document d = new Document();
        d.add(new Field("_id", id, type));
        writer.updateDocument(t, d);
        //writer.addDocument(d);
        if (i % 50000 == 0) {
            long t1 = System.currentTimeMillis();
            System.out.println(i + " " + (t1 - lastTime) + " ms");
            lastTime = t1;
        }
        if ((i + 1) % 250000 == 0) {
            // Periodic NRT reopen so freshly written segments become searchable.
            System.out.println("Reopen...");
            manager.maybeRefresh();
            IndexSearcher s = manager.acquire();
            try {
                System.out.println(" got: " + s);
            } finally {
                manager.release(s);
            }
        }
    }
    System.out.println("\nTotal: " + (System.currentTimeMillis() - time) + " msec");
    //System.out.println("loadBlockCount: " + BlockTreeTermsReader.loadBlockCount);
    manager.close();
    writer.close();
    dir.close();
}
From source file:practica1_2.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this
 * is in the benchmark module, which can create "line doc" files, one document
 * per line, using WriteLineDocTask.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur (list() returns null in that case)
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception
                // with an "access denied" message; checking if the file can be
                // read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();
                // Add the path of the file as a field named "path". Use a field
                // that is indexed (i.e. searchable), but don't tokenize the field
                // into separate words and don't index term frequency or
                // positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);
                // Add the last modified date of the file as a field named
                // "modified". Use a LongField that is indexed (i.e. efficiently
                // filterable with NumericRangeFilter). This indexes to
                // milli-second resolution, which is often too fine. You could
                // instead create a number based on year/month/day/hour/minutes/
                // seconds, down to the resolution you require. For example the
                // long value 2011021714 would mean February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
                // The raw file contents are deliberately NOT indexed here
                // (the usual "contents" TextField is commented out); instead,
                // selected metadata tags are indexed below.
                // doc.add(new TextField("contents", new BufferedReader(
                //     new InputStreamReader(fis, "UTF-8"))));
                // Index individual metadata tags extracted from the file.
                // NOTE(review): insertIndexTag/readPosition are project helpers
                // not visible in this file -- presumably the boolean selects
                // tokenized vs. untokenized (or stored vs. not); confirm against
                // their definitions.
                insertIndexTag("title", file, doc, true);
                insertIndexTag("identifier", file, doc, false);
                insertIndexTag("subject", file, doc, true);
                insertIndexTag("type", file, doc, false);
                insertIndexTag("description", file, doc, true);
                insertIndexTag("creator", file, doc, true);
                insertIndexTag("publisher", file, doc, true);
                insertIndexTag("format", file, doc, false);
                insertIndexTag("language", file, doc, false);
                readPosition(doc, file, true);
                readPosition(doc, file, false);
                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document
                    // can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been
                    // indexed) so we use updateDocument instead to replace the
                    // old one matching the exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:resource.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is * given, recurses over files and directories found under the given * directory.//w ww. j a v a 2 s. c o m * * NOTE: This method indexes one document per input file. This is slow. For * good throughput, put multiple documents into your input file(s). An * example of this is in the benchmark module, which can create "line doc" * files, one document per line, using the <a href= * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be * stored * @param file * The file to index, or the directory to recurse into to find * files to index * @throws IOException * If there is a low-level I/O error * @throws TikaException * @throws SAXException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. 
searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); Field relativePathField = new StringField("shortPath", getRelativePath(file.getPath()), Field.Store.YES); doc.add(relativePathField); // Add the last modified date of the file a field named // "modified". // Use a LongField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. 
if (file.getName().endsWith(".txt")) doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (file.getName().endsWith(".html")) { doc.add(new TextField("contents", html2String(fis), Field.Store.YES)); doc.add(new StringField("title", getHTMLTitle(file), Field.Store.YES)); } if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: System.out.println("updating " + file); try { writer.updateDocument(new Term("path", file.getPath()), doc); } catch (Exception e) { // TODO e.printStackTrace(); } } } finally { fis.close(); } } } }
From source file:ru.npopm.dep715.searchdocs.lucene.IndexFiles.java
License:Apache License
/** * Indexes a single document//from ww w .j a v a 2 s .c o m */ static void indexDoc(IndexWriter writer, File file, long lastModified) throws IOException { try (InputStream stream = new FileInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); // writer.deleteDocuments(new Term("path", file.toString())); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:ru.npopm.dep715.searchdocs.lucene._IndexFiles_.java
License:Apache License
/** * Indexes a single document//from w w w . j a va 2 s . co m */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); // writer.deleteDocuments(new Term("path", file.toString())); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:se.riddle.jekyll.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this
 * is in the benchmark module, which can create "line doc" files, one document
 * per line, using WriteLineDocTask.
 *
 * Uses the pre-4.0 Lucene field API (Field.Index, NumericField).
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur (list() returns null in that case)
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception
                // with an "access denied" message; checking if the file can be
                // read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();
                // Add the path of the file as a field named "path". Indexed
                // (searchable) but not analyzed and without norms; DOCS_ONLY
                // skips term frequency and positional information. This field
                // is also the unique key for updateDocument below.
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(pathField);
                // Add the last modified date of the file as a field named
                // "modified". A NumericField is indexed so it is efficiently
                // filterable with NumericRangeFilter. This indexes to
                // milli-second resolution, which is often too fine. You could
                // instead create a number based on year/month/day/hour/minutes/
                // seconds, down to the resolution you require. For example the
                // long value 2011021714 would mean February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);
                // Add the contents of the file to a field named "contents".
                // Specify a Reader so the text is tokenized and indexed but not
                // stored; term vectors are recorded. The file is expected to be
                // UTF-8, otherwise searches for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")),
                    Field.TermVector.YES));
                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document
                    // can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been
                    // indexed) so we use updateDocument instead to replace the
                    // old one matching the exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:searchEngine.IndexFiles.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { File docDir = new File(file.toString()); org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(docDir, "UTF8"); Document doc = new Document(); Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);//from w w w .j a va2s. c om doc.add(new LongPoint("modified", lastModified)); doc.add(new TextField("title", jsoupDoc.title(), Field.Store.YES)); doc.add(new TextField("contents", jsoupDoc.text(), Field.Store.YES)); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { System.out.println("adding " + file); writer.addDocument(doc); } else { System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }