Example usage for org.apache.lucene.index IndexWriter updateDocument

List of usage examples for org.apache.lucene.index IndexWriter updateDocument

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriter updateDocument.

Prototype

private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode,
            Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Usage

From source file:org.xcmis.search.lucene.index.PersistedIndex.java

License:Open Source License

/**
 * {@inheritDoc}
 *
 * <p>Applies the removals and additions carried by {@code changes} to the index while
 * holding the monitor of the index directory. Only removed UUIDs for which
 * {@code getDocument} finds an existing document are actually deleted and reported.
 * The writer is rolled back when any modification or the commit fails, so it does not
 * stay open after an error.
 *
 * @param changes the transaction whose removed and added documents are applied
 * @return a report built from the added UUIDs, the removed UUIDs that were found in the
 *         index, and the (currently always empty) set of updated UUIDs
 * @throws IndexException if the underlying index operation fails with an {@link IOException}
 */
public IndexTransactionModificationReport save(final IndexTransaction<Document> changes) throws IndexException {

    final Set<String> removedDocuments = new HashSet<String>();
    final Set<String> updatedDocuments = new HashSet<String>();

    try {
        // index already started
        synchronized (this.indexDirectiry) {

            final Set<String> removed = changes.getRemovedDocuments();
            IndexReader reader = null;

            // NOTE(review): this map is never assigned and updatedDocuments is never filled
            // in this method, so the "updated" loop below does not execute at present.
            Map<String, Document> updated = null;

            // Keep only the UUIDs that really exist; the reader is fetched lazily so that a
            // transaction without removals never asks for one.
            for (final String removedUuid : removed) {

                if (reader == null) {
                    reader = this.getIndexReader();
                }

                if (this.getDocument(removedUuid, reader) != null) {
                    removedDocuments.add(removedUuid);
                }
            }

            if (removedDocuments.size() > 0 || changes.getAddedDocuments().size() > 0) {

                final IndexWriter writer = new IndexWriter(this.indexDirectiry, new StandardAnalyzer(),
                        MaxFieldLength.UNLIMITED);
                boolean committed = false;
                try {
                    // removed
                    for (final String uuid : removedDocuments) {
                        writer.deleteDocuments(new Term(FieldNames.UUID, uuid));
                    }
                    // updated
                    for (final String uuid : updatedDocuments) {
                        // TODO possible use only delete
                        writer.updateDocument(new Term(FieldNames.UUID, uuid), updated.get(uuid));
                    }
                    // added
                    for (final Document document : changes.getAddedDocuments().values()) {
                        writer.addDocument(document);
                    }

                    writer.commit();
                    committed = true;
                } finally {
                    if (!committed) {
                        // Discard the partial work and close the writer; a failure here must
                        // not hide the exception that caused the rollback.
                        try {
                            writer.rollback();
                        } catch (final IOException ignored) {
                            // the original failure is the one worth reporting
                        }
                    }
                }
                writer.close();

                this.lastModifedTime = System.currentTimeMillis();
            }
        }

    } catch (final IOException e) {
        // CorruptIndexException is an IOException, so this single handler covers both cases
        throw new IndexException(e.getLocalizedMessage(), e);
    }

    return new IndexTransactionModificationReportImpl(changes.getAddedDocuments().keySet(), removedDocuments,
            updatedDocuments);
}

From source file:org.xcmis.search.lucene.InMemoryLuceneQueryableIndexStorage.java

License:Open Source License

/**
 * Writes the removals and additions of {@code indexTransaction} into the RAM directory.
 * Every removed UUID is deleted by its UUID term; every added document replaces whatever
 * is currently stored under the same UUID term. The writer is rolled back when any step
 * before or including the commit fails, so it does not stay open after an error.
 *
 * @param indexTransaction the transaction supplying removed UUIDs and added documents
 * @return a fresh placeholder {@code Object}
 * @throws IndexException if the index could not be modified; raised as an
 *         {@code IndexModificationException} wrapping the I/O failure
 * @see org.xcmis.search.lucene.AbstractLuceneQueryableIndexStorage#save(org.xcmis.search.lucene.index.LuceneIndexTransaction)
 */
@Override
protected synchronized Object save(LuceneIndexTransaction indexTransaction)
        throws IndexException, IndexTransactionException {

    try {
        final IndexWriter writer = new IndexWriter(ramDirectory, new StandardAnalyzer(), MaxFieldLength.UNLIMITED);
        boolean committed = false;
        try {
            // removed
            for (final String uuid : indexTransaction.getRemovedDocuments()) {
                writer.deleteDocuments(new Term(FieldNames.UUID, uuid));
            }

            // added
            for (final Entry<String, Document> entry : indexTransaction.getAddedDocuments().entrySet()) {
                writer.updateDocument(new Term(FieldNames.UUID, entry.getKey()), entry.getValue());
            }

            writer.commit();
            committed = true;
        } finally {
            if (!committed) {
                // Drop the partial changes and close the writer without masking the
                // exception that triggered the rollback.
                try {
                    writer.rollback();
                } catch (IOException ignored) {
                    // the original failure is the one worth reporting
                }
            }
        }
        writer.close();
    } catch (IOException e) {
        // CorruptIndexException and LockObtainFailedException are IOExceptions and were
        // handled identically, so one handler is sufficient
        throw new IndexModificationException(e.getLocalizedMessage(), e);
    }
    return new Object();
}

From source file:part2.IndexFiles.java

License:Apache License

/**
 * Adds {@code file} to the index behind {@code writer}; when {@code file} is a directory,
 * everything listed under it is visited recursively.
 *
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // unreadable entries are skipped without complaint
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        // list() hands back null when an IO error occurs
        String[] children = file.list();
        if (children != null) {
            for (String child : children) {
                indexDocs(writer, new File(file, child));
            }
        }
        return;
    }

    // only paths whose lower-cased text contains ".txt" are indexed
    if (!file.toString().toLowerCase().contains(".txt")) {
        return;
    }

    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this exception with an
        // "access denied" message; checking canRead() beforehand doesn't help
        return;
    }

    try {
        Document doc = new Document();

        // The Book object parses the stream to obtain the metadata stored below.
        // NOTE(review): the same stream is handed to the "contents" reader further down;
        // if parse() consumes it, "contents" may end up empty - confirm.
        Book book = new Book();
        book.parse(fis);

        // author and title carry a boost of 3.0
        Field authorField = new TextField("author", book.getAuthor(), Field.Store.YES);
        Field titleField = new TextField("title", book.getTitle(), Field.Store.YES);
        Field releaseField = new TextField("release date", book.getReleaseDate(), Field.Store.YES);
        Field languageField = new TextField("language", book.getLanguage(), Field.Store.YES);
        authorField.setBoost(3.0f);
        titleField.setBoost(3.0f);
        doc.add(authorField);
        doc.add(titleField);
        doc.add(releaseField);
        doc.add(languageField);

        // "path": indexed as a single un-tokenized value and stored
        doc.add(new StringField("path", file.getPath(), Field.Store.YES));

        // "modified": last-modified time in milliseconds, indexed but not stored.
        // A coarser number (e.g. 2011021714 for February 17, 2011, 2-3 PM) could be
        // used instead when millisecond resolution is too fine.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // "contents": tokenized and indexed from a reader (not stored); the bytes are
        // decoded as UTF-8, so other encodings will break searches for special characters
        BufferedReader contents = new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8));
        doc.add(new TextField("contents", contents));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // a newly created index cannot hold an older copy, so a plain add suffices
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // replace any previously indexed document with exactly this path
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        }
    } finally {
        fis.close();
    }
}

From source file:perf.TestBenchNRTPKLookup.java

License:Apache License

/**
 * Benchmark driver: for 2,500,000 pseudo-random ids (fixed seed 16) it looks the id up
 * through the current searcher's segment readers, then writes a one-field document for
 * that id with {@code updateDocument}. Elapsed time is printed every 50,000 iterations
 * and the searcher manager is refreshed every 250,000 iterations.
 *
 * @param args {@code args[0]} is the path of the index directory
 * @throws IOException if opening, writing or searching the index fails
 */
public static void main(String[] args) throws IOException {
    Directory indexDir = new MMapDirectory(new File(args[0]));
    //Directory indexDir = new NIOFSDirectory(new File(args[0]));
    IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
    config.setRAMBufferSizeMB(250);
    IndexWriter writer = new IndexWriter(indexDir, config);
    final SearcherManager searcherManager = new SearcherManager(writer, true, new SearcherFactory() {
        @Override
        public IndexSearcher newSearcher(IndexReader r) {
            return new IndexSearcher(r);
        }
    });

    // the "_id" field: indexed, not tokenized, not stored
    FieldType idFieldType = new FieldType();
    idFieldType.setIndexed(true);
    idFieldType.setTokenized(false);
    idFieldType.setStored(false);
    idFieldType.freeze();

    HashMap<Object, TermsEnum> cachedTermsEnum = new HashMap<Object, TermsEnum>();
    final long startMillis = System.currentTimeMillis();
    long lastReportMillis = startMillis;
    final int totalDocs = 2500000;
    Random random = new Random(16);

    for (int i = 0; i < totalDocs; i++) {
        //Term idTerm = new Term("_id", Integer.toString(i));
        String id = String.format("%010d", random.nextInt(Integer.MAX_VALUE));
        Term idTerm = new Term("_id", id);

        IndexSearcher searcher = searcherManager.acquire();
        try {
            for (AtomicReaderContext leaf : searcher.getIndexReader().leaves()) {
                AtomicReader segment = leaf.reader();
                TermsEnum idTerms = cachedTermsEnum.get(segment.getCombinedCoreAndDeletesKey());
                if (idTerms == null) {
                    // The fresh enum is deliberately not stored back into the map; adding
                    // cachedTermsEnum.put(segment.getCombinedCoreAndDeletesKey(), idTerms)
                    // here enables reuse ("uncomment this line to see improvements").
                    idTerms = segment.fields().terms("_id").iterator(null);
                }
                // MKM
                //System.out.println("\nlookup seg=: " + segment + " term=" + idTerm);
                // stop at the first segment that yields a docs enum for the id
                if (idTerms.seekExact(idTerm.bytes()) && idTerms.docs(segment.getLiveDocs(), null) != null) {
                    break;
                }
            }
        } finally {
            searcherManager.release(searcher);
        }

        Document doc = new Document();
        doc.add(new Field("_id", id, idFieldType));
        writer.updateDocument(idTerm, doc);
        //writer.addDocument(doc);

        if (i % 50000 == 0) {
            long now = System.currentTimeMillis();
            System.out.println(i + " " + (now - lastReportMillis) + " ms");
            lastReportMillis = now;
        }

        if ((i + 1) % 250000 == 0) {
            System.out.println("Reopen...");
            searcherManager.maybeRefresh();
            IndexSearcher refreshed = searcherManager.acquire();
            try {
                System.out.println("  got: " + refreshed);
            } finally {
                searcherManager.release(refreshed);
            }
        }
    }

    System.out.println("\nTotal: " + (System.currentTimeMillis() - startMillis) + " msec");
    //System.out.println("loadBlockCount: " + BlockTreeTermsReader.loadBlockCount);

    searcherManager.close();
    writer.close();

    indexDir.close();
}

From source file:practica1_2.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory./*from   ww  w.j av a2 s.  c  om*/
 * 
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't
                // tokenize
                // the field into separate words and don't index term
                // frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named
                // "modified".
                // Use a LongField that is indexed (i.e. efficiently
                // filterable with
                // NumericRangeFilter). This indexes to milli-second
                // resolution, which
                // is often too fine. You could instead create a number
                // based on
                // year/month/day/hour/minutes/seconds, down the resolution
                // you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".
                // Specify a Reader,
                // so that the text of the file is tokenized and indexed,
                // but not stored.
                // Note that FileReader expects the file to be in UTF-8
                // encoding.
                // If that's not the case searching for special characters
                // will fail.
                // doc.add(new TextField("contents", new BufferedReader(
                // new InputStreamReader(fis, "UTF-8"))));

                insertIndexTag("title", file, doc, true);
                insertIndexTag("identifier", file, doc, false);
                insertIndexTag("subject", file, doc, true);
                insertIndexTag("type", file, doc, false);
                insertIndexTag("description", file, doc, true);
                insertIndexTag("creator", file, doc, true);
                insertIndexTag("publisher", file, doc, true);
                insertIndexTag("format", file, doc, false);
                insertIndexTag("language", file, doc, false);
                readPosition(doc, file, true);
                readPosition(doc, file, false);

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old
                    // document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have
                    // been indexed) so
                    // we use updateDocument instead to replace the old one
                    // matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:resource.IndexFiles.java

License:Apache License

/**
 * Adds {@code file} to the index behind {@code writer}; when {@code file} is a
 * directory, everything listed under it is visited recursively.
 * 
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // unreadable entries are skipped without complaint
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        // list() hands back null when an IO error occurs
        String[] children = file.list();
        if (children != null) {
            for (String child : children) {
                indexDocs(writer, new File(file, child));
            }
        }
        return;
    }

    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this exception
        // with an "access denied" message; canRead() does not catch that
        return;
    }

    try {
        Document doc = new Document();
        final String path = file.getPath();

        // "path" and "shortPath": indexed as single un-tokenized values and
        // stored
        doc.add(new StringField("path", path, Field.Store.YES));
        doc.add(new StringField("shortPath", getRelativePath(path), Field.Store.YES));

        // "modified": last-modified time in milliseconds, indexed but not
        // stored. A coarser number (e.g. 2011021714 for February 17, 2011,
        // 2-3 PM) could be used when millisecond resolution is too fine.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // "contents" depends on the file name suffix: .txt files are
        // tokenized from a UTF-8 reader (not stored); .html files get their
        // converted text stored, plus a "title" field.
        final String fileName = file.getName();
        if (fileName.endsWith(".txt")) {
            BufferedReader contents = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
            doc.add(new TextField("contents", contents));
        }
        if (fileName.endsWith(".html")) {
            doc.add(new TextField("contents", html2String(fis), Field.Store.YES));
            doc.add(new StringField("title", getHTMLTitle(file), Field.Store.YES));
        }

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // a newly created index cannot hold an older copy, so a plain
            // add suffices
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // replace any previously indexed document with exactly this path;
            // a failure is printed and indexing carries on
            System.out.println("updating " + file);
            try {
                writer.updateDocument(new Term("path", path), doc);
            } catch (Exception e) {
                // TODO 
                e.printStackTrace();
            }
        }
    } finally {
        fis.close();
    }
}

From source file:ru.npopm.dep715.searchdocs.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes a single document built from {@code file}.
 *
 * @param writer       writer of the index that receives the document
 * @param file         file whose path and contents are indexed
 * @param lastModified value stored in the "modified" point field
 * @throws IOException if the file cannot be opened or the index cannot be written
 */
static void indexDoc(IndexWriter writer, File file, long lastModified) throws IOException {
    try (InputStream stream = new FileInputStream(file)) {
        final String path = file.toString();
        Document doc = new Document();

        // "path": indexed as a single un-tokenized value and stored
        doc.add(new StringField("path", path, Field.Store.YES));

        // "modified": the given timestamp as a LongPoint. Millisecond resolution is
        // often too fine; a coarser number (e.g. 2011021714 for February 17, 2011,
        // 2-3 PM) could be used instead.
        doc.add(new LongPoint("modified", lastModified));

        // "contents": tokenized and indexed from a reader (not stored); the bytes are
        // decoded as UTF-8, so other encodings will break searches for special characters
        BufferedReader contents = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        doc.add(new TextField("contents", contents));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // a newly created index cannot hold an older copy, so a plain add suffices
            System.out.println("adding " + path);
            writer.addDocument(doc);
        } else {
            // replace any previously indexed document with exactly this path
            System.out.println("updating " + path);
            writer.updateDocument(new Term("path", path), doc);
        }
    }
}

From source file:ru.npopm.dep715.searchdocs.lucene._IndexFiles_.java

License:Apache License

/**
 * Indexes a single document built from {@code file}.
 *
 * @param writer       writer of the index that receives the document
 * @param file         path of the file whose location and contents are indexed
 * @param lastModified value stored in the "modified" point field
 * @throws IOException if the file cannot be opened or the index cannot be written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        final String path = file.toString();
        Document doc = new Document();

        // "path": indexed as a single un-tokenized value and stored
        doc.add(new StringField("path", path, Field.Store.YES));

        // "modified": the given timestamp as a LongPoint. Millisecond resolution is
        // often too fine; a coarser number (e.g. 2011021714 for February 17, 2011,
        // 2-3 PM) could be used instead.
        doc.add(new LongPoint("modified", lastModified));

        // "contents": tokenized and indexed from a reader (not stored); the bytes are
        // decoded as UTF-8, so other encodings will break searches for special characters
        BufferedReader contents = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        doc.add(new TextField("contents", contents));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // a newly created index cannot hold an older copy, so a plain add suffices
            System.out.println("adding " + path);
            writer.addDocument(doc);
        } else {
            // replace any previously indexed document with exactly this path
            System.out.println("updating " + path);
            writer.updateDocument(new Term("path", path), doc);
        }
    }
}

From source file:se.riddle.jekyll.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * <p/>/* w w w  . j a  v  a  2s  .  com*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")),
                        Field.TermVector.YES));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:searchEngine.IndexFiles.java

License:Apache License

/**
 * Indexes a single document: the file is parsed with jsoup and its path, the given
 * modification time, its title and its text are written to the index.
 *
 * @param writer       writer of the index that receives the document
 * @param file         path of the file to parse and index
 * @param lastModified value stored in the "modified" point field
 * @throws IOException if the file cannot be read or the index cannot be written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // Parse from the stream that is already open instead of opening the file a second
        // time through java.io.File; the absolute path serves as the base URI, as it does
        // for the File-based overload.
        org.jsoup.nodes.Document jsoupDoc = Jsoup.parse(stream, "UTF8", file.toAbsolutePath().toString());

        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("title", jsoupDoc.title(), Field.Store.YES));
        doc.add(new TextField("contents", jsoupDoc.text(), Field.Store.YES));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // new index: no older copy can exist, so just add
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // existing index: replace the document stored under this exact path, if any
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}