Example usage for org.apache.lucene.index IndexWriter getConfig

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriter.getConfig().

Prototype

public LiveIndexWriterConfig getConfig()

Source Link

Document

Returns a LiveIndexWriterConfig, which can be used to query the IndexWriter's current settings, as well as to modify the "live" ones.

Usage

From source file:com.paladin.sys.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * <p/>/*from  w  w  w  . j a  v a2  s . c o  m*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */

static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead())
        return;

    if (file.isDirectory() && file.list() != null) {
        String[] files = file.list();
        for (int i = 0; i < files.length; i++)
            indexDocs(writer, new File(file, files[i]));
    } else {
        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException e) {
            // at least on windows, some temporary files raise this exception with an "access denied" message
            // checking if the file can be read doesn't help
            return;
        }

        try {
            Document doc = new Document();
            // Add the path of the file as a field named "path".  Use a field that is indexed (i.e. searchable), but don't tokenize
            // the field into separate words and don't index term frequency or positional information:
            Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED_NO_NORMS);
            pathField.setOmitTermFreqAndPositions(true);
            doc.add(pathField);

            // Add the last modified date of the file a field named "modified". Use a NumericField that is indexed (i.e. efficiently filterable with
            // NumericRangeFilter).  This indexes to milli-second resolution, which is often too fine.  You could instead create a number based on
            // year/month/day/hour/minutes/seconds, down the resolution you require. For example the long value 2011021714 would mean
            // February 17, 2011, 2-3 PM.
            NumericField modifiedField = new NumericField("modified");
            modifiedField.setLongValue(file.lastModified());
            doc.add(modifiedField);

            // Add the contents of the file to a field named "contents".  Specify a Reader, so that the text of the file is tokenized and indexed, but not stored.
            // Note that FileReader expects the file to be in UTF-8 encoding. If that's not the case searching for special characters will fail.
            doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        } finally {
            fis.close();
        }
    }
}

From source file:com.qwazr.search.index.IndexInstance.java

License:Apache License

/**
 * Assembles an index instance from already-constructed collaborators.
 * <p>
 * The live writer configuration is read from {@code indexWriter}; the index-time analyzer
 * and the snapshot deletion policy are taken from that configuration and cast to
 * {@code UpdatableAnalyzer} and {@code SnapshotDeletionPolicy} respectively, so a writer
 * configured with other types fails here with a {@code ClassCastException}.
 * The facets reader state cache starts out as {@code null}.
 */
private IndexInstance(SchemaInstance schema, Directory dataDirectory, IndexSettingsDefinition settings,
        LinkedHashMap<String, AnalyzerDefinition> analyzerMap, LinkedHashMap<String, FieldDefinition> fieldMap,
        FileSet fileSet, IndexWriter indexWriter, SearcherManager searcherManager,
        UpdatableAnalyzer queryAnalyzer) {
    // Definitions and storage handed in by the caller.
    this.schema = schema;
    this.settings = settings;
    this.fileSet = fileSet;
    this.dataDirectory = dataDirectory;
    this.analyzerMap = analyzerMap;
    this.fieldMap = fieldMap;

    // The writer, plus everything derived from its live configuration.
    this.indexWriter = indexWriter;
    this.indexWriterConfig = indexWriter.getConfig();
    this.indexAnalyzer = (UpdatableAnalyzer) this.indexWriterConfig.getAnalyzer();
    this.snapshotDeletionPolicy = (SnapshotDeletionPolicy) this.indexWriterConfig.getIndexDeletionPolicy();

    // Search-side collaborators.
    this.queryAnalyzer = queryAnalyzer;
    this.searcherManager = searcherManager;
    this.facetsReaderStateCache = null;
}

From source file:com.sg.business.vault.index.demo.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /*from w w  w .  j a  va  2 s  .co m*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES); //$NON-NLS-1$
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                //          doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                //          doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                try {
                    doc.add(new TextField("contents", FileUtil.getContent(file.getName(), fis), //$NON-NLS-1$
                            Field.Store.NO));
                } catch (Exception e) {
                    e.printStackTrace();
                }

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file); //$NON-NLS-1$
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file); //$NON-NLS-1$
                    writer.updateDocument(new Term("path", file.getPath()), doc); //$NON-NLS-1$
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:com.slieer.app.lecene3x.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory./*  w w w. j a v  a  2 s .  com*/
 * 
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't
                // tokenize
                // the field into separate words and don't index term
                // frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named
                // "modified".
                // Use a LongField that is indexed (i.e. efficiently
                // filterable with
                // NumericRangeFilter). This indexes to milli-second
                // resolution, which
                // is often too fine. You could instead create a number
                // based on
                // year/month/day/hour/minutes/seconds, down the resolution
                // you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".
                // Specify a Reader,
                // so that the text of the file is tokenized and indexed,
                // but not stored.
                // Note that FileReader expects the file to be in UTF-8
                // encoding.
                // If that's not the case searching for special characters
                // will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old
                    // document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have
                    // been indexed) so
                    // we use updateDocument instead to replace the old one
                    // matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:com.slieer.lucene.apachedemo.IndexFiles.java

License:Apache License

/**
 * Adds one file to the index, or walks a directory and indexes everything
 * readable beneath it. Unreadable entries, directories whose listing is
 * unavailable, and files that cannot be opened are skipped silently.
 *
 * @param writer
 *            index writer that receives the documents
 * @param file
 *            a regular file to index, or a directory to descend into
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    if (!file.canRead()) {
        // unreadable entries are never indexed
        return;
    }

    if (file.isDirectory()) {
        String[] children = file.list();
        if (children == null) {
            // listing failed (an IO error could occur)
            return;
        }
        for (String child : children) {
            indexDocs(writer, new File(file, child));
        }
        return;
    }

    FileInputStream input;
    try {
        input = new FileInputStream(file);
    } catch (FileNotFoundException ignored) {
        // Seen at least on windows for some temporary files ("access
        // denied"); the canRead() check above does not prevent it.
        return;
    }

    try {
        Document document = new Document();

        // "path" is searchable as one exact token and stored with the
        // document.
        Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
        document.add(pathField);

        // "modified" carries the last-modified time in milliseconds and
        // is not stored. A coarser number (e.g. 2011021714 for
        // February 17, 2011, 2-3 PM) could be used when millisecond
        // resolution is too fine.
        document.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // "contents" is fed through a Reader: tokenized and indexed, not
        // stored. Bytes are decoded as UTF-8, so other encodings will
        // break searches for special characters.
        document.add(new TextField("contents", new BufferedReader(new InputStreamReader(input, "UTF-8"))));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // nothing can be there yet, a plain add is enough
            System.out.println("adding " + file);
            writer.addDocument(document);
        } else {
            // an older copy may exist; replace whatever matches this
            // exact path
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), document);
        }
    } finally {
        input.close();
    }
}

From source file:com.tistory.devyongsik.demo.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /* w  ww  . jav a  2s.c o m*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i])); //10.         .
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file); //11.    Stream .
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                //12.   .  Document  Row.
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:

                //13.   Document,  Document     .
                //      .   path   path .
                //          .
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setOmitTermFreqAndPositions(true);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.

                //14. Field        .
                //      ,    .
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.

                //15. path, modified,   contents   Document .
                //       ,    String, Numeric, Reader  
                //         .
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { //16.        add...
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file); //17. Create or Update update .
                    //   3.X   API .
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Zips the files of a snapshot of the current index commit and completes {@code op}
 * with {@code req}, whose {@code backupFile} is set to the result of the zip call.
 * <p>
 * If no writer is available the operation is failed with a
 * {@link CancellationException} and nothing else happens. The snapshot, when one was
 * taken, is released and unused index files are deleted before returning, whether
 * or not the backup succeeded.
 *
 * @param op  operation to complete (or fail when there is no writer)
 * @param req backup request that receives the produced backup file
 * @throws Throwable any failure raised while snapshotting or zipping; exceptions are
 *                   logged and rethrown
 */
private void handleBackup(Operation op, BackupRequest req) throws Throwable {
    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    // NOTE(review): presumably brings the index up to date before the snapshot -
    // confirm against handleMaintenanceImpl.
    handleMaintenanceImpl(true);
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    try {
        // Create a snapshot so the index files won't be deleted.
        snapshotter = (SnapshotDeletionPolicy) w.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        // Location of the index files inside the host's storage sandbox.
        String sandboxIndexPath = UriUtils.buildUriPath(getHost().getStorageSandbox().getPath(),
                FILE_PATH_LUCENE);

        // Add the files in the commit to a zip file.
        List<URI> fileList = FileUtils.filesToUris(sandboxIndexPath, commit.getFileNames());
        req.backupFile = FileUtils.zipFiles(fileList, this.indexDirectory + "-" + Utils.getNowMicrosUtc());

        op.setBody(req).complete();
    } catch (Exception e) {
        this.logSevere(e);
        throw e;
    } finally {
        // Only release a snapshot that was actually taken: if snapshot() itself failed,
        // commit is still null and releasing it would throw and mask the real error.
        if (snapshotter != null && commit != null) {
            snapshotter.release(commit);
        }
        w.deleteUnusedFiles();
    }
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexBackupService.java

License:Open Source License

/**
 * Backs up a snapshot of the index described by {@code indexInfo}.
 * <p>
 * The writer is committed and a snapshot of the commit is taken. Then either
 * <ul>
 * <li>zip mode: the commit's files are zipped into {@code destinationPath}, or</li>
 * <li>incremental mode: {@code destinationPath} is treated as a directory (created if
 * missing) and synchronized with the commit - files only in the commit are copied in,
 * regular files only in the destination are deleted.</li>
 * </ul>
 * An index without an on-disk directory ({@code indexInfo.indexDirectory == null}) is
 * first copied into a temporary directory, which is removed afterwards. The snapshot
 * is released and unused index files are deleted before returning.
 *
 * @param destinationPath zip file (zip mode) or directory (incremental mode) to write to
 * @param isZipBackup     {@code true} for a zip backup, {@code false} for incremental
 * @param indexInfo       writer and index directory of the index to back up
 * @throws IOException If there is a low-level I/O error
 */
private void takeSnapshot(Path destinationPath, boolean isZipBackup, InternalDocumentIndexInfo indexInfo)
        throws IOException {

    IndexWriter writer = indexInfo.indexWriter;
    boolean isInMemoryIndex = indexInfo.indexDirectory == null;

    URI storageSandbox = getHost().getStorageSandbox();

    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    long backupStartTime = System.currentTimeMillis();
    try {
        // Create a snapshot so the index files won't be deleted.
        writer.commit();
        snapshotter = (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        if (isZipBackup) {
            Path tempDir = null;
            try {
                List<URI> fileList = new ArrayList<>();
                if (isInMemoryIndex) {
                    tempDir = Files.createTempDirectory("lucene-in-memory-backup");
                    copyInMemoryLuceneIndexToDirectory(commit, tempDir);
                    // Files.list holds an open directory handle until the stream is closed.
                    try (java.util.stream.Stream<Path> entries = Files.list(tempDir)) {
                        fileList.addAll(entries.map(Path::toUri).collect(toList()));
                    }
                } else {

                    Path indexDirectoryPath = Paths.get(storageSandbox).resolve(indexInfo.indexDirectory);
                    List<URI> files = commit.getFileNames().stream().map(indexDirectoryPath::resolve)
                            .map(Path::toUri).collect(toList());
                    fileList.addAll(files);
                }

                // Add files in the commit to a zip file.
                FileUtils.zipFiles(fileList, destinationPath.toFile());
            } finally {
                if (tempDir != null) {
                    FileUtils.deleteFiles(tempDir.toFile());
                }
            }
        } else {
            // incremental backup

            // create destination dir if not exist
            if (!Files.exists(destinationPath)) {
                Files.createDirectory(destinationPath);
            }

            Set<String> sourceFileNames = new HashSet<>(commit.getFileNames());

            // Names of the regular files already present in the destination; the
            // directory stream is closed as soon as the names are collected.
            Set<String> destFileNames;
            try (java.util.stream.Stream<Path> entries = Files.list(destinationPath)) {
                destFileNames = entries.filter(Files::isRegularFile)
                        .map(path -> path.getFileName().toString()).collect(toSet());
            }

            Path tempDir = null;
            try {
                Path indexDirectoryPath;
                if (isInMemoryIndex) {
                    // copy files into temp directory and point index directory path to temp dir
                    tempDir = Files.createTempDirectory("lucene-in-memory-backup");
                    copyInMemoryLuceneIndexToDirectory(commit, tempDir);
                    indexDirectoryPath = tempDir;
                } else {
                    indexDirectoryPath = Paths.get(storageSandbox).resolve(indexInfo.indexDirectory);
                }

                // add files exist in source but not in dest
                Set<String> toAdd = new HashSet<>(sourceFileNames);
                toAdd.removeAll(destFileNames);
                for (String filename : toAdd) {
                    Path source = indexDirectoryPath.resolve(filename);
                    Path target = destinationPath.resolve(filename);
                    Files.copy(source, target);
                }

                // delete files exist in dest but not in source
                Set<String> toDelete = new HashSet<>(destFileNames);
                toDelete.removeAll(sourceFileNames);
                for (String filename : toDelete) {
                    Path path = destinationPath.resolve(filename);
                    Files.delete(path);
                }

                long backupEndTime = System.currentTimeMillis();
                logInfo("Incremental backup performed. dir=%s, added=%d, deleted=%d, took=%dms",
                        destinationPath, toAdd.size(), toDelete.size(), backupEndTime - backupStartTime);
            } finally {
                if (tempDir != null) {
                    FileUtils.deleteFiles(tempDir.toFile());
                }
            }
        }
    } finally {
        // Release only a snapshot that was actually taken.
        if (snapshotter != null && commit != null) {
            snapshotter.release(commit);
        }
        writer.deleteUnusedFiles();
    }
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Zips the files of a snapshot of the current index commit and completes {@code op}
 * with {@code req}, whose {@code backupFile} is set to the result of the zip call.
 * <p>
 * If no writer is available the operation is failed with a
 * {@link CancellationException} and nothing else happens. The snapshot, when one was
 * taken, is released and unused index files are deleted before returning, whether
 * or not the backup succeeded.
 *
 * @param op  operation to complete (or fail when there is no writer)
 * @param req backup request that receives the produced backup file
 * @throws Throwable any failure raised while snapshotting or zipping; exceptions are
 *                   logged and rethrown
 */
private void handleBackup(Operation op, BackupRequest req) throws Throwable {
    SnapshotDeletionPolicy snapshotter = null;
    IndexCommit commit = null;
    // NOTE(review): presumably brings the index up to date before the snapshot -
    // confirm against handleMaintenanceImpl.
    handleMaintenanceImpl(true);
    IndexWriter w = this.writer;
    if (w == null) {
        op.fail(new CancellationException());
        return;
    }
    try {
        // Create a snapshot so the index files won't be deleted.
        snapshotter = (SnapshotDeletionPolicy) w.getConfig().getIndexDeletionPolicy();
        commit = snapshotter.snapshot();

        // Location of the index files inside the host's storage sandbox.
        String sandboxIndexPath = UriUtils.buildUriPath(getHost().getStorageSandbox().getPath(),
                this.indexDirectory);

        // Add the files in the commit to a zip file.
        List<URI> fileList = FileUtils.filesToUris(sandboxIndexPath, commit.getFileNames());
        req.backupFile = FileUtils.zipFiles(fileList, this.indexDirectory + "-" + Utils.getNowMicrosUtc());

        op.setBody(req).complete();
    } catch (Exception e) {
        this.logSevere(e);
        throw e;
    } finally {
        // Only release a snapshot that was actually taken: if snapshot() itself failed,
        // commit is still null and releasing it would throw and mask the real error.
        if (snapshotter != null && commit != null) {
            snapshotter.release(commit);
        }
        w.deleteUnusedFiles();
    }
}

From source file:com.work.IndexFiles.java

License:Apache License

/**
 * Builds one Lucene document for {@code file} and writes it to the index.
 * <p>
 * The document carries three fields: {@code "path"} (stored, exact-match),
 * {@code "modified"} (the given timestamp as a stored string) and {@code "contents"}
 * (the file body, read as UTF-8, tokenized and not stored). With a writer opened in
 * {@code CREATE} mode the document is added; otherwise any document with the same
 * path is replaced.
 *
 * @param writer       index writer that receives the document
 * @param file         file whose content is indexed
 * @param lastModified last-modified timestamp to record for the file
 * @throws IOException If there is a low-level I/O error
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        String pathText = file.toString();
        Document document = new Document();

        // Exact-match, stored path; it doubles as the key used for updates below.
        document.add(new StringField("path", pathText, Field.Store.YES));

        // The timestamp is kept as a plain stored string rather than a LongPoint,
        // so it is retrievable but not range-searchable as a number.
        document.add(new StringField("modified", String.valueOf(lastModified), Field.Store.YES));

        // Body text goes in through a Reader: tokenized and indexed, never stored.
        // Content in an encoding other than UTF-8 will not match special characters.
        BufferedReader body = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        document.add(new TextField("contents", body));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // Nothing can be in the index yet, so a plain add is enough.
            System.out.println("adding " + file);
            writer.addDocument(document);
        } else {
            // An older copy may exist; replace whatever matches this exact path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", pathText), document);
        }
    }
}