List of usage examples for org.apache.lucene.index IndexWriter getConfig
public LiveIndexWriterConfig getConfig()
From source file:com.icdd.lucene.CreateIndex.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { // filter non-xml files if (filter.accept(file.toFile())) { System.out.println("num: " + num); num++;//w w w.j a va 2 s . c o m if (num < endset && num >= offset) { try (InputStream stream = Files.newInputStream(file)) { // make a new,empty document Document doc = new Document(); Field pathField = new StringField("path", file.toString(), Field.Store.YES); String filename = file.getFileName().toString(); int post = filename.indexOf('_'); if (post > 0) { filename = filename.substring(post + 1, filename.length() - 4); } doc.add(pathField); doc.add(new StringField("title", filename, Field.Store.YES)); doc.add(new SortedNumericDocValuesField("modified", lastModified)); doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document // can // be there): logger.info("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been // indexed) so // path, if present: logger.info("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } } } }
From source file:com.javapr.plaintextindex.search.Index.java
License:Apache License
public static void indexDocs(IndexWriter writer, File file) throws IOException, SAXException, TikaException { // nur lesbare Dateien verwenden if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); }/*from w w w .j a v a 2 s . c o m*/ } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { return; } try { //Word Dokumente mit Tika parsen ContentHandler contenthandler = new BodyContentHandler(); Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName()); Parser parser = new AutoDetectParser(); parser.parse(fis, contenthandler, metadata, new ParseContext()); // Lucene Dokumenten-Objekt erstellen und geparsten Tika-Inhalt speichern Document doc = new Document(); Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); Field filename = new StringField("filename", file.getName(), Field.Store.YES); doc.add(filename); doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); doc.add(new TextField("contents", contenthandler.toString(), Field.Store.NO)); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { //neuer Index, wenn neues Dokument System.out.println("adding " + file); writer.addDocument(doc); } else { long size = file.length() / 1024; list.add(file + ", " + size + "kb"); //Index updaten, wenn lteres Index-Dokument schon vorhanden System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.lin.studytest.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try {//from w ww . j ava2 s . c om InputStream stream = Files.newInputStream(file); // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } finally { } }
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w. j av a2 s . c om*/ // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): //System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.mathworks.xzheng.admin.SearcherManager.java
License:Apache License
// Builds a SearcherManager around an already-open IndexWriter.
public SearcherManager(IndexWriter writer) throws IOException {
    this.writer = writer;
    // Open the initial searcher directly from the writer's directory.
    currentSearcher = new IndexSearcher(DirectoryReader.open(writer.getDirectory()));
    // Warm the first searcher before it is handed out.
    warm(currentSearcher);
    // Install a merged-segment warmer so each newly merged segment is warmed
    // through this manager's warm() before it becomes visible to searches.
    writer.getConfig().setMergedSegmentWarmer(
            new IndexWriter.IndexReaderWarmer() {
                public void warm(AtomicReader reader) throws IOException {
                    SearcherManager.this.warm(new IndexSearcher(reader));
                }
            });
}
From source file:com.mycompany.lucenedemo.IndexFiles.java
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);//from ww w . jav a2 s . com // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.mycompany.restlet.search.sample.indexer.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { InputStream stream = Files.newInputStream(file); // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);// ww w.j a v a 2 s .com // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } }
From source file:com.nearinfinity.blur.manager.writer.nrt.SearcherManager.java
License:Apache License
/**
 * Creates and returns a new SearcherManager from the given {@link IndexWriter}.
 *
 * @param writer
 *          the IndexWriter to open the IndexReader from.
 * @param applyAllDeletes
 *          If <code>true</code>, all buffered deletes will be applied (made
 *          visible) in the {@link IndexSearcher} / {@link IndexReader}. If
 *          <code>false</code>, the deletes may or may not be applied, but
 *          remain buffered (in IndexWriter) so that they will be applied in
 *          the future. Applying deletes can be costly, so if your app can
 *          tolerate deleted documents being returned you might gain some
 *          performance by passing <code>false</code>. See
 *          {@link IndexReader#openIfChanged(IndexReader, IndexWriter, boolean)}.
 * @param warmer
 *          An optional {@link SearcherWarmer}. Pass <code>null</code> if you
 *          don't require the searcher to be warmed before going live. If this
 *          is <code>non-null</code> then a merged segment warmer is installed
 *          on the provided IndexWriter's config.
 * @param es
 *          An optional {@link ExecutorService} so different segments can be
 *          searched concurrently (see
 *          {@link IndexSearcher#IndexSearcher(IndexReader,ExecutorService)}).
 *          Pass <code>null</code> to search segments sequentially.
 *
 * @throws IOException
 */
public SearcherManager(IndexWriter writer, boolean applyAllDeletes, final SearcherWarmer warmer,
        final ExecutorService es) throws IOException {
    this.es = es;
    this.warmer = warmer;
    // Open the initial near-real-time searcher straight from the writer.
    currentSearcher = new IndexSearcher(IndexReader.open(writer, applyAllDeletes));
    if (warmer != null) {
        // Warm each newly merged segment before it becomes visible to
        // searches, using the same executor as regular searches.
        writer.getConfig().setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(IndexReader reader) throws IOException {
                warmer.warm(new IndexSearcher(reader, es));
            }
        });
    }
}
From source file:com.nero.model.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * //from ww w .ja va 2s .c o m * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. 
efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.paladin.common.LuceneHelper.java
License:Apache License
/**
 * (Re)indexes every row of the given database table into the Lucene index.
 *
 * NOTE(review): the table name is concatenated straight into the SQL string;
 * this is safe only while callers pass trusted, hard-coded table names —
 * confirm no user-supplied value ever reaches this parameter.
 *
 * @param writer the IndexWriter to add or update documents with
 * @param table  the table to index; "motto" has no TITLE column
 */
private static void indexTable(IndexWriter writer, String table) throws IOException {
    String sql = "SELECT ID, TITLE, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase();
    // The motto table has no TITLE column, so select without it.
    if (table.equalsIgnoreCase("motto"))
        sql = "SELECT ID, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase();
    List<Map<String, Object>> blogs = QueryHelper.queryList(sql);
    for (Map<String, Object> blog : blogs) {
        Document doc = new Document();
        // "id": stored, untokenized key used for updateDocument matching.
        Field id_field = new Field("id", blog.get("ID").toString(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        // Concatenate title (content for motto), content and tag into a
        // single analyzed, searchable field, joined by the field separator.
        StringBuilder builder = new StringBuilder();
        if (table.equalsIgnoreCase("motto"))
            builder.append(blog.get("CONTENT"));
        else
            builder.append(blog.get("TITLE"));
        builder.append(Constants.LUCENE_FIELD_SEP);
        builder.append(blog.get("CONTENT"));
        builder.append(Constants.LUCENE_FIELD_SEP);
        builder.append(blog.get("TAG"));
        Field t_c_t_field = new Field("title_content_tag", builder.toString(), Field.Store.YES,
                Field.Index.ANALYZED);
        doc.add(id_field);
        doc.add(t_c_t_field);
        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE)
            writer.addDocument(doc);
        else
            // An old copy may exist; replace the document with a matching id.
            writer.updateDocument(new Term("id", blog.get("ID").toString()), doc);
    }
}