Example usage for org.apache.lucene.index IndexWriter getConfig

List of usage examples for org.apache.lucene.index IndexWriter getConfig

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter getConfig.

Prototype

public LiveIndexWriterConfig getConfig() 

Source Link

Document

Returns a LiveIndexWriterConfig, which can be used to query the IndexWriter's current settings, as well as to modify "live" ones.

Usage

From source file:com.icdd.lucene.CreateIndex.java

License:Apache License

/**
 * Indexes a single file, but only when it passes the xml filter and falls
 * inside the configured [offset, endset) window of the running counter.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    // Skip anything the xml file filter rejects.
    if (!filter.accept(file.toFile())) {
        return;
    }

    System.out.println("num: " + num);
    num++;
    // Only index documents inside the configured window.
    if (num >= endset || num < offset) {
        return;
    }

    try (InputStream stream = Files.newInputStream(file)) {
        // Fresh, empty Lucene document for this file.
        Document doc = new Document();

        // Derive the title from the file name: drop everything up to the
        // first '_' and the trailing 4-character extension.
        String title = file.getFileName().toString();
        int sep = title.indexOf('_');
        if (sep > 0) {
            title = title.substring(sep + 1, title.length() - 4);
        }

        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new StringField("title", title, Field.Store.YES));
        doc.add(new SortedNumericDocValuesField("modified", lastModified));
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Fresh index: no prior copy of this document can exist.
            logger.info("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index: replace any old document with the same path.
            logger.info("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.javapr.plaintextindex.search.Index.java

License:Apache License

/**
 * Recursively indexes {@code file}: directories are walked, regular files are
 * parsed with Tika and added to (or replaced in) the Lucene index.
 *
 * @param writer index writer that receives the documents
 * @param file   file or directory to index
 * @throws IOException   on I/O failure while reading or writing the index
 * @throws SAXException  if Tika's content handler fails
 * @throws TikaException if Tika cannot parse a document
 */
public static void indexDocs(IndexWriter writer, File file) throws IOException, SAXException, TikaException {

    // Only use readable files.
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        String[] children = file.list();
        // list() returns null on an I/O error
        if (children != null) {
            for (String child : children) {
                indexDocs(writer, new File(file, child));
            }
        }
        return;
    }

    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // Some files fail to open despite canRead(); skip them silently.
        return;
    }

    // try-with-resources replaces the original manual finally/close and
    // guarantees the stream is closed even if Tika parsing throws.
    try (InputStream in = fis) {

        // Parse the document (e.g. Word files) with Tika.
        ContentHandler contenthandler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
        Parser parser = new AutoDetectParser();
        parser.parse(in, contenthandler, metadata, new ParseContext());

        // Build the Lucene document from the parsed Tika content.
        Document doc = new Document();
        doc.add(new StringField("path", file.getPath(), Field.Store.YES));
        doc.add(new StringField("filename", file.getName(), Field.Store.YES));
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        doc.add(new TextField("contents", contenthandler.toString(), Field.Store.NO));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index: just add the document.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            long size = file.length() / 1024;
            list.add(file + ", " + size + "kb");
            // Update the index when an older copy of the document may exist.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        }
    }
}

From source file:com.lin.studytest.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes a single document.
 *
 * FIX: the original opened the stream and left the {@code finally} block
 * empty, so the file handle was never closed.  try-with-resources now
 * closes it on every path.
 *
 * @param writer       index writer receiving the document
 * @param file         file to index
 * @param lastModified last-modified timestamp stored in the "modified" field
 * @throws IOException if the file cannot be read or the index written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.lucene.index.test.IndexFiles.java

License:Apache License

/** Adds or replaces the index entry for a single file. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        Document doc = new Document();

        // "path": indexed as one untokenized term (no term frequencies or
        // positions), stored so hits can display it.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));

        // "modified": millisecond timestamp in a LongField, efficiently
        // filterable with NumericRangeFilter but not stored.  When that
        // resolution is too fine, a coarser value such as 2011021714
        // (February 17, 2011, 2-3 PM) could be indexed instead.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // "contents": tokenized and indexed via a Reader, not stored.
        // The file must be UTF-8; otherwise searches for special
        // characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (freshIndex) {
            // Brand-new index: nothing old to replace, just add.
            //System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index: replace any prior document with this exact path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.mathworks.xzheng.admin.SearcherManager.java

License:Apache License

/**
 * Creates a SearcherManager bound to the given writer: opens an initial
 * searcher over the writer's directory, warms it, and installs a
 * merged-segment warmer on the writer's live config so newly merged
 * segments are warmed through this manager before they are used.
 *
 * @param writer the IndexWriter whose directory is searched
 * @throws IOException if the initial reader cannot be opened
 */
public SearcherManager(IndexWriter writer) throws IOException {
    this.writer = writer;
    // Initial searcher opened directly over the writer's directory.
    currentSearcher = new IndexSearcher(DirectoryReader.open(writer.getDirectory()));
    warm(currentSearcher);

    // Warm each freshly merged segment via this manager's warm(...)
    // before the merge is committed to searchers.
    writer.getConfig().setMergedSegmentWarmer(
            new IndexWriter.IndexReaderWarmer() {
                public void warm(AtomicReader reader) throws IOException {
                    SearcherManager.this.warm(new IndexSearcher(reader));
                }
            });
}

From source file:com.mycompany.lucenedemo.IndexFiles.java

/** Indexes one file as a single Lucene document (add or update). */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        Document doc = new Document();

        // Path: a single untokenized, stored term so the exact path is both
        // searchable and retrievable (no term frequencies or positions).
        doc.add(new StringField("path", file.toString(), Field.Store.YES));

        // Last-modified time as a LongPoint, efficiently filterable with
        // PointRangeQuery.  Millisecond resolution is often finer than
        // needed; a coarser value like 2011021714 (February 17, 2011,
        // 2-3 PM) could be indexed instead.
        doc.add(new LongPoint("modified", lastModified));

        // File body: tokenized and indexed through a Reader, not stored.
        // The reader assumes UTF-8; other encodings break searches for
        // special characters.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() != OpenMode.CREATE) {
            // Existing index: swap out any old document with the same path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        } else {
            // Fresh index: nothing old can be present, just add.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        }
    }
}

From source file:com.mycompany.restlet.search.sample.indexer.java

License:Apache License

/**
 * Indexes a single document.
 *
 * FIX: the original opened the stream and never closed it (no try/finally
 * at all), leaking a file handle per indexed file.  try-with-resources now
 * closes it on every path, including exceptions.
 *
 * @param writer       index writer receiving the document
 * @param file         file to index
 * @param lastModified last-modified timestamp stored in the "modified" field
 * @throws IOException if the file cannot be read or the index written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {

    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.nearinfinity.blur.manager.writer.nrt.SearcherManager.java

License:Apache License

/**
 * Creates and returns a new SearcherManager from the given
 * {@link IndexWriter}.
 *
 * @param writer
 *          the IndexWriter to open the IndexReader from.
 * @param applyAllDeletes
 *          If <code>true</code>, all buffered deletes will be applied (made
 *          visible) in the {@link IndexSearcher} / {@link IndexReader}. If
 *          <code>false</code>, the deletes may or may not be applied, but
 *          remain buffered (in IndexWriter) so that they will be applied in
 *          the future. Applying deletes can be costly, so if your app can
 *          tolerate deleted documents being returned you might gain some
 *          performance by passing <code>false</code>. See
 *          {@link IndexReader#openIfChanged(IndexReader, IndexWriter, boolean)}
 *          .
 * @param warmer
 *          An optional {@link SearcherWarmer}. Pass <code>null</code> if you
 *          don't require the searcher to warmed before going live. If this is
 *          <code>non-null</code> then a merged segment warmer is installed on
 *          the provided IndexWriter's config.
 * @param es
 *          An optional {@link ExecutorService} so different segments can be
 *          searched concurrently (see
 *          {@link IndexSearcher#IndexSearcher(IndexReader,ExecutorService)}.
 *          Pass <code>null</code> to search segments sequentially.
 *
 * @throws IOException
 */
public SearcherManager(IndexWriter writer, boolean applyAllDeletes, final SearcherWarmer warmer,
        final ExecutorService es) throws IOException {
    this.es = es;
    this.warmer = warmer;
    // Open a near-real-time reader from the writer for the initial searcher.
    currentSearcher = new IndexSearcher(IndexReader.open(writer, applyAllDeletes));
    if (warmer != null) {
        // Install a merged-segment warmer so each merged segment is warmed
        // through the supplied SearcherWarmer before it goes live.
        writer.getConfig().setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(IndexReader reader) throws IOException {
                warmer.warm(new IndexSearcher(reader, es));
            }
        });
    }
}

From source file:com.nero.model.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or — when a directory is
 * given — recurses over the files and directories found beneath it.
 *
 * NOTE: this indexes one document per input file, which is slow.  For good
 * throughput, pack multiple documents into each input file; the benchmark
 * module's WriteLineDocTask can create such "line doc" files (one document
 * per line).
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file   The file to index, or the directory to recurse into to find files to index
 * @throws IOException on index I/O failure
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // Do not try to index files that cannot be read.
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        String[] children = file.list();
        // list() returns null when an I/O error occurs
        if (children != null) {
            for (String child : children) {
                indexDocs(writer, new File(file, child));
            }
        }
        return;
    }

    FileInputStream in;
    try {
        in = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // At least on Windows, some temporary files raise this with an
        // "access denied" message even though canRead() succeeded; skip them.
        return;
    }

    try {
        Document doc = new Document();

        // Exact, untokenized path term (stored), with minimal index options:
        // no term frequencies or positions.
        Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
        doc.add(pathField);

        // Millisecond last-modified stamp, filterable with NumericRangeFilter.
        // A coarser value (e.g. 2011021714 for February 17, 2011, 2-3 PM)
        // could be used when that resolution is too fine.
        NumericField modifiedField = new NumericField("modified");
        modifiedField.setLongValue(file.lastModified());
        doc.add(modifiedField);

        // Tokenized, unstored file body; the reader expects UTF-8 input, so
        // searches for special characters fail on other encodings.
        doc.add(new Field("contents", new BufferedReader(new InputStreamReader(in, "UTF-8"))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Fresh index: no prior copy of this document can exist.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Replace any previously indexed document with the same exact path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        }
    } finally {
        in.close();
    }
}

From source file:com.paladin.common.LuceneHelper.java

License:Apache License

/**
 * ?/*from  ww w .j av a 2s  .co m*/
 * //TODO:???
 *
 * @param writer
 * @param table
 */
private static void indexTable(IndexWriter writer, String table) throws IOException {
    String sql = "SELECT ID, TITLE, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase();

    if (table.equalsIgnoreCase("motto"))
        sql = "SELECT ID, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase();

    List<Map<String, Object>> blogs = QueryHelper.queryList(sql);

    for (Map<String, Object> blog : blogs) {
        Document doc = new Document();
        Field id_field = new Field("id", blog.get("ID").toString(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        // ?
        StringBuilder builder = new StringBuilder();

        if (table.equalsIgnoreCase("motto"))
            builder.append(blog.get("CONTENT"));
        else
            builder.append(blog.get("TITLE"));
        builder.append(Constants.LUCENE_FIELD_SEP);
        builder.append(blog.get("CONTENT"));
        builder.append(Constants.LUCENE_FIELD_SEP);
        builder.append(blog.get("TAG"));

        Field t_c_t_field = new Field("title_content_tag", builder.toString(), Field.Store.YES,
                Field.Index.ANALYZED);

        doc.add(id_field);
        doc.add(t_c_t_field);

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE)
            writer.addDocument(doc);
        else// id??
            writer.updateDocument(new Term("id", blog.get("ID").toString()), doc);
    }
}