Example usage for org.apache.lucene.index IndexWriter getConfig

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter getConfig.

Prototype

public LiveIndexWriterConfig getConfig()

Source Link

Document

Returns a LiveIndexWriterConfig , which can be used to query the IndexWriter current settings, as well as modify "live" ones.

Usage

From source file:FileIndexer.java

License:Apache License

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize 
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);//from   w ww .  j  a  v a 2 s. c om

        // Add the last modified date of the file a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so 
            // we use updateDocument instead to replace the old one matching the exact 
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:antnlp.opie.indexsearch.IndexFiles.java

License:Apache License

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {

    InputStreamReader iReader = new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8);
    BufferedReader bufReader = new BufferedReader(iReader);

    String docLine = null;/*from w w  w  .  ja va  2  s  .  co  m*/
    while ((docLine = bufReader.readLine()) != null) {
        docLine = docLine.trim();
        if (docLine.length() == 0)
            continue;
        String[] column = docLine.split("\\t");
        System.out.println(column[0]);
        System.out.println(column[1]);

        // make a new, empty document
        Document doc = new Document();

        // Add the id of the file as a field named "id".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize 
        // the field into separate words and don't index term frequency
        // or positional information:
        Field docidField = new StringField("docid", column[0], Field.Store.YES);
        doc.add(docidField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents", column[1], Field.Store.YES));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + column[0]);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so 
            // we use updateDocument instead to replace the old one matching the exact 
            // path, if present:
            System.out.println("updating " + column[0]);
            writer.updateDocument(new Term("docid", column[0]), doc);
        }
    }
    iReader.close();
    bufReader.close();
}

From source file:aos.lucene.admin.SearcherManager.java

License:Apache License

public SearcherManager(IndexWriter writer) throws IOException {

    this.writer = writer;
    IndexReader reader = DirectoryReader.open(writer.getDirectory());
    currentSearcher = new IndexSearcher(reader);
    warm(currentSearcher);//  w  w  w  . ja va2 s.  com

    writer.getConfig().setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
        public void warm(AtomicReader reader) throws IOException {
            SearcherManager.this.warm(new IndexSearcher(reader));
        }
    });
}

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Indexes a single document and writes it to the given index writer
 * @param writer - the index writer to writer
 * @param metadata - the document/*from ww  w  .  j a va  2  s  .  co  m*/
 * @throws IOException
 */
static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException {
    Path file = Paths.get(metadata.getFilename());
    try {
        Document doc = new Document();

        Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add Document metadata //
        doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES));
        // End of Document Metadata //

        Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(),
                Field.Store.YES);
        doc.add(modified);

        PDFTextExtractor extractor = new PDFTextExtractor();
        // Get the string contents
        String textContents = extractor.extractText(file.toString());

        // Store the string contents
        FieldType contentsType = new FieldType();
        contentsType.setStored(true);
        contentsType.setTokenized(true);
        contentsType.setStoreTermVectors(true);
        contentsType.setStoreTermVectorPositions(true);
        contentsType.setStoreTermVectorPayloads(true);
        contentsType.setStoreTermVectorOffsets(true);
        contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType);
        doc.add(contents);

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            log.info("adding " + file + " to index");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            log.info("updating " + file + " in index");
            writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc);
        }
    } catch (IOException e) {
        log.error("Failed to read file " + metadata.getFilename());
    }

}

From source file:Application.mediaIndexer.java

/**
 * Indexes a single document// w w  w . ja v a2s. c  om
 * 
 * @throws TikaException
 * @throws SAXException
 */
public static void indexDoc(IndexWriter writer, Path file, TextArea results, long lastModified)
        throws IOException, SAXException, TikaException {
    AutoDetectParser parser = new AutoDetectParser();
    BodyContentHandler handler = new BodyContentHandler();
    Metadata metadata = new Metadata();
    try (InputStream stream = Files.newInputStream(file)) {
        parser.parse(stream, handler, metadata);
        Document doc = new Document();
        String[] metadataNames = metadata.names();
        for (String name : metadataNames)
            doc.add(new TextField(name, metadata.get(name), Field.Store.YES));
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        results.appendText("Title: " + metadata.get("title") + "\n");
        results.appendText("Artists: " + metadata.get("xmpDM:artist") + "\n");
        results.appendText("Genre: " + metadata.get("xmpDM:genre") + "\n");
        results.appendText("Year: " + metadata.get("xmpDM:releaseDate") + "\n");
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can
            // be there):
            results.appendText("adding " + file + "\n");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed):
            results.appendText("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java

License:Apache License

/**
 * Creates the synonym index//w w  w  .ja  v  a 2  s .c  om
 *
 * @throws IOException
 */
private void indexSKOSModel() throws IOException {
    IndexWriterConfig cfg = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(indexDir, cfg);
    writer.getConfig().setRAMBufferSizeMB(48);
    /* iterate SKOS concepts, create Lucene docs and add them to the index */
    ResIterator concept_iter = skosModel.listResourcesWithProperty(RDF.type, SKOS.Concept);
    while (concept_iter.hasNext()) {
        Resource skos_concept = concept_iter.next();
        Document concept_doc = createDocumentsFromConcept(skos_concept);
        writer.addDocument(concept_doc);
    }
    writer.close();
}

From source file:at.ac.univie.mminf.luceneSKOS.skos.impl.SKOSEngineImpl.java

License:Apache License

/**
 * Creates the synonym index//from w  w w. ja v a 2  s . c  om
 * 
 * @throws IOException
 */
private void indexSKOSModel() throws IOException {
    IndexWriterConfig cfg = new IndexWriterConfig(matchVersion, analyzer);
    IndexWriter writer = new IndexWriter(indexDir, cfg);
    writer.getConfig().setRAMBufferSizeMB(48);

    /* iterate SKOS concepts, create Lucene docs and add them to the index */
    ResIterator concept_iter = skosModel.listResourcesWithProperty(RDF.type, SKOS.Concept);
    while (concept_iter.hasNext()) {
        Resource skos_concept = concept_iter.next();

        Document concept_doc = createDocumentsFromConcept(skos_concept);

        // System.out.println("Adding document to index " + concept_doc);

        writer.addDocument(concept_doc);
    }

    writer.close();
}

From source file:back.Indexer.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * // w ww  .jav a2  s .com
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:br.andrew.lucene.testing.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * /*from w  w  w .  j a  v a  2  s  . c o  m*/
 * NOTE: This method indexes one document per input file.  This is slow.  For good
 * throughput, put multiple documents into your input file(s).  An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *  
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(final IndexWriter writer, final File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            final String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    IndexFiles.indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (final FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                final Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                final Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:buscador.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory./*  w w  w . j  ava  2  s .c  o m*/
 *
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be
 * stored
 * @param file The file to index, or the directory to recurse into to find
 * files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path".  Use a
                // field that is indexed (i.e. searchable), but don't tokenize 
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter).  This indexes to milli-second resolution, which
                // is often too fine.  You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.YES));
                insertarenIndice(file, "dc:creator", "creator", doc, "text");
                insertarenIndice(file, "dc:title", "title", doc, "text");
                insertarenIndice(file, "dc:description", "description", doc, "text");
                insertarenIndice(file, "dc:identifier", "identifier", doc, "text");
                insertarenIndice(file, "dc:date", "date", doc, "text");

                // Add the contents of the file to a field named "contents".  Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so 
                    // we use updateDocument instead to replace the old one matching the exact 
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}