List of usage examples for org.apache.lucene.index.IndexWriter#getConfig()
public LiveIndexWriterConfig getConfig()
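For orientation before the project examples: getConfig() returns the writer's LiveIndexWriterConfig, a live view of the configuration the writer was created with (the original IndexWriterConfig must not be reused after construction). The sketch below shows the OpenMode check that most of the examples on this page build on; it is a minimal illustration against the Lucene 5+ API, and the directory path and the "path" field name are made up for the example, not taken from any of the sources.

  import java.nio.file.Paths;

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.document.StringField;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.IndexWriterConfig;
  import org.apache.lucene.index.IndexWriterConfig.OpenMode;
  import org.apache.lucene.index.Term;
  import org.apache.lucene.store.FSDirectory;

  public class GetConfigOpenModeSketch {
    public static void main(String[] args) throws Exception {
      IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
      iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
      try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("example-index")), iwc)) {
        Document doc = new Document();
        doc.add(new StringField("path", "docs/example.txt", Field.Store.YES));
        // getConfig() exposes the LiveIndexWriterConfig actually in effect;
        // the OpenMode decides between a plain add and an update-by-term:
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          writer.addDocument(doc); // fresh index: no older copy of this doc can exist
        } else {
          writer.updateDocument(new Term("path", "docs/example.txt"), doc); // replace if present
        }
      }
    }
  }

The same add-vs-update branch appears, with project-specific field names, in most of the indexers below.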
From source file:net.riezebos.thoth.content.search.Indexer.java
License:Apache License
protected void addToIndex(IndexWriter writer, String resourcePath, String resourceType, String title,
    String contents, Map<String, String> metaTags) throws IOException {
  String extension = ThothUtil.getExtension(resourcePath);
  if (extension == null)
    extension = "";
  extension = extension.toLowerCase();

  Document document = new Document();
  document.add(new StringField(INDEX_PATH, resourcePath, Field.Store.YES));
  document.add(new TextField(INDEX_TYPE, resourceType, Store.YES));
  document.add(new TextField(INDEX_TITLE, title, Store.YES));
  document.add(new TextField(INDEX_CONTENTS, contents, Store.NO));
  document.add(new TextField(INDEX_USED, "true", Store.NO));
  document.add(new TextField(INDEX_EXTENSION, extension, Store.NO));
  metaTags.entrySet().stream().forEach(entry -> document
      .add(new TextField(entry.getKey().toLowerCase(), String.valueOf(entry.getValue()), Store.NO)));

  if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
    // New index, so we just add the document (no old document can be there):
    LOG.debug("Indexer for context " + contentManager.getContextName() + " added " + resourcePath);
    writer.addDocument(document);
  } else {
    // Existing index (an old copy of this document may have been indexed) so
    // we use updateDocument instead to replace the old one matching the exact
    // path, if present:
    LOG.debug("Indexer for context " + contentManager.getContextName() + " updated " + resourcePath);
    writer.updateDocument(new Term(INDEX_PATH, resourcePath), document);
  }
}
From source file:net.riezebos.thoth.content.search.util.TestIndexer.java
License:Apache License
@Override
protected IndexWriter getWriter(boolean wipeIndex) throws IOException {
  LiveIndexWriterConfig config = mock(LiveIndexWriterConfig.class);
  when(config.getOpenMode()).thenReturn(wipeIndex ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
  IndexWriter indexWriter = mock(IndexWriter.class);
  when(indexWriter.getConfig()).thenReturn(config);
  recordAddDocument(indexWriter);
  recordUpdateDocument(indexWriter);
  return indexWriter;
}
From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.IndexFiles.java
License:Apache License
private static void indexFields(IndexWriter writer, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    // make a new, empty document
    Document doc = new Document();

    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);
    doc.add(new LongPoint("modified", lastModified));

    // Add the contents of the file to a field named "contents". Specify a Reader,
    // so that the text of the file is tokenized and indexed, but not stored.
    // Note that FileReader expects the file to be in UTF-8 encoding.
    // If that's not the case searching for special characters will fail.
    BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
    String line = null;
    String[] splitLine;
    String content = "";
    boolean inContent = false;
    while ((line = br.readLine()) != null) {
      if (inContent && !line.contains(":")) {
        content += " " + line;
      }
      FieldType ft = new FieldType();
      ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
      ft.setStored(true);
      if (!line.isEmpty() && line.contains(":")) {
        splitLine = line.split(":");
        if (splitLine[0].equals("content")) {
          inContent = true;
          if (splitLine.length > 1) {
            content += splitLine[1];
          }
        } else if (splitLine[0].equals("end_content")) {
          inContent = false;
          doc.add(new Field("content", content.trim(), ft));
        } else if (splitLine.length > 1) {
          // length guard: a line like "key:" with nothing after the colon
          // would otherwise throw ArrayIndexOutOfBoundsException here
          doc.add(new Field(splitLine[0].trim(), splitLine[1].trim(), ft));
          System.out.println(splitLine[0].trim() + " - " + splitLine[1].trim());
        }
      }
    }

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
      // New index, so we just add the document (no old document can be there):
      System.out.println("adding " + file);
      writer.addDocument(doc);
    } else {
      // Existing index (an old copy of this document may have been indexed) so
      // we use updateDocument instead to replace the old one matching the exact
      // path, if present:
      System.out.println("updating " + file);
      writer.updateDocument(new Term("path", file.toString()), doc);
    }
  }
}
From source file:oldClasses.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
  // do not try to index files that cannot be read
  if (file.canRead()) {
    if (file.isDirectory()) {
      String[] files = file.list();
      // an IO error could occur
      if (files != null) {
        for (int i = 0; i < files.length; i++) {
          indexDocs(writer, new File(file, files[i]));
        }
      }
    } else {
      FileInputStream fis;
      try {
        fis = new FileInputStream(file);
      } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this
        // exception with an "access denied" message
        // checking if the file can be read doesn't help
        return;
      }
      try {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify
        // a Reader, so that the text of the file is tokenized and indexed,
        // but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
          System.out.println("adding " + file);
          writer.addDocument(doc);
        } else {
          // Existing index (an old copy of this document may have been indexed) so
          // we use updateDocument instead to replace the old one matching the exact
          // path, if present:
          System.out.println("updating " + file);
          writer.updateDocument(new Term("path", file.getPath()), doc);
        }
      } finally {
        fis.close();
      }
    }
  }
}
From source file:org.abondar.experimental.eventsearch.SearchData.java
public void indexDoc(IndexWriter iw, Path file, long lastModified) throws IOException {
  try (InputStream stream = Files.newInputStream(file)) {
    Document doc = new Document();
    Field pathField = new StringField("path", file.toString(), Field.Store.YES);
    doc.add(pathField);

    ObjectMapper mapper = new ObjectMapper();
    Event eb = mapper.readValue(new File(file.toString()), Event.class);
    doc.add(new TextField("category", eb.getCategory(), Field.Store.YES));

    if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
      iw.addDocument(doc);
      for (IndexableField ifd : doc.getFields()) {
        System.out.println(ifd.stringValue() + " " + ifd.name());
      }
      System.out.println("adding " + file);
    } else {
      iw.updateDocument(new Term("path", file.toString()), doc);
      System.out.println("updating " + file);
    }
  }
}
From source file:org.apache.pdfbox.examples.lucene.IndexPDFFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given, recurses over files and directories
 * found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good throughput, put multiple documents
 * into your input file(s). An example of this is in the benchmark module, which can create "line doc" files, one
 * document per line, using the <a
 * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
  // do not try to index files that cannot be read
  if (file.canRead()) {
    if (file.isDirectory()) {
      String[] files = file.list();
      // an IO error could occur
      if (files != null) {
        for (String fileName : files) {
          indexDocs(writer, new File(file, fileName));
        }
      }
    } else {
      FileInputStream fis;
      try {
        fis = new FileInputStream(file);
      } catch (FileNotFoundException fnfe) {
        // at least on windows, some temporary files raise this exception with an "access denied" message
        // checking if the file can be read doesn't help
        return;
      }
      try {
        String path = file.getName().toUpperCase();
        Document doc = null;
        if (path.toLowerCase().endsWith(".pdf")) {
          System.out.println("Indexing PDF document: " + file);
          doc = LucenePDFDocument.getDocument(file);
        } else {
          System.out.println("Skipping " + file);
          return;
        }
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
          System.out.println("adding " + file);
          writer.addDocument(doc);
        } else {
          // Existing index (an old copy of this document may have been indexed) so
          // we use updateDocument instead to replace the old one matching the exact
          // path, if present:
          System.out.println("updating " + file);
          writer.updateDocument(new Term("uid", LucenePDFDocument.createUID(file)), doc);
        }
      } finally {
        fis.close();
      }
    }
  }
}
From source file:org.apache.solr.core.TestPropInjectDefaults.java
License:Apache License
@Test
public void testMergePolicyDefaults() throws Exception {
  ExposeWriterHandler uh = new ExposeWriterHandler();
  IndexWriter writer = uh.getWriter();
  LogByteSizeMergePolicy mp = (LogByteSizeMergePolicy) writer.getConfig().getMergePolicy();
  assertEquals(32.0, mp.getMaxMergeMB(), 0);
  uh.close();
}
From source file:org.apache.solr.core.TestPropInjectDefaults.java
License:Apache License
@Test
public void testPropsDefaults() throws Exception {
  ExposeWriterHandler uh = new ExposeWriterHandler();
  IndexWriter writer = uh.getWriter();
  ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler();
  assertEquals(4, cms.getMaxThreadCount());
  uh.close();
}
From source file:org.apache.solr.core.TestSimpleTextCodec.java
License:Apache License
public void test() throws Exception {
  SolrConfig config = h.getCore().getSolrConfig();
  String codecFactory = config.get("codecFactory/@class");
  assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", codecFactory);

  assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());

  RefCounted<IndexWriter> writerRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
  try {
    IndexWriter writer = writerRef.get();
    assertEquals("Unexpected codec in IndexWriter config", "SimpleText",
        writer.getConfig().getCodec().getName());
  } finally {
    writerRef.decref();
  }

  assertU(add(doc("id", "1", "text", "textual content goes here")));
  assertU(commit());

  RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getSearcher();
  try {
    SolrIndexSearcher searcher = searcherRef.get();
    SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
    SegmentInfo info = infos.info(infos.size() - 1).info;
    assertEquals("Unexpected segment codec", "SimpleText", info.getCodec().getName());
  } finally {
    searcherRef.decref();
  }

  assertQ(req("q", "id:1"), "*[count(//doc)=1]");
}
From source file:org.apache.solr.handler.admin.SegmentsInfoRequestHandler.java
License:Apache License
private List<String> getMergeCandidatesNames(SolrQueryRequest req, SegmentInfos infos) throws IOException {
  List<String> result = new ArrayList<String>();
  IndexWriter indexWriter = getIndexWriter(req);
  // get the chosen merge policy
  MergePolicy mp = indexWriter.getConfig().getMergePolicy();
  // find merges
  MergeSpecification findMerges = mp.findMerges(MergeTrigger.EXPLICIT, infos, indexWriter);
  if (findMerges != null && findMerges.merges != null && findMerges.merges.size() > 0) {
    for (OneMerge merge : findMerges.merges) {
      // TODO: add merge grouping
      for (SegmentCommitInfo mergeSegmentInfo : merge.segments) {
        result.add(mergeSegmentInfo.info.name);
      }
    }
  }
  return result;
}