Example usage for org.apache.lucene.index IndexWriter getConfig

List of usage examples for org.apache.lucene.index IndexWriter getConfig

Introduction

On this page you can find example usages for org.apache.lucene.index IndexWriter getConfig.

Prototype

public LiveIndexWriterConfig getConfig() 

Document

Returns a LiveIndexWriterConfig, which can be used to query the IndexWriter's current settings, as well as to modify "live" ones.
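
Before the usage examples below, here is a minimal self-contained sketch of the typical pattern: query a current setting through getConfig(), and change a "live" one. It is not taken from the projects below and assumes a recent Lucene release (ByteBuffersDirectory requires Lucene 8+; on older versions substitute RAMDirectory):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LiveIndexWriterConfig;
import org.apache.lucene.store.ByteBuffersDirectory;

public class GetConfigSketch {
    public static void main(String[] args) throws Exception {
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(new ByteBuffersDirectory(), config)) {
            LiveIndexWriterConfig live = writer.getConfig();
            // Query a setting that was fixed at construction time:
            System.out.println("open mode: " + live.getOpenMode());
            // Change a "live" setting; takes effect without reopening the writer:
            live.setRAMBufferSizeMB(64.0);
        }
    }
}

Only the settings exposed as setters on LiveIndexWriterConfig can be changed after construction; everything else is fixed when the IndexWriter is created.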

Usage

From source file:net.riezebos.thoth.content.search.Indexer.java

License:Apache License

protected void addToIndex(IndexWriter writer, String resourcePath, String resourceType, String title,
        String contents, Map<String, String> metaTags) throws IOException {
    String extension = ThothUtil.getExtension(resourcePath);
    if (extension == null)
        extension = "";
    extension = extension.toLowerCase();

    Document document = new Document();
    document.add(new StringField(INDEX_PATH, resourcePath, Field.Store.YES));
    document.add(new TextField(INDEX_TYPE, resourceType, Store.YES));
    document.add(new TextField(INDEX_TITLE, title, Store.YES));
    document.add(new TextField(INDEX_CONTENTS, contents, Store.NO));
    document.add(new TextField(INDEX_USED, "true", Store.NO));
    document.add(new TextField(INDEX_EXTENSION, extension, Store.NO));

    metaTags.entrySet().stream().forEach(entry -> document
            .add(new TextField(entry.getKey().toLowerCase(), String.valueOf(entry.getValue()), Store.NO)));

    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
        // New index, so we just add the document (no old document can be there):
        LOG.debug("Indexer for context " + contentManager.getContextName() + " added " + resourcePath);
        writer.addDocument(document);
    } else {
        // Existing index (an old copy of this document may have been indexed) so
        // we use updateDocument instead to replace the old one matching the exact
        // path, if present:
        LOG.debug("Indexer for context " + contentManager.getContextName() + " updated " + resourcePath);
        writer.updateDocument(new Term(INDEX_PATH, resourcePath), document);
    }
}
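
The OpenMode consulted above is not something addToIndex sets; it is fixed when the writer is constructed. A brief sketch of the corresponding setup (the method and variable names here are illustrative assumptions, not from this project):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;

static IndexWriter openWriter(Directory dir, Analyzer analyzer, boolean wipeIndex) throws java.io.IOException {
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // This is the value later read back via writer.getConfig().getOpenMode():
    config.setOpenMode(wipeIndex ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(dir, config);
}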

From source file:net.riezebos.thoth.content.search.util.TestIndexer.java

License:Apache License

@Override
protected IndexWriter getWriter(boolean wipeIndex) throws IOException {
    LiveIndexWriterConfig config = mock(LiveIndexWriterConfig.class);
    when(config.getOpenMode()).thenReturn(wipeIndex ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

    IndexWriter indexWriter = mock(IndexWriter.class);
    when(indexWriter.getConfig()).thenReturn(config);
    recordAddDocument(indexWriter);
    recordUpdateDocument(indexWriter);

    return indexWriter;
}

From source file:nl.knaw.huygens.timbuctoo.lucene.demoTwo.IndexFiles.java

License:Apache License

private static void indexFields(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new LongPoint("modified", lastModified));

        // Read the file line by line and index "key: value" pairs as fields.
        // Lines between "content" and "end_content" markers are collected
        // into a single "content" field. The stream is decoded as UTF-8; if
        // the file is not UTF-8, searching for special characters will fail.
        BufferedReader br = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8));
        FieldType ft = new FieldType();
        ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        ft.setStored(true);
        String line;
        String content = "";
        boolean inContent = false;
        while ((line = br.readLine()) != null) {
            if (inContent && !line.contains(":")) {
                content += " " + line;
            }
            if (!line.isEmpty() && line.contains(":")) {
                // Split on the first colon only, so values may contain colons
                // and a bare "key:" line cannot cause an out-of-bounds access.
                String[] splitLine = line.split(":", 2);
                if (splitLine[0].equals("content")) {
                    inContent = true;
                    content += splitLine[1];
                } else if (splitLine[0].equals("end_content")) {
                    inContent = false;
                    doc.add(new Field("content", content.trim(), ft));
                } else {
                    doc.add(new Field(splitLine[0].trim(), splitLine[1].trim(), ft));
                    System.out.println(splitLine[0].trim() + " - " + splitLine[1].trim());
                }
            }
        }
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been
            // indexed) so we use updateDocument instead to replace the old
            // one matching the exact path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:oldClasses.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 * 
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 *             If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file as a field named
                // "modified". Use a LongField that is indexed (i.e.
                // efficiently filterable with NumericRangeFilter). This
                // indexes to millisecond resolution, which is often too fine.
                // You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution
                // you require. For example the long value 2011021714 would
                // mean February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents".
                // Specify a Reader, so that the text of the file is tokenized
                // and indexed, but not stored. The stream is decoded as
                // UTF-8; if the file is not UTF-8, searching for special
                // characters will fail.

                doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document
                    // can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have
                    // been indexed) so we use updateDocument instead to
                    // replace the old one matching the exact path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}
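
Note that this oldClasses variant targets a pre-6.0 Lucene API: LongField was replaced in Lucene 6 by the point-based LongPoint. A sketch of the modern equivalent of the "modified" field, matching the nl.knaw.huygens example earlier on this page:

import org.apache.lucene.document.LongPoint;

doc.add(new LongPoint("modified", file.lastModified()));
// Range filtering is then expressed as a query instead of a filter:
// Query q = LongPoint.newRangeQuery("modified", minMillis, maxMillis);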

From source file:org.abondar.experimental.eventsearch.SearchData.java

public void indexDoc(IndexWriter iw, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {

        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        ObjectMapper mapper = new ObjectMapper();
        Event eb = mapper.readValue(new File(file.toString()), Event.class);
        doc.add(new TextField("category", eb.getCategory(), Field.Store.YES));

        if (iw.getConfig().getOpenMode() == OpenMode.CREATE) {
            iw.addDocument(doc);
            for (IndexableField ifd : doc.getFields()) {
                System.out.println(ifd.stringValue() + "  " + ifd.name());
            }
            System.out.println("adding " + file);

        } else {

            iw.updateDocument(new Term("path", file.toString()), doc);
            System.out.println("updating " + file);
        }

    }
}

From source file:org.apache.pdfbox.examples.lucene.IndexPDFFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is given, recurses over files and directories
 * found under the given directory.
 * 
 * NOTE: This method indexes one document per input file. This is slow. For good throughput, put multiple documents
 * into your input file(s). An example of this is in the benchmark module, which can create "line doc" files, one
 * document per line, using the <a
 * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (String fileName : files) {
                    indexDocs(writer, new File(file, fileName));
                }
            }
        } else {

            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {

                String path = file.getName().toLowerCase();
                Document doc = null;
                if (path.endsWith(".pdf")) {
                    System.out.println("Indexing PDF document: " + file);
                    doc = LucenePDFDocument.getDocument(file);
                } else {
                    System.out.println("Skipping " + file);
                    return;
                }

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("uid", LucenePDFDocument.createUID(file)), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}

From source file:org.apache.solr.core.TestPropInjectDefaults.java

License:Apache License

@Test
public void testMergePolicyDefaults() throws Exception {
    ExposeWriterHandler uh = new ExposeWriterHandler();
    IndexWriter writer = uh.getWriter();
    LogByteSizeMergePolicy mp = (LogByteSizeMergePolicy) writer.getConfig().getMergePolicy();
    assertEquals(32.0, mp.getMaxMergeMB(), 0);
    uh.close();
}

From source file:org.apache.solr.core.TestPropInjectDefaults.java

License:Apache License

@Test
public void testPropsDefaults() throws Exception {
    ExposeWriterHandler uh = new ExposeWriterHandler();
    IndexWriter writer = uh.getWriter();
    ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) writer.getConfig().getMergeScheduler();
    assertEquals(4, cms.getMaxThreadCount());
    uh.close();
}

From source file:org.apache.solr.core.TestSimpleTextCodec.java

License:Apache License

public void test() throws Exception {
    SolrConfig config = h.getCore().getSolrConfig();
    String codecFactory = config.get("codecFactory/@class");
    assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", codecFactory);

    assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());

    RefCounted<IndexWriter> writerRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
    try {
        IndexWriter writer = writerRef.get();
        assertEquals("Unexpected codec in IndexWriter config", "SimpleText",
                writer.getConfig().getCodec().getName());
    } finally {
        writerRef.decref();
    }

    assertU(add(doc("id", "1", "text", "textual content goes here")));
    assertU(commit());

    RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getSearcher();
    try {
        SolrIndexSearcher searcher = searcherRef.get();
        SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
        SegmentInfo info = infos.info(infos.size() - 1).info;
        assertEquals("Unexpected segment codec", "SimpleText", info.getCodec().getName());
    } finally {
        searcherRef.decref();
    }

    assertQ(req("q", "id:1"), "*[count(//doc)=1]");
}

From source file:org.apache.solr.handler.admin.SegmentsInfoRequestHandler.java

License:Apache License

private List<String> getMergeCandidatesNames(SolrQueryRequest req, SegmentInfos infos) throws IOException {
    List<String> result = new ArrayList<String>();
    IndexWriter indexWriter = getIndexWriter(req);
    //get chosen merge policy
    MergePolicy mp = indexWriter.getConfig().getMergePolicy();
    //Find merges
    MergeSpecification findMerges = mp.findMerges(MergeTrigger.EXPLICIT, infos, indexWriter);
    if (findMerges != null && findMerges.merges != null && findMerges.merges.size() > 0) {
        for (OneMerge merge : findMerges.merges) {
            //TODO: add merge grouping
            for (SegmentCommitInfo mergeSegmentInfo : merge.segments) {
                result.add(mergeSegmentInfo.info.name);
            }
        }
    }

    return result;
}