Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig(Analyzer).

Prototype

public IndexWriterConfig(Analyzer analyzer)

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:com.study.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes all text files under a directory.
 *
 * <p>Recognised arguments:
 * <ul>
 *   <li>{@code -index INDEX_PATH} - where the index is written (default {@code "index"})</li>
 *   <li>{@code -docs DOCS_PATH} - the documents to index (required)</li>
 *   <li>{@code -update} - append to an existing index instead of recreating it</li>
 * </ul>
 * The usage text is printed and the JVM exits with status 1 when {@code -docs} is
 * missing or when an option that needs a value is the last argument.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-update".equals(arg)) {
            create = false;
        } else if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options consume the following argument; reject a dangling option
            // instead of failing with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setInfoStream(System.out);
        // CREATE discards any previously indexed documents in the directory;
        // CREATE_OR_APPEND adds new documents to an existing index.
        iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer and then the directory even when
        // indexDocs fails part-way through.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        long endMillis = System.currentTimeMillis();
        System.out.println(endMillis - startMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.tekstosense.stemmer.index.Indexer.java

License:Open Source License

/**
 * Opens the index directory at {@code INDEX_PATH} and writes four hard-coded sample
 * entries to it through {@code addDoc}, using a {@link StandardAnalyzer}.
 *
 * <p>The writer and the directory are closed when the method returns, including
 * when adding a document fails.
 *
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
private static void indexer() throws IOException {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Path indexDirectoryPath = new File(INDEX_PATH).toPath();
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);

    // Resources are closed in reverse order: writer first, then the directory.
    try (FSDirectory indexDirectory = new SimpleFSDirectory(indexDirectoryPath);
            IndexWriter w = new IndexWriter(indexDirectory, conf)) {
        addDoc(w, "Lucene in Action", "193398817");
        addDoc(w, "Lucene for Dummies", "55320055Z");
        addDoc(w, "Managing Gigabytes", "55063554A");
        addDoc(w, "The Art of Computer Science", "9900333X");
    }
}

From source file:com.test.LuceneDemo.java

License:Apache License

/**
 * Round-trip check: indexes a single stored text field into an in-memory directory,
 * then searches for the term "indexed" and verifies that exactly one hit comes back
 * carrying the original text.
 *
 * <p>Writer, reader and directory are managed with try-with-resources so they are
 * released even when an assertion or the query parse fails.
 */
@Test
public void test() throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    final String text = "This is the text to be indexed.";
    Analyzer analyzer = new StandardAnalyzer();

    // Store the index in memory.
    // To store an index on disk, use this instead:
    //Directory directory = FSDirectory.open("/tmp/testindex");
    try (Directory directory = new RAMDirectory()) {
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        try (IndexWriter iwriter = new IndexWriter(directory, config)) {
            Document doc = new Document();
            doc.add(new Field("fieldname", text, TextField.TYPE_STORED));
            iwriter.addDocument(doc);
        }

        // Now search the index (the reader is opened only after the writer is closed):
        try (DirectoryReader ireader = DirectoryReader.open(directory)) {
            IndexSearcher isearcher = new IndexSearcher(ireader);
            // Parse a simple query that searches for "indexed":
            QueryParser parser = new QueryParser("fieldname", analyzer);
            Query query = parser.parse("indexed");
            ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
            assertEquals(1, hits.length);
            // Every hit must return the stored text unchanged:
            for (ScoreDoc hit : hits) {
                Document hitDoc = isearcher.doc(hit.doc);
                assertEquals(text, hitDoc.get("fieldname"));
            }
        }
    }
}

From source file:com.tripod.lucene.example.TestExampleLuceneBase.java

License:Apache License

/**
 * Builds the shared test fixture: a {@link StandardAnalyzer}, an in-memory directory,
 * a facets configuration for the color field, five example documents committed to the
 * index, and finally a {@link SearcherManager} over that directory.
 *
 * @throws IOException if writing to the index fails
 * @throws ParseException if one of the fixture dates cannot be parsed with {@code DATE_FORMAT}
 */
@Before
public void setupBase() throws IOException, ParseException {
    analyzer = new StandardAnalyzer();
    directory = new RAMDirectory();

    facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(ExampleField.COLOR.getName(), ExampleField.COLOR.getName());

    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);

        addExampleDocument(writer, dateFormat, "1", "Body 1 Solr is cool", "BLUE", "2016-10-01T01:00:00Z");
        addExampleDocument(writer, dateFormat, "2", "Body 2 Lucene is cool", "RED", "2016-10-01T02:00:00Z");
        addExampleDocument(writer, dateFormat, "3", "Body 3 Solr is Great, Solr is Fun", "GREEN",
                "2016-10-01T03:00:00Z");
        addExampleDocument(writer, dateFormat, "4", "Body 4", "BLUE", "2016-10-01T04:00:00Z");
        addExampleDocument(writer, dateFormat, "5", "Body 5", "RED", "2016-10-01T05:00:00Z");

        // commit docs
        writer.commit();
    }

    // needs to be opened after the writer is closed otherwise it won't see the test data
    searcherManager = new SearcherManager(directory, null);
}

/**
 * Builds one fixture document and adds it to the index through {@code facetsConfig}.
 *
 * <p>The document carries the ID (stored string plus sorted doc value), a title of the
 * form {@code "Title <id>"}, the body text, the color (stored string plus facet field)
 * and the creation time in epoch milliseconds (numeric doc value plus stored field).
 *
 * @param writer     open writer the document is added to
 * @param dateFormat formatter used to parse {@code createDate}
 * @param id         document ID, also used as the title suffix
 * @param body       body text
 * @param color      color value
 * @param createDate creation timestamp in the pattern expected by {@code dateFormat}
 * @throws IOException if the writer fails to add the document
 * @throws ParseException if {@code createDate} cannot be parsed
 */
private void addExampleDocument(IndexWriter writer, SimpleDateFormat dateFormat, String id, String body,
        String color, String createDate) throws IOException, ParseException {
    Document doc = new Document();
    doc.add(new Field(ExampleField.ID.getName(), id, StringField.TYPE_STORED));
    doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef(id)));
    doc.add(new Field(ExampleField.TITLE.getName(), "Title " + id, TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.BODY.getName(), body, TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.COLOR.getName(), color, StringField.TYPE_STORED));
    doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), color));

    long createMillis = dateFormat.parse(createDate).getTime();
    doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createMillis));
    doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), createMillis));
    writer.addDocument(facetsConfig.build(doc));
}

From source file:com.tripod.lucene.example.TestExampleSummaryQueryService.java

License:Apache License

/**
 * Verifies that a document committed after the {@code SearcherManager} was opened is
 * invisible to {@code queryService} until a {@code SearcherManagerRefresher} has run,
 * and visible afterwards.
 */
@Test
public void testRefreshingSearcherManager()
        throws IOException, ParseException, QueryException, InterruptedException {
    // Add a new document
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);

        Document doc = new Document();
        doc.add(new Field(ExampleField.ID.getName(), "99", StringField.TYPE_STORED));
        // The doc-values copy of the ID must carry the same value as the stored ID.
        doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("99")));
        doc.add(new Field(ExampleField.TITLE.getName(), "Title 99", TextField.TYPE_STORED));
        doc.add(new Field(ExampleField.BODY.getName(), "Body 99", TextField.TYPE_STORED));
        doc.add(new Field(ExampleField.COLOR.getName(), "BLUE", StringField.TYPE_STORED));
        doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "BLUE"));

        Date createDate = dateFormat.parse("2016-11-01T01:00:00Z");
        doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate.getTime()));
        doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate.getTime()));
        writer.addDocument(facetsConfig.build(doc));

        writer.commit();
    }

    // Query for the new document and shouldn't get it
    LuceneQuery query = new LuceneQuery("id:99");
    QueryResults<ExampleSummary> results = queryService.search(query);

    assertNotNull(results);
    assertNotNull(results.getResults());
    assertEquals(0, results.getResults().size());

    // Start a refresher for the SearcherManager (constructed with 2000; the test then
    // sleeps 3000 ms, i.e. slightly longer than that interval)
    SearcherManagerRefresher refresher = new SearcherManagerRefresher(searcherManager, 2000);
    try {
        // Start the refresher and then wait slightly longer than refresh interval
        refresher.start();
        Thread.sleep(3000);

        // Query again should get a result now
        query = new LuceneQuery("id:99");
        results = queryService.search(query);

        assertNotNull(results);
        assertNotNull(results.getResults());
        assertEquals(1, results.getResults().size());
    } finally {
        // Always stop the refresher, even when an assertion above fails.
        refresher.stop();
    }
}

From source file:com.twentyn.patentSearch.DocumentIndexer.java

License:Open Source License

/**
 * Command-line entry point: parses the options, optionally enables debug logging,
 * validates the input path, opens (or creates) the index and feeds the patent corpus
 * at the input path into it.
 *
 * <p>Options: {@code -i/--input} (required), {@code -x/--index} (required),
 * {@code -h/--help}, {@code -v/--verbose}.
 *
 * @param args command-line arguments
 * @throws Exception if opening the index or reading the corpus fails
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();
    Options opts = new Options();
    opts.addOption(Option.builder("i").longOpt("input").hasArg().required()
            .desc("Input file or directory to index").build());
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required()
            .desc("Path to index file to generate").build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        // NOTE(review): because -i and -x are declared required, passing only --help
        // presumably ends up in the catch block below (exit status 1) - confirm.
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);

        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    // Validate the input location before touching the index, so that a bad input path
    // does not create or lock an index and then exit with the writer still open.
    String inputFileOrDir = cmdLine.getOptionValue("input");
    File splitFileOrDir = new File(inputFileOrDir);
    if (!(splitFileOrDir.exists())) {
        LOGGER.error("Unable to find directory at " + inputFileOrDir);
        System.exit(1);
    }

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
    Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());

    /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
     * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination.  This custom
     * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
     * cruft to the index. */
    Analyzer analyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter("lowercase")
            .addTokenFilter("stop").build();

    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    // 1 << 10 == 1024, i.e. a 1024 MB RAM buffer.
    writerConfig.setRAMBufferSizeMB(1 << 10);
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    DocumentIndexer indexer = new DocumentIndexer(indexWriter);
    PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
    corpusReader.readPatentCorpus();
    indexer.commitAndClose();
}

From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java

License:Open Source License

/**
 * Opens an index writer over the given directory using a {@link SimpleAnalyzer}.
 *
 * <p>The index is recreated when {@code indexOptions} contains
 * {@code BlobIndexOption.CREATE}; otherwise it is created or appended to. When the host
 * reports a non-null memory limit for this service, that limit (at least 1) is used as
 * the writer's RAM buffer size in MB. An initial commit is issued before returning.
 *
 * @param directory file-system location of the index
 * @return the newly opened writer
 * @throws IOException if the directory or writer cannot be opened, or the commit fails
 */
public IndexWriter createWriter(File directory) throws IOException {
    Directory indexDir = MMapDirectory.open(directory.toPath());
    IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());

    boolean recreate = this.indexOptions.contains(BlobIndexOption.CREATE);
    config.setOpenMode(recreate ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

    Long limitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (limitMB != null) {
        // Never configure a buffer smaller than 1 MB.
        long bufferMB = Math.max(1, limitMB);
        config.setRAMBufferSizeMB(bufferMB);
    }

    IndexWriter created = new IndexWriter(indexDir, config);
    created.commit();
    return created;
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Opens the service's index writer over the given directory and stores it in
 * {@code this.writer}.
 *
 * <p>When {@code doUpgrade} is set and an index already exists, {@code upgradeIndex} is
 * run first. The writer uses a {@link SimpleAnalyzer}, {@code CREATE_OR_APPEND} mode and
 * a snapshot deletion policy wrapping keep-only-last-commit. When the host reports a
 * non-null memory limit, half of it (at least 1) becomes the RAM buffer size in MB.
 * After an initial commit both index timestamps are set to the current time.
 *
 * @param directory file-system location of the index
 * @param doUpgrade whether to upgrade an existing index in place before opening it
 * @return the writer now held in {@code this.writer}
 * @throws Exception if opening, upgrading or committing the index fails
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    Directory indexDir = MMapDirectory.open(directory.toPath());

    // Upgrade the index in place if necessary.
    boolean upgradeExisting = doUpgrade && DirectoryReader.indexExists(indexDir);
    if (upgradeExisting) {
        upgradeIndex(indexDir);
    }

    IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    Long limitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (limitMB != null) {
        // give half to the index, the other half we keep for service caching context
        long bufferMB = Math.max(1, limitMB / 2);
        config.setRAMBufferSizeMB(bufferMB);
    }

    // The field is assigned before the commit, so it is set even if the commit throws.
    this.writer = new IndexWriter(indexDir, config);
    this.writer.commit();

    long nowMicros = Utils.getNowMicrosUtc();
    this.indexUpdateTimeMicros = nowMicros;
    this.indexWriterCreationTimeMicros = nowMicros;
    return this.writer;
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Upgrades the index in {@code dir} when at least one of its segments was not written
 * with {@code Version.LATEST}.
 *
 * <p>Segment versions are read through a {@link CheckIndex} run, which is always closed
 * afterwards. If an upgrade is performed, {@code indexUpdateTimeMicros} is refreshed.
 *
 * @param dir directory holding the index to inspect
 * @throws IOException if checking or upgrading the index fails
 */
private void upgradeIndex(Directory dir) throws IOException {
    // No analyzer is supplied for the upgrade configuration.
    IndexWriterConfig upgradeConfig = new IndexWriterConfig(null);
    CheckIndex checker = new CheckIndex(dir);
    boolean outdated = false;

    try {
        for (CheckIndex.Status.SegmentInfoStatus segment : checker.checkIndex().segmentInfos) {
            if (segment.version.equals(Version.LATEST)) {
                continue;
            }
            // One outdated segment is enough to trigger the upgrade.
            logInfo("Found Index version %s", segment.version.toString());
            outdated = true;
            break;
        }
    } finally {
        checker.close();
    }

    if (!outdated) {
        return;
    }

    logInfo("Upgrading index to %s", Version.LATEST.toString());
    new IndexUpgrader(dir, upgradeConfig, false).upgrade();
    this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Opens a new index writer over the given directory and publishes it as the service's
 * current writer.
 *
 * <p>When the host reports a non-null memory limit, three quarters of it (at least 1)
 * become the writer's RAM buffer size in MB and one quarter is recorded in
 * {@code linkAccessMemoryLimitMB}. When {@code doUpgrade} is set and an index already
 * exists, {@code upgradeIndex} is run before the writer is opened. The writer uses
 * {@code CREATE_OR_APPEND} mode with a snapshot deletion policy wrapping
 * keep-only-last-commit, and an initial commit is issued.
 *
 * <p>Publishing the writer, clearing {@code linkAccessTimes} and resetting the index
 * timestamps happen together while holding {@code searchSync}.
 *
 * @param directory file-system location of the index
 * @param doUpgrade whether to upgrade an existing index in place before opening it
 * @return the writer created by this call
 * @throws Exception if opening, upgrading or committing the index fails
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (totalMBs != null) {
        // 3/4 of the limit goes to the index RAM buffer, 1/4 to link-access tracking.
        long cacheSizeMB = (totalMBs * 3) / 4;
        cacheSizeMB = Math.max(1, cacheSizeMB);
        iwc.setRAMBufferSizeMB(cacheSizeMB);
        this.linkAccessMemoryLimitMB = totalMBs / 4;
    }

    Directory dir = MMapDirectory.open(directory.toPath());

    // Upgrade the index in place if necessary.
    if (doUpgrade && DirectoryReader.indexExists(dir)) {
        upgradeIndex(dir);
    }

    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    IndexWriter w = new IndexWriter(dir, iwc);
    w.commit();

    synchronized (this.searchSync) {
        this.writer = w;
        this.linkAccessTimes.clear();
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
        this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros;
    }
    // Return the local reference rather than re-reading this.writer outside the lock,
    // so the caller always gets the writer this call created.
    return w;
}