Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

List of usage examples for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig(Analyzer).

Prototype

public IndexWriterConfig(Analyzer analyzer) 

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:com.study.lucene.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognized arguments: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (required) and {@code -update} (open the index in
 * {@code CREATE_OR_APPEND} mode instead of {@code CREATE}).
 *
 * <p>Exits with status 1 when {@code -docs} is missing, when an option that takes a value
 * is given without one, or when the document directory is not readable.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        boolean takesValue = "-index".equals(arg) || "-docs".equals(arg);
        if (takesValue && i + 1 >= args.length) {
            // Without this guard a trailing "-index"/"-docs" would read past the end of args.
            System.err.println("Missing value for " + arg);
            System.err.println("Usage: " + usage);
            System.exit(1);
        }
        if ("-index".equals(arg)) {
            indexPath = args[++i];
        } else if ("-docs".equals(arg)) {
            docsPath = args[++i];
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    System.out.println("Indexing to directory '" + indexPath + "'...");

    // try-with-resources guarantees the directory (and the writer below) are released
    // even when indexing fails part-way through.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath))) {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setInfoStream(System.out);
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Measured after the writer has been closed, as in the original flow.
        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.tekstosense.stemmer.index.Indexer.java

License:Open Source License

/**
 * Opens an index at {@code INDEX_PATH} with a {@link StandardAnalyzer} and passes four
 * (title, identifier) pairs to {@code addDoc}.
 *
 * <p>The directory and the writer are closed when the method returns, including when
 * {@code addDoc} fails.
 *
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
private static void indexer() throws IOException {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Path indexDirectoryPath = new File(INDEX_PATH).toPath();
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);

    // Resources are closed in reverse order: the writer first, then the directory.
    try (FSDirectory indexDirectory = new SimpleFSDirectory(indexDirectoryPath);
            IndexWriter w = new IndexWriter(indexDirectory, conf)) {
        addDoc(w, "Lucene in Action", "193398817");
        addDoc(w, "Lucene for Dummies", "55320055Z");
        addDoc(w, "Managing Gigabytes", "55063554A");
        addDoc(w, "The Art of Computer Science", "9900333X");
    }
}

From source file:com.test.LuceneDemo.java

License:Apache License

/**
 * Round-trip check: indexes one stored text field into an in-memory directory, searches
 * for a term from that text and verifies that exactly one hit comes back carrying the
 * original text.
 */
@Test
public void test() throws IOException, org.apache.lucene.queryparser.classic.ParseException {
    final String fieldName = "fieldname";
    final String indexedText = "This is the text to be indexed.";

    Analyzer analyzer = new StandardAnalyzer();

    // The index lives in memory; an FSDirectory could be used instead for an on-disk index.
    Directory directory = new RAMDirectory();

    // --- Indexing phase ---
    IndexWriter iwriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
    Document source = new Document();
    source.add(new Field(fieldName, indexedText, TextField.TYPE_STORED));
    iwriter.addDocument(source);
    iwriter.close();

    // --- Search phase ---
    DirectoryReader ireader = DirectoryReader.open(directory);
    IndexSearcher isearcher = new IndexSearcher(ireader);

    // Parse a simple query against the indexed field.
    Query query = new QueryParser(fieldName, analyzer).parse("indexed");
    ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
    assertEquals(1, hits.length);

    // Every hit must give back the stored text unchanged.
    for (ScoreDoc hit : hits) {
        Document found = isearcher.doc(hit.doc);
        assertEquals(indexedText, found.get(fieldName));
    }

    ireader.close();
    directory.close();
}

From source file:com.tripod.lucene.example.TestExampleLuceneBase.java

License:Apache License

/**
 * Prepares the shared test fixture: an in-memory index holding five example documents,
 * a facets configuration for the color field, and a {@code SearcherManager} opened on
 * the populated directory.
 *
 * @throws IOException if writing to the index fails
 * @throws ParseException if one of the fixture create-dates cannot be parsed
 */
@Before
public void setupBase() throws IOException, ParseException {
    analyzer = new StandardAnalyzer();
    directory = new RAMDirectory();

    facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(ExampleField.COLOR.getName(), ExampleField.COLOR.getName());

    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);

        addExampleDocument(writer, dateFormat, "1", "Title 1", "Body 1 Solr is cool", "BLUE",
                "2016-10-01T01:00:00Z");
        addExampleDocument(writer, dateFormat, "2", "Title 2", "Body 2 Lucene is cool", "RED",
                "2016-10-01T02:00:00Z");
        addExampleDocument(writer, dateFormat, "3", "Title 3", "Body 3 Solr is Great, Solr is Fun", "GREEN",
                "2016-10-01T03:00:00Z");
        addExampleDocument(writer, dateFormat, "4", "Title 4", "Body 4", "BLUE",
                "2016-10-01T04:00:00Z");
        addExampleDocument(writer, dateFormat, "5", "Title 5", "Body 5", "RED",
                "2016-10-01T05:00:00Z");

        // commit docs
        writer.commit();
    }

    // needs to be opened after the writer is closed otherwise it won't see the test data
    searcherManager = new SearcherManager(directory, null);
}

/**
 * Builds one fixture document and adds it to the index through {@code facetsConfig}.
 *
 * <p>The id is written both as a stored string field and as sorted doc values; the color
 * both as a stored string field and as a facet field; the create date (epoch millis of
 * the parsed text) both as numeric doc values and as a stored field.
 *
 * @param writer     index writer receiving the document
 * @param dateFormat formatter used to parse {@code createDate}
 * @param id         value of the ID field
 * @param title      value of the TITLE field
 * @param body       value of the BODY field
 * @param color      value of the COLOR field and facet
 * @param createDate create date text in the pattern of {@code dateFormat}
 * @throws IOException if adding the document fails
 * @throws ParseException if {@code createDate} cannot be parsed
 */
private void addExampleDocument(IndexWriter writer, SimpleDateFormat dateFormat, String id, String title,
        String body, String color, String createDate) throws IOException, ParseException {
    Document doc = new Document();
    doc.add(new Field(ExampleField.ID.getName(), id, StringField.TYPE_STORED));
    doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef(id)));
    doc.add(new Field(ExampleField.TITLE.getName(), title, TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.BODY.getName(), body, TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.COLOR.getName(), color, StringField.TYPE_STORED));
    doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), color));

    Date created = dateFormat.parse(createDate);
    doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), created.getTime()));
    doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), created.getTime()));
    writer.addDocument(facetsConfig.build(doc));
}

From source file:com.tripod.lucene.example.TestExampleSummaryQueryService.java

License:Apache License

/**
 * Adds a document with id 99 to the index, checks that a query for it returns nothing
 * before a {@code SearcherManagerRefresher} is running, then starts the refresher,
 * waits, and checks that the same query now returns exactly one result.
 */
@Test
public void testRefreshingSearcherManager()
        throws IOException, ParseException, QueryException, InterruptedException {
    // Add a new document
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);

        Document doc = new Document();
        doc.add(new Field(ExampleField.ID.getName(), "99", StringField.TYPE_STORED));
        // Doc-values ID must match the stored ID above (it was "1", a copy-paste leftover).
        doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("99")));
        doc.add(new Field(ExampleField.TITLE.getName(), "Title 99", TextField.TYPE_STORED));
        doc.add(new Field(ExampleField.BODY.getName(), "Body 99", TextField.TYPE_STORED));
        doc.add(new Field(ExampleField.COLOR.getName(), "BLUE", StringField.TYPE_STORED));
        doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "BLUE"));

        Date createDate = dateFormat.parse("2016-11-01T01:00:00Z");
        doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate.getTime()));
        doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate.getTime()));
        writer.addDocument(facetsConfig.build(doc));

        writer.commit();
    }

    // Query for the new document and shouldn't get it
    // (presumably because the searcher manager has not been refreshed yet)
    LuceneQuery query = new LuceneQuery("id:99");
    QueryResults<ExampleSummary> results = queryService.search(query);

    assertNotNull(results);
    assertNotNull(results.getResults());
    assertEquals(0, results.getResults().size());

    // Start a refresher for the SearchManager
    SearcherManagerRefresher refresher = new SearcherManagerRefresher(searcherManager, 2000);
    try {
        // Start the refresher and then wait slightly longer than refresh interval
        refresher.start();
        Thread.sleep(3000);

        // Query again should get a result now
        query = new LuceneQuery("id:99");
        results = queryService.search(query);

        assertNotNull(results);
        assertNotNull(results.getResults());
        assertEquals(1, results.getResults().size());
    } finally {
        // Always stop the refresher, even when an assertion above fails.
        refresher.stop();
    }
}

From source file:com.twentyn.patentSearch.DocumentIndexer.java

License:Open Source License

/**
 * Command-line entry point: parses the options, validates the input location, opens (or
 * creates) the index with a custom analyzer, and feeds the input through a
 * {@code PatentCorpusReader} into a {@code DocumentIndexer}.
 *
 * <p>Options: {@code -i/--input} (required), {@code -x/--index} (required),
 * {@code -h/--help}, {@code -v/--verbose}. Exits with status 1 on a command-line parse
 * error or when the input path does not exist.
 *
 * @param args command-line arguments
 * @throws Exception if opening the index or reading the corpus fails
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();
    Options opts = new Options();
    opts.addOption(Option.builder("i").longOpt("input").hasArg().required()
            .desc("Input file or directory to index").build());
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required()
            .desc("Path to index file to generate").build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);

        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    // Validate the input before touching the index, so that a bad --input does not
    // open an index writer that is then abandoned by System.exit.
    String inputFileOrDir = cmdLine.getOptionValue("input");
    File splitFileOrDir = new File(inputFileOrDir);
    if (!(splitFileOrDir.exists())) {
        LOGGER.error("Unable to find directory at " + inputFileOrDir);
        System.exit(1);
    }

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
    Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());

    /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
     * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination.  This custom
     * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
     * cruft to the index. */
    Analyzer analyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter("lowercase")
            .addTokenFilter("stop").build();

    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    // 1 << 10 = 1024, i.e. a 1024 MB RAM buffer.
    writerConfig.setRAMBufferSizeMB(1 << 10);
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    DocumentIndexer indexer = new DocumentIndexer(indexWriter);
    PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
    corpusReader.readPatentCorpus();
    indexer.commitAndClose();
}

From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java

License:Open Source License

/**
 * Opens an {@link IndexWriter} over the given directory using a {@code SimpleAnalyzer}
 * and performs an initial commit before handing the writer back.
 *
 * <p>The open mode is {@code CREATE} when {@code indexOptions} contains
 * {@code BlobIndexOption.CREATE}, otherwise {@code CREATE_OR_APPEND}. When the host
 * reports a memory limit for this service, that limit (at least 1) is used as the RAM
 * buffer size in MB.
 *
 * @param directory file-system location of the index
 * @return the newly opened writer, already committed once
 * @throws IOException if the directory or the writer cannot be opened or committed
 */
public IndexWriter createWriter(File directory) throws IOException {
    Directory indexDir = MMapDirectory.open(directory.toPath());
    IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());

    boolean recreate = this.indexOptions.contains(BlobIndexOption.CREATE);
    config.setOpenMode(recreate ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

    Long memoryLimitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (memoryLimitMB != null) {
        // Never configure a buffer smaller than 1 MB.
        config.setRAMBufferSizeMB(Math.max(1, memoryLimitMB));
    }

    IndexWriter blobWriter = new IndexWriter(indexDir, config);
    blobWriter.commit();
    return blobWriter;
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Opens the index writer for this service over the given directory, optionally upgrading
 * an existing index first, and stores it in {@code this.writer}.
 *
 * <p>The writer uses a {@code SimpleAnalyzer}, {@code CREATE_OR_APPEND} mode and a
 * snapshot deletion policy wrapping keep-only-last-commit. When the host reports a
 * memory limit for this service, half of it (at least 1) becomes the RAM buffer size in
 * MB. After the initial commit both index timestamps are set to the current time.
 *
 * @param directory file-system location of the index
 * @param doUpgrade whether an already existing index should be upgraded in place
 * @return the writer stored in {@code this.writer}
 * @throws Exception if opening, upgrading or committing the index fails
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    Directory indexDir = MMapDirectory.open(directory.toPath());

    // Upgrade the index in place if necessary.
    boolean upgradeExisting = doUpgrade && DirectoryReader.indexExists(indexDir);
    if (upgradeExisting) {
        upgradeIndex(indexDir);
    }

    IndexWriterConfig config = new IndexWriterConfig(new SimpleAnalyzer());
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    Long memoryLimitMB = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (memoryLimitMB != null) {
        // give half to the index, the other half we keep for service caching context
        config.setRAMBufferSizeMB(Math.max(1, memoryLimitMB / 2));
    }

    this.writer = new IndexWriter(indexDir, config);
    this.writer.commit();
    this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
    this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros;
    return this.writer;
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Upgrades the index in {@code dir} when at least one of its segments was not written
 * with {@code Version.LATEST}; otherwise leaves the index untouched.
 *
 * <p>Segment versions are read through {@code CheckIndex}, which is always closed before
 * any upgrade starts. After an upgrade {@code indexUpdateTimeMicros} is set to the
 * current time.
 *
 * @param dir directory holding the index to inspect
 * @throws IOException if checking or upgrading the index fails
 */
private void upgradeIndex(Directory dir) throws IOException {
    boolean needsUpgrade = false;

    CheckIndex checker = new CheckIndex(dir);
    try {
        for (CheckIndex.Status.SegmentInfoStatus segment : checker.checkIndex().segmentInfos) {
            if (segment.version.equals(Version.LATEST)) {
                continue;
            }
            // The first out-of-date segment is enough to decide; stop scanning.
            logInfo("Found Index version %s", segment.version.toString());
            needsUpgrade = true;
            break;
        }
    } finally {
        checker.close();
    }

    if (!needsUpgrade) {
        return;
    }

    logInfo("Upgrading index to %s", Version.LATEST.toString());
    // The upgrade config is created with a null analyzer.
    IndexWriterConfig upgradeConfig = new IndexWriterConfig(null);
    new IndexUpgrader(dir, upgradeConfig, false).upgrade();
    this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Opens a new index writer over the given directory, optionally upgrading an existing
 * index first, and publishes it as {@code this.writer} under {@code searchSync}.
 *
 * <p>When the host reports a memory limit for this service, three quarters of it (at
 * least 1) become the writer's RAM buffer size in MB and one quarter is recorded in
 * {@code linkAccessMemoryLimitMB}. The writer uses a {@code SimpleAnalyzer},
 * {@code CREATE_OR_APPEND} mode and a snapshot deletion policy wrapping
 * keep-only-last-commit, and is committed once before being published.
 *
 * @param directory file-system location of the index
 * @param doUpgrade whether an already existing index should be upgraded in place
 * @return the writer created by this call
 * @throws Exception if opening, upgrading or committing the index fails
 */
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception {
    Analyzer analyzer = new SimpleAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
    if (totalMBs != null) {
        long cacheSizeMB = (totalMBs * 3) / 4;
        cacheSizeMB = Math.max(1, cacheSizeMB);
        iwc.setRAMBufferSizeMB(cacheSizeMB);
        this.linkAccessMemoryLimitMB = totalMBs / 4;
    }

    Directory dir = MMapDirectory.open(directory.toPath());

    // Upgrade the index in place if necessary.
    if (doUpgrade && DirectoryReader.indexExists(dir)) {
        upgradeIndex(dir);
    }

    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));

    IndexWriter w = new IndexWriter(dir, iwc);
    w.commit();

    // Swap in the new writer and reset the related bookkeeping as one step under the lock.
    synchronized (this.searchSync) {
        this.writer = w;
        this.linkAccessTimes.clear();
        this.indexUpdateTimeMicros = Utils.getNowMicrosUtc();
        this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros;
    }
    // Return the local reference: re-reading this.writer outside the lock could observe
    // a writer installed by another thread in the meantime.
    return w;
}