Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:com.stratio.cassandra.lucene.index.FSIndex.java

License:Apache License

/**
 * Builds a new {@link FSIndex}.
 *
 * <p>The constructor opens (or creates) the file system directory wrapped in an
 * {@link NRTCachingDirectory}, opens an {@link IndexWriter} over it, starts the background thread
 * that periodically reopens the searcher, and finally registers this object as a JMX MBean.
 * Any failure in these steps is rethrown wrapped in an {@code IndexException}.
 *
 * @param name the index name
 * @param mbeanName the JMX MBean object name
 * @param path the directory path
 * @param analyzer the index writer analyzer
 * @param refresh the index reader refresh frequency in seconds
 * @param ramBufferMB the index writer RAM buffer size in MB
 * @param maxMergeMB the directory max merge size in MB
 * @param maxCachedMB the directory max cache size in MB
 * @param refreshTask action to be done during refresh, may be {@code null} to do nothing
 */
public FSIndex(String name, String mbeanName, Path path, Analyzer analyzer, double refresh, int ramBufferMB,
        int maxMergeMB, int maxCachedMB, Runnable refreshTask) {
    try {
        this.path = path;
        this.name = name;

        // Open or create directory
        FSDirectory fsDirectory = FSDirectory.open(path);
        directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB);

        // Setup index writer: reuse an existing index at the path, or create one if there is none
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setRAMBufferSizeMB(ramBufferMB);
        indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        indexWriterConfig.setUseCompoundFile(true);
        indexWriterConfig.setMergePolicy(new TieredMergePolicy());
        indexWriter = new IndexWriter(directory, indexWriterConfig);

        // Setup NRT search: every new searcher first runs the refresh task (if any) and then
        // gets the custom similarity installed
        SearcherFactory searcherFactory = new SearcherFactory() {
            @Override
            public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) {
                if (refreshTask != null) {
                    refreshTask.run();
                }
                IndexSearcher searcher = new IndexSearcher(reader);
                searcher.setSimilarity(new NoIDFSimilarity());
                return searcher;
            }
        };
        TrackingIndexWriter trackingWriter = new TrackingIndexWriter(indexWriter);
        searcherManager = new SearcherManager(indexWriter, true, searcherFactory);
        // The same refresh value is used for both staleness bounds of the reopen thread
        searcherReopener = new ControlledRealTimeReopenThread<>(trackingWriter, searcherManager, refresh,
                refresh);
        searcherReopener.start();

        // Register JMX MBean
        mbean = new ObjectName(mbeanName);
        ManagementFactory.getPlatformMBeanServer().registerMBean(this, this.mbean);

    } catch (Exception e) {
        throw new IndexException(logger, e, "Error while creating index %s", name);
    }
}

From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java

License:Apache License

/**
 * Creates a {@code LuceneIndex} stored under the given path and starts its near-real-time search
 * machinery.
 *
 * <p>The constructor opens the directory, opens an index writer in create-or-append mode, starts
 * the searcher reopen thread and then tries to register this object as a JMX MBean. A failed MBean
 * registration is only logged; it does not abort construction.
 *
 * @param keyspace        The keyspace name.
 * @param table           The table name.
 * @param name            The index name.
 * @param path            The path of the directory where the Lucene files will be stored.
 * @param ramBufferMB     The index writer buffer size in MB.
 * @param maxMergeMB      NRTCachingDirectory max merge size in MB.
 * @param maxCachedMB     NRTCachingDirectory max cached MB.
 * @param analyzer        The default {@link Analyzer}.
 * @param refreshSeconds  The index readers refresh time in seconds. Writes are not visible until this time.
 * @param refreshCallback A runnable to be run on index refresh.
 * @throws IOException If Lucene throws IO errors.
 */
public LuceneIndex(String keyspace, String table, String name, Path path, Integer ramBufferMB,
        Integer maxMergeMB, Integer maxCachedMB, Analyzer analyzer, Double refreshSeconds,
        Runnable refreshCallback) throws IOException {
    this.path = path;
    this.refreshCallback = refreshCallback;
    this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name);

    // File system directory fronted by the NRT cache
    directory = new NRTCachingDirectory(FSDirectory.open(path), maxMergeMB, maxCachedMB);

    // Writer: append to an existing index or create a new one
    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writerConfig.setRAMBufferSizeMB(ramBufferMB);
    writerConfig.setMergePolicy(new TieredMergePolicy());
    writerConfig.setUseCompoundFile(true);
    indexWriter = new IndexWriter(directory, writerConfig);

    // Searchers produced by this factory trigger the refresh callback and use the custom similarity
    SearcherFactory nrtSearcherFactory = new SearcherFactory() {
        public IndexSearcher newSearcher(IndexReader reader) throws IOException {
            LuceneIndex.this.refreshCallBack();
            IndexSearcher freshSearcher = new IndexSearcher(reader);
            freshSearcher.setSimilarity(new NoIDFSimilarity());
            return freshSearcher;
        }
    };
    TrackingIndexWriter trackedWriter = new TrackingIndexWriter(indexWriter);
    searcherManager = new SearcherManager(indexWriter, true, nrtSearcherFactory);
    // Same value for both staleness bounds of the reopen thread
    searcherReopener = new ControlledRealTimeReopenThread<>(trackedWriter, searcherManager,
            refreshSeconds, refreshSeconds);
    searcherReopener.start();

    // JMX registration is best effort: failures are logged and construction continues
    try {
        String mbeanName = String.format(
                "com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s",
                keyspace, table, name);
        objectName = new ObjectName(mbeanName);
        ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName);
    } catch (MBeanException | OperationsException e) {
        Log.error(e, "Error while registering MBean");
    }
}

From source file:com.study.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes all text files under a directory.
 *
 * <p>Recognized arguments: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (required) and {@code -update} (append to an existing index instead of
 * recreating it). The process exits with status 1 when {@code -docs} is missing, when an option
 * is given without its value, or when the documents directory is not readable.
 *
 * @param args the command line arguments
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options consume the following argument; fail cleanly if it is absent
            // instead of running off the end of the array.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    long start = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setInfoStream(System.out);
        // CREATE replaces any previously indexed documents; CREATE_OR_APPEND adds new
        // documents to an existing index.
        iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer and the directory even when indexing fails
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        long end = System.currentTimeMillis();
        System.out.println(end - start + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.sxc.lucene.searching.PhraseQueryTest.java

License:Apache License

/**
 * Rebuilds the on-disk test index with a single document and opens a searcher over it.
 *
 * @throws IOException if the index cannot be written or read
 */
protected void setUp() throws IOException {
    dir = FSDirectory.open(new File("D:/programming/lucene/PhraseQueryTest"));

    // CREATE mode discards whatever index a previous run left in the directory
    SmartChineseAnalyzer analyzer = new SmartChineseAnalyzer(Version.LUCENE_47);
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    writerConfig.setOpenMode(OpenMode.CREATE);

    // The single stored text field the test queries against
    Document phraseDoc = new Document();
    phraseDoc.add(new TextField("field", "the quick brown fox jumped over the lazy dog", Field.Store.YES));

    IndexWriter indexWriter = new IndexWriter(dir, writerConfig);
    indexWriter.addDocument(phraseDoc);
    indexWriter.close();

    searcher = new IndexSearcher(DirectoryReader.open(dir));
}

From source file:com.tamingtext.classifier.mlt.TrainMoreLikeThis.java

License:Apache License

/**
 * Opens a fresh index at the given location and stores the resulting writer in {@code this.writer}.
 *
 * <p>Text is analyzed with an English analyzer; when {@code nGramSize} is greater than one the
 * analyzer is wrapped so that shingles of exactly that size are emitted as well.
 *
 * @param pathname file system path of the index directory
 * @throws IOException if the directory or the writer cannot be opened
 */
protected void openIndexWriter(String pathname) throws IOException {
    //<start id="lucene.examples.index.setup"/>
    Directory indexDirectory //<co id="luc.index.dir"/>
            = FSDirectory.open(new File(pathname));
    Analyzer indexAnalyzer //<co id="luc.index.analyzer"/>
            = new EnglishAnalyzer(Version.LUCENE_36);

    if (nGramSize > 1) { //<co id="luc.index.shingle"/>
        indexAnalyzer = new ShingleAnalyzerWrapper(indexAnalyzer, nGramSize, // min shingle size
                nGramSize, // max shingle size
                "-", // token separator
                true, // output unigrams
                true); // output unigrams if no shingles
    }

    IndexWriterConfig writerConfig //<co id="luc.index.create"/>
            = new IndexWriterConfig(Version.LUCENE_36, indexAnalyzer);
    writerConfig.setOpenMode(OpenMode.CREATE);
    this.writer = new IndexWriter(indexDirectory, writerConfig);
    /* <calloutlist>
    <callout arearefs="luc.index.dir">Create Index Directory</callout>
    <callout arearefs="luc.index.analyzer">Setup Analyzer</callout>
    <callout arearefs="luc.index.shingle">Setup Shingle Filter</callout>
    <callout arearefs="luc.index.create">Create <classname>IndexWriter</classname></callout>
    </calloutlist> */
    //<end id="lucene.examples.index.setup"/>
}

From source file:com.tistory.devyongsik.demo.IndexFiles.java

License:Apache License

/**
 * Indexes all text files under a hard-coded documents directory into a hard-coded index
 * directory, printing the elapsed time when done.
 *
 * @param args ignored
 */
public static void main(String[] args) {

    String docsPath = "/Users/need4spd/Java/"; // 1. directory whose files are indexed
    String indexPath = "/Users/need4spd/Java/lucene_index/"; // 2. directory the index is written to

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();

    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        // 3. Prepare what the IndexWriter needs: directory, analyzer and configuration.
        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

        boolean create = true; // 4. whether to rebuild the index from scratch

        if (create) {
            // 5. Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // 6. Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // 7. Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc); // 8. open the writer
        try {
            indexDocs(writer, docDir); // 9. index every file under docDir

            // NOTE: if you want to maximize search performance,
            // you can optionally call optimize here.  This can be
            // a costly operation, so generally it's only worth
            // it when your index is relatively static (ie you're
            // done adding documents to it):
            //
            // writer.optimize();
        } finally {
            // Close the writer even when indexing fails.
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.tuplejump.stargate.lucene.BasicIndexer.java

License:Apache License

/**
 * Resolves the index location, opens a plain file system directory on it and returns a writer
 * configured with a 256 MB RAM buffer and the class-wide {@code OPEN_MODE}.
 *
 * @param luceneV the Lucene version passed to the writer configuration
 * @return a new writer over the freshly opened directory
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(Version luceneV) throws IOException {
    file = LuceneUtils.getDirectory(keyspaceName, cfName, indexName, vNodeName);

    IndexWriterConfig writerConfig = new IndexWriterConfig(luceneV, analyzer);
    writerConfig.setOpenMode(OPEN_MODE);
    writerConfig.setRAMBufferSizeMB(256);

    directory = FSDirectory.open(file);

    // Record where the index was opened and in which mode
    StringBuilder openedMessage = new StringBuilder();
    openedMessage.append(indexName).append(" SG Index - Opened dir[").append(file.getAbsolutePath())
            .append("] - Openmode[").append(OPEN_MODE).append("]");
    logger.warn(openedMessage.toString());

    return new IndexWriter(directory, writerConfig);
}

From source file:com.tuplejump.stargate.lucene.NearRealTimeIndexer.java

License:Apache License

/**
 * Resolves the index location, opens an NRT-caching directory on it and returns a writer
 * configured with a 256 MB RAM buffer and the class-wide {@code OPEN_MODE}.
 *
 * @param luceneV the Lucene version passed to the writer configuration
 * @return a new writer over the freshly opened directory
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(Version luceneV) throws IOException {
    file = Utils.getDirectory(keyspaceName, cfName, indexName, vNodeName);

    IndexWriterConfig writerConfig = new IndexWriterConfig(luceneV, analyzer);
    writerConfig.setOpenMode(OPEN_MODE);
    writerConfig.setRAMBufferSizeMB(256);

    // File system directory behind an NRT cache; both cache limits are set to 100
    directory = new NRTCachingDirectory(FSDirectory.open(file), 100, 100);

    // Record where the index was opened and in which mode
    StringBuilder openedMessage = new StringBuilder();
    openedMessage.append(indexName).append(" SG Index - Opened dir[").append(file.getAbsolutePath())
            .append("] - Openmode[").append(OPEN_MODE).append("]");
    logger.warn(openedMessage.toString());

    return new IndexWriter(directory, writerConfig);
}

From source file:com.twentyn.patentSearch.DocumentIndexer.java

License:Open Source License

/**
 * Command line entry point: indexes a patent corpus file or directory into a Lucene index.
 *
 * <p>Required options are {@code -i/--input} (file or directory to index) and
 * {@code -x/--index} (index location). {@code -h/--help} prints usage and exits,
 * {@code -v/--verbose} raises the root logger to DEBUG. The input path is validated before the
 * index is opened, so a bad input never leaves an open writer or a newly created index behind.
 *
 * @param args the command line arguments
 * @throws Exception if indexing fails
 */
public static void main(String[] args) throws Exception {
    System.out.println("Starting up...");
    System.out.flush();
    Options opts = new Options();
    opts.addOption(Option.builder("i").longOpt("input").hasArg().required()
            .desc("Input file or directory to index").build());
    opts.addOption(Option.builder("x").longOpt("index").hasArg().required()
            .desc("Path to index file to generate").build());
    opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build());
    opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build());

    HelpFormatter helpFormatter = new HelpFormatter();
    CommandLineParser cmdLineParser = new DefaultParser();
    CommandLine cmdLine = null;
    try {
        cmdLine = cmdLineParser.parse(opts, args);
    } catch (ParseException e) {
        System.out.println("Caught exception when parsing command line: " + e.getMessage());
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(1);
    }

    if (cmdLine.hasOption("help")) {
        helpFormatter.printHelp("DocumentIndexer", opts);
        System.exit(0);
    }

    if (cmdLine.hasOption("verbose")) {
        // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2
        LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
        Configuration ctxConfig = ctx.getConfiguration();
        LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME);
        logConfig.setLevel(Level.DEBUG);

        ctx.updateLoggers();
        LOGGER.debug("Verbose logging enabled");
    }

    // Validate the input first: opening the writer below touches the index directory, which
    // should not happen when there is nothing to index.
    String inputFileOrDir = cmdLine.getOptionValue("input");
    File splitFileOrDir = new File(inputFileOrDir);
    if (!(splitFileOrDir.exists())) {
        LOGGER.error("Unable to find directory at " + inputFileOrDir);
        System.exit(1);
    }

    LOGGER.info("Opening index at " + cmdLine.getOptionValue("index"));
    Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath());

    /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one
     * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination.  This custom
     * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much
     * cruft to the index. */
    Analyzer analyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter("lowercase")
            .addTokenFilter("stop").build();

    IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    writerConfig.setRAMBufferSizeMB(1 << 10); // 1024 MB RAM buffer
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    DocumentIndexer indexer = new DocumentIndexer(indexWriter);
    PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir);
    corpusReader.readPatentCorpus();
    indexer.commitAndClose();
}

From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java

License:Open Source License

/**
 * Opens an index writer over the given directory and performs an initial commit.
 *
 * <p>The index is recreated when the service's index options contain
 * {@code BlobIndexOption.CREATE}; otherwise an existing index is appended to (or created if
 * absent). When the host reports an exact memory limit for this service, it is used (at least
 * 1 MB) as the writer RAM buffer size.
 *
 * <p>If anything fails after the directory has been opened, the writer (if already created) is
 * rolled back and the directory is closed before the exception propagates, so a failed call does
 * not leave the index locked.
 *
 * @param directory file system location of the index
 * @return an open writer whose initial commit has completed
 * @throws IOException if the directory or writer cannot be opened or the commit fails
 */
public IndexWriter createWriter(File directory) throws IOException {
    // NOTE(review): open(...) looks like the static factory inherited from FSDirectory, so the
    // concrete directory type may not be MMapDirectory on every platform — confirm intent.
    Directory dir = MMapDirectory.open(directory.toPath());
    IndexWriter w = null;
    try {
        Analyzer analyzer = new SimpleAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (this.indexOptions.contains(BlobIndexOption.CREATE)) {
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT);
        if (totalMBs != null) {
            totalMBs = Math.max(1, totalMBs);
            iwc.setRAMBufferSizeMB(totalMBs);
        }
        w = new IndexWriter(dir, iwc);
        w.commit();
        return w;
    } catch (IOException | RuntimeException e) {
        // Release whatever was acquired; secondary failures are attached to the original error.
        if (w != null) {
            try {
                w.rollback();
            } catch (IOException | RuntimeException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
        }
        try {
            dir.close();
        } catch (IOException | RuntimeException cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw e;
    }
}