List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:com.stratio.cassandra.lucene.index.FSIndex.java
License:Apache License
/** * Builds a new {@link FSIndex}./*from ww w . ja v a 2 s.co m*/ * * @param name the index name * @param mbeanName the JMX MBean object name * @param path the directory path * @param analyzer the index writer analyzer * @param refresh the index reader refresh frequency in seconds * @param ramBufferMB the index writer RAM buffer size in MB * @param maxMergeMB the directory max merge size in MB * @param maxCachedMB the directory max cache size in MB * @param refreshTask action to be done during refresh */ public FSIndex(String name, String mbeanName, Path path, Analyzer analyzer, double refresh, int ramBufferMB, int maxMergeMB, int maxCachedMB, Runnable refreshTask) { try { this.path = path; this.name = name; // Open or create directory FSDirectory fsDirectory = FSDirectory.open(path); directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); indexWriterConfig.setRAMBufferSizeMB(ramBufferMB); indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); indexWriterConfig.setUseCompoundFile(true); indexWriterConfig.setMergePolicy(new TieredMergePolicy()); indexWriter = new IndexWriter(directory, indexWriterConfig); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { @Override public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) { if (refreshTask != null) { refreshTask.run(); } IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingWriter = new TrackingIndexWriter(indexWriter); searcherManager = new SearcherManager(indexWriter, true, searcherFactory); searcherReopener = new ControlledRealTimeReopenThread<>(trackingWriter, searcherManager, refresh, refresh); searcherReopener.start(); // Register JMX MBean mbean = new ObjectName(mbeanName); ManagementFactory.getPlatformMBeanServer().registerMBean(this, this.mbean); } catch (Exception e) { throw new IndexException(logger, e, "Error while creating index %s", name); } }
From source file:com.stratio.cassandra.lucene.service.LuceneIndex.java
License:Apache License
/** * Builds a new {@code RowDirectory} using the specified directory path and analyzer. * * @param keyspace The keyspace name. * @param table The table name. * @param name The index name. * @param path The path of the directory in where the Lucene files will be stored. * @param ramBufferMB The index writer buffer size in MB. * @param maxMergeMB NRTCachingDirectory max merge size in MB. * @param maxCachedMB NRTCachingDirectory max cached MB. * @param analyzer The default {@link Analyzer}. * @param refreshSeconds The index readers refresh time in seconds. Writings are not visible until this time. * @param refreshCallback A runnable to be run on index refresh. * @throws IOException If Lucene throws IO errors. *//*ww w . ja v a 2s .co m*/ public LuceneIndex(String keyspace, String table, String name, Path path, Integer ramBufferMB, Integer maxMergeMB, Integer maxCachedMB, Analyzer analyzer, Double refreshSeconds, Runnable refreshCallback) throws IOException { this.path = path; this.refreshCallback = refreshCallback; this.logName = String.format("Lucene index %s.%s.%s", keyspace, table, name); // Open or create directory FSDirectory fsDirectory = FSDirectory.open(path); directory = new NRTCachingDirectory(fsDirectory, maxMergeMB, maxCachedMB); // Setup index writer IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setRAMBufferSizeMB(ramBufferMB); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setUseCompoundFile(true); config.setMergePolicy(new TieredMergePolicy()); indexWriter = new IndexWriter(directory, config); // Setup NRT search SearcherFactory searcherFactory = new SearcherFactory() { public IndexSearcher newSearcher(IndexReader reader) throws IOException { LuceneIndex.this.refreshCallBack(); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new NoIDFSimilarity()); return searcher; } }; TrackingIndexWriter trackingIndexWriter = new TrackingIndexWriter(indexWriter); searcherManager = new SearcherManager(indexWriter, true, searcherFactory); searcherReopener = new ControlledRealTimeReopenThread<>(trackingIndexWriter, searcherManager, refreshSeconds, refreshSeconds); searcherReopener.start(); // Start the refresher thread // Register JMX MBean try { objectName = new ObjectName( String.format("com.stratio.cassandra.lucene:type=LuceneIndexes,keyspace=%s,table=%s,index=%s", keyspace, table, name)); ManagementFactory.getPlatformMBeanServer().registerMBean(this, objectName); } catch (MBeanException | OperationsException e) { Log.error(e, "Error while registering MBean"); } }
From source file:com.study.lucene.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;// www .j a v a 2 s . com boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setInfoStream(System.out); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.sxc.lucene.searching.PhraseQueryTest.java
License:Apache License
protected void setUp() throws IOException { dir = FSDirectory.open(new File("D:/programming/lucene/PhraseQueryTest")); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, new SmartChineseAnalyzer(Version.LUCENE_47)); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, config); Document doc = new Document(); doc.add(new TextField("field", // 1 "the quick brown fox jumped over the lazy dog", // 1 Field.Store.YES)); // 1 writer.addDocument(doc);//from w ww . ja v a 2 s.c o m writer.close(); searcher = new IndexSearcher(DirectoryReader.open(dir)); }
From source file:com.tamingtext.classifier.mlt.TrainMoreLikeThis.java
License:Apache License
protected void openIndexWriter(String pathname) throws IOException { //<start id="lucene.examples.index.setup"/> Directory directory //<co id="luc.index.dir"/> = FSDirectory.open(new File(pathname)); Analyzer analyzer //<co id="luc.index.analyzer"/> = new EnglishAnalyzer(Version.LUCENE_36); if (nGramSize > 1) { //<co id="luc.index.shingle"/> ShingleAnalyzerWrapper sw = new ShingleAnalyzerWrapper(analyzer, nGramSize, // min shingle size nGramSize, // max shingle size "-", // token separator true, // output unigrams true); // output unigrams if no shingles analyzer = sw;//from www.j a v a 2 s .com } IndexWriterConfig config //<co id="luc.index.create"/> = new IndexWriterConfig(Version.LUCENE_36, analyzer); config.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(directory, config); /* <calloutlist> <callout arearefs="luc.index.dir">Create Index Directory</callout> <callout arearefs="luc.index.analyzer">Setup Analyzer</callout> <callout arearefs="luc.index.shingle">Setup Shingle Filter</callout> <callout arearefs="luc.index.create">Create <classname>IndexWriter</classname></callout> </calloutlist> */ //<end id="lucene.examples.index.setup"/> this.writer = writer; }
From source file:com.tistory.devyongsik.demo.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String docsPath = "/Users/need4spd/Java/"; //1. String indexPath = "/Users/need4spd/Java/lucene_index/"; //2. final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1);/*from w w w . j a va 2 s.c o m*/ } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); //3. IndexWriter . Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); // Analyzer IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); boolean create = true; //4. if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); //5. . . } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); //6. . } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); //7. IndexWriterConfig . // . IndexWriter writer = new IndexWriter(dir, iwc); //8. IndexWriter . indexDocs(writer, docDir); //9. . // NOTE: if you want to maximize search performance, // you can optionally call optimize here. This can be // a costly operation, so generally it's only worth // it when your index is relatively static (ie you're // done adding documents to it): // // writer.optimize(); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.tuplejump.stargate.lucene.BasicIndexer.java
License:Apache License
private IndexWriter getIndexWriter(Version luceneV) throws IOException { file = LuceneUtils.getDirectory(keyspaceName, cfName, indexName, vNodeName); IndexWriterConfig config = new IndexWriterConfig(luceneV, analyzer); config.setRAMBufferSizeMB(256);// w w w . j av a 2s . c o m config.setOpenMode(OPEN_MODE); directory = FSDirectory.open(file); logger.warn( indexName + " SG Index - Opened dir[" + file.getAbsolutePath() + "] - Openmode[" + OPEN_MODE + "]"); return new IndexWriter(directory, config); }
From source file:com.tuplejump.stargate.lucene.NearRealTimeIndexer.java
License:Apache License
private IndexWriter getIndexWriter(Version luceneV) throws IOException { file = Utils.getDirectory(keyspaceName, cfName, indexName, vNodeName); IndexWriterConfig config = new IndexWriterConfig(luceneV, analyzer); config.setRAMBufferSizeMB(256);/*from w w w.ja v a 2 s . c o m*/ config.setOpenMode(OPEN_MODE); directory = new NRTCachingDirectory(FSDirectory.open(file), 100, 100); logger.warn( indexName + " SG Index - Opened dir[" + file.getAbsolutePath() + "] - Openmode[" + OPEN_MODE + "]"); return new IndexWriter(directory, config); }
From source file:com.twentyn.patentSearch.DocumentIndexer.java
License:Open Source License
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush();/*from w ww . ja va 2s. com*/ Options opts = new Options(); opts.addOption(Option.builder("i").longOpt("input").hasArg().required() .desc("Input file or directory to index").build()); opts.addOption(Option.builder("x").longOpt("index").hasArg().required() .desc("Path to index file to generate").build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination. This custom * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much * cruft to the index. */ Analyzer analyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter("lowercase") .addTokenFilter("stop").build(); IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer); writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); writerConfig.setRAMBufferSizeMB(1 << 10); IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig); String inputFileOrDir = cmdLine.getOptionValue("input"); File splitFileOrDir = new File(inputFileOrDir); if (!(splitFileOrDir.exists())) { LOGGER.error("Unable to find directory at " + inputFileOrDir); System.exit(1); } DocumentIndexer indexer = new DocumentIndexer(indexWriter); PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir); corpusReader.readPatentCorpus(); indexer.commitAndClose(); }
From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory) throws IOException { Directory dir = MMapDirectory.open(directory.toPath()); Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (this.indexOptions.contains(BlobIndexOption.CREATE)) { iwc.setOpenMode(OpenMode.CREATE); } else {//from w w w . ja v a 2 s .c om iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { totalMBs = Math.max(1, totalMBs); iwc.setRAMBufferSizeMB(totalMBs); } IndexWriter w = new IndexWriter(dir, iwc); w.commit(); return w; }