Usage examples for the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig(Analyzer).
public IndexWriterConfig(Analyzer analyzer)
From source file:com.study.lucene.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from w ww .j av a 2 s.c o m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setInfoStream(System.out); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.tekstosense.stemmer.index.Indexer.java
License:Open Source License
/**
 * Opens an index at {@code INDEX_PATH} with a {@link StandardAnalyzer} and adds four
 * sample entries through {@code addDoc}.
 *
 * <p>The writer and the directory are closed when the method returns, including when
 * adding a document fails.
 *
 * @throws IOException
 *             Signals that an I/O exception has occurred.
 */
private static void indexer() throws IOException {
    StandardAnalyzer analyzer = new StandardAnalyzer();
    Path indexDirectoryPath = new File(INDEX_PATH).toPath();
    IndexWriterConfig conf = new IndexWriterConfig(analyzer);

    // Resources are closed in reverse order: writer first, then the directory.
    try (FSDirectory indexDirectory = new SimpleFSDirectory(indexDirectoryPath);
         IndexWriter w = new IndexWriter(indexDirectory, conf)) {
        // NOTE(review): the arguments look like (title, identifier) pairs; confirm against addDoc.
        addDoc(w, "Lucene in Action", "193398817");
        addDoc(w, "Lucene for Dummies", "55320055Z");
        addDoc(w, "Managing Gigabytes", "55063554A");
        addDoc(w, "The Art of Computer Science", "9900333X");
    }
}
From source file:com.test.LuceneDemo.java
License:Apache License
@Test public void test() throws IOException, org.apache.lucene.queryparser.classic.ParseException { Analyzer analyzer = new StandardAnalyzer(); // Store the index in memory: Directory directory = new RAMDirectory(); // To store an index on disk, use this instead: //Directory directory = FSDirectory.open("/tmp/testindex"); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter iwriter = new IndexWriter(directory, config); Document doc = new Document(); String text = "This is the text to be indexed."; doc.add(new Field("fieldname", text, TextField.TYPE_STORED)); iwriter.addDocument(doc);//from ww w . j a v a 2 s. c om iwriter.close(); // Now search the index: DirectoryReader ireader = DirectoryReader.open(directory); IndexSearcher isearcher = new IndexSearcher(ireader); // Parse a simple query that searches for "text": QueryParser parser = new QueryParser("fieldname", analyzer); Query query = parser.parse("indexed"); ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs; assertEquals(1, hits.length); // Iterate through the results: for (int i = 0; i < hits.length; i++) { Document hitDoc = isearcher.doc(hits[i].doc); assertEquals("This is the text to be indexed.", hitDoc.get("fieldname")); } ireader.close(); directory.close(); }
From source file:com.tripod.lucene.example.TestExampleLuceneBase.java
License:Apache License
@Before public void setupBase() throws IOException, ParseException { analyzer = new StandardAnalyzer(); directory = new RAMDirectory(); facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(ExampleField.COLOR.getName(), ExampleField.COLOR.getName()); IndexWriterConfig config = new IndexWriterConfig(analyzer); try (IndexWriter writer = new IndexWriter(directory, config)) { final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT); // Doc1// ww w . j ava 2 s . c o m Document doc1 = new Document(); doc1.add(new Field(ExampleField.ID.getName(), "1", StringField.TYPE_STORED)); doc1.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("1"))); doc1.add(new Field(ExampleField.TITLE.getName(), "Title 1", TextField.TYPE_STORED)); doc1.add(new Field(ExampleField.BODY.getName(), "Body 1 Solr is cool", TextField.TYPE_STORED)); doc1.add(new Field(ExampleField.COLOR.getName(), "BLUE", StringField.TYPE_STORED)); doc1.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "BLUE")); Date createDate1 = dateFormat.parse("2016-10-01T01:00:00Z"); doc1.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate1.getTime())); doc1.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate1.getTime())); writer.addDocument(facetsConfig.build(doc1)); // Doc2 Document doc2 = new Document(); doc2.add(new Field(ExampleField.ID.getName(), "2", StringField.TYPE_STORED)); doc2.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("2"))); doc2.add(new Field(ExampleField.TITLE.getName(), "Title 2", TextField.TYPE_STORED)); doc2.add(new Field(ExampleField.BODY.getName(), "Body 2 Lucene is cool", TextField.TYPE_STORED)); doc2.add(new Field(ExampleField.COLOR.getName(), "RED", StringField.TYPE_STORED)); doc2.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "RED")); Date createDate2 = dateFormat.parse("2016-10-01T02:00:00Z"); doc2.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), 
createDate2.getTime())); doc2.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate2.getTime())); writer.addDocument(facetsConfig.build(doc2)); // Doc3 Document doc3 = new Document(); doc3.add(new Field(ExampleField.ID.getName(), "3", StringField.TYPE_STORED)); doc3.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("3"))); doc3.add(new Field(ExampleField.TITLE.getName(), "Title 3", TextField.TYPE_STORED)); doc3.add(new Field(ExampleField.BODY.getName(), "Body 3 Solr is Great, Solr is Fun", TextField.TYPE_STORED)); doc3.add(new Field(ExampleField.COLOR.getName(), "GREEN", StringField.TYPE_STORED)); doc3.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "GREEN")); Date createDate3 = dateFormat.parse("2016-10-01T03:00:00Z"); doc3.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate3.getTime())); doc3.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate3.getTime())); writer.addDocument(facetsConfig.build(doc3)); // Doc4 Document doc4 = new Document(); doc4.add(new Field(ExampleField.ID.getName(), "4", StringField.TYPE_STORED)); doc4.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("4"))); doc4.add(new Field(ExampleField.TITLE.getName(), "Title 4", TextField.TYPE_STORED)); doc4.add(new Field(ExampleField.BODY.getName(), "Body 4", TextField.TYPE_STORED)); doc4.add(new Field(ExampleField.COLOR.getName(), "BLUE", StringField.TYPE_STORED)); doc4.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "BLUE")); Date createDate4 = dateFormat.parse("2016-10-01T04:00:00Z"); doc4.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate4.getTime())); doc4.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate4.getTime())); writer.addDocument(facetsConfig.build(doc4)); // Doc5 Document doc5 = new Document(); doc5.add(new Field(ExampleField.ID.getName(), "5", StringField.TYPE_STORED)); doc5.add(new 
SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("5"))); doc5.add(new Field(ExampleField.TITLE.getName(), "Title 5", TextField.TYPE_STORED)); doc5.add(new Field(ExampleField.BODY.getName(), "Body 5", TextField.TYPE_STORED)); doc5.add(new Field(ExampleField.COLOR.getName(), "RED", StringField.TYPE_STORED)); doc5.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "RED")); Date createDate5 = dateFormat.parse("2016-10-01T05:00:00Z"); doc5.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate5.getTime())); doc5.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate5.getTime())); writer.addDocument(facetsConfig.build(doc5)); // commit docs writer.commit(); } // needs to be opened after the writer is closed otherwise it won't see the test data searcherManager = new SearcherManager(directory, null); }
From source file:com.tripod.lucene.example.TestExampleSummaryQueryService.java
License:Apache License
@Test public void testRefreshingSearcherManager() throws IOException, ParseException, QueryException, InterruptedException { // Add a new document final IndexWriterConfig config = new IndexWriterConfig(analyzer); try (IndexWriter writer = new IndexWriter(directory, config)) { final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT); Document doc = new Document(); doc.add(new Field(ExampleField.ID.getName(), "99", StringField.TYPE_STORED)); doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef("1"))); doc.add(new Field(ExampleField.TITLE.getName(), "Title 99", TextField.TYPE_STORED)); doc.add(new Field(ExampleField.BODY.getName(), "Body 99", TextField.TYPE_STORED)); doc.add(new Field(ExampleField.COLOR.getName(), "BLUE", StringField.TYPE_STORED)); doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), "BLUE")); Date createDate1 = dateFormat.parse("2016-11-01T01:00:00Z"); doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate1.getTime())); doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate1.getTime())); writer.addDocument(facetsConfig.build(doc)); writer.commit();//from ww w . j ava2s.c o m } // Query for the new document and shouldn't get it LuceneQuery query = new LuceneQuery("id:99"); QueryResults<ExampleSummary> results = queryService.search(query); assertNotNull(results); assertNotNull(results.getResults()); assertEquals(0, results.getResults().size()); // Start a refresher for the SearchManager SearcherManagerRefresher refresher = new SearcherManagerRefresher(searcherManager, 2000); try { // Start the refresher and then wait slightly longer than refresh interval refresher.start(); Thread.sleep(3000); // Query again should get a result now query = new LuceneQuery("id:99"); results = queryService.search(query); assertNotNull(results); assertNotNull(results.getResults()); assertEquals(1, results.getResults().size()); } finally { refresher.stop(); } }
From source file:com.twentyn.patentSearch.DocumentIndexer.java
License:Open Source License
public static void main(String[] args) throws Exception { System.out.println("Starting up..."); System.out.flush();/*from w w w. j a v a2 s . co m*/ Options opts = new Options(); opts.addOption(Option.builder("i").longOpt("input").hasArg().required() .desc("Input file or directory to index").build()); opts.addOption(Option.builder("x").longOpt("index").hasArg().required() .desc("Path to index file to generate").build()); opts.addOption(Option.builder("h").longOpt("help").desc("Print this help message and exit").build()); opts.addOption(Option.builder("v").longOpt("verbose").desc("Print verbose log output").build()); HelpFormatter helpFormatter = new HelpFormatter(); CommandLineParser cmdLineParser = new DefaultParser(); CommandLine cmdLine = null; try { cmdLine = cmdLineParser.parse(opts, args); } catch (ParseException e) { System.out.println("Caught exception when parsing command line: " + e.getMessage()); helpFormatter.printHelp("DocumentIndexer", opts); System.exit(1); } if (cmdLine.hasOption("help")) { helpFormatter.printHelp("DocumentIndexer", opts); System.exit(0); } if (cmdLine.hasOption("verbose")) { // With help from http://stackoverflow.com/questions/23434252/programmatically-change-log-level-in-log4j2 LoggerContext ctx = (LoggerContext) LogManager.getContext(false); Configuration ctxConfig = ctx.getConfiguration(); LoggerConfig logConfig = ctxConfig.getLoggerConfig(LogManager.ROOT_LOGGER_NAME); logConfig.setLevel(Level.DEBUG); ctx.updateLoggers(); LOGGER.debug("Verbose logging enabled"); } LOGGER.info("Opening index at " + cmdLine.getOptionValue("index")); Directory indexDir = FSDirectory.open(new File(cmdLine.getOptionValue("index")).toPath()); /* The standard analyzer is too aggressive with chemical entities (it strips structural annotations, for one * thing), and the whitespace analyzer doesn't do any case normalization or stop word elimination. 
This custom * analyzer appears to treat chemical entities better than the standard analyzer without admitting too much * cruft to the index. */ Analyzer analyzer = CustomAnalyzer.builder().withTokenizer("whitespace").addTokenFilter("lowercase") .addTokenFilter("stop").build(); IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer); writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); writerConfig.setRAMBufferSizeMB(1 << 10); IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig); String inputFileOrDir = cmdLine.getOptionValue("input"); File splitFileOrDir = new File(inputFileOrDir); if (!(splitFileOrDir.exists())) { LOGGER.error("Unable to find directory at " + inputFileOrDir); System.exit(1); } DocumentIndexer indexer = new DocumentIndexer(indexWriter); PatentCorpusReader corpusReader = new PatentCorpusReader(indexer, splitFileOrDir); corpusReader.readPatentCorpus(); indexer.commitAndClose(); }
From source file:com.vmware.dcp.services.common.LuceneBlobIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory) throws IOException { Directory dir = MMapDirectory.open(directory.toPath()); Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (this.indexOptions.contains(BlobIndexOption.CREATE)) { iwc.setOpenMode(OpenMode.CREATE); } else {//from w w w . ja va 2 s. co m iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { totalMBs = Math.max(1, totalMBs); iwc.setRAMBufferSizeMB(totalMBs); } IndexWriter w = new IndexWriter(dir, iwc); w.commit(); return w; }
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception { Directory dir = MMapDirectory.open(directory.toPath()); Analyzer analyzer = new SimpleAnalyzer(); // Upgrade the index in place if necessary. if (doUpgrade && DirectoryReader.indexExists(dir)) { upgradeIndex(dir);//from w w w . j a va 2 s.c o m } IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { // give half to the index, the other half we keep for service caching context totalMBs = Math.max(1, totalMBs / 2); iwc.setRAMBufferSizeMB(totalMBs); } this.writer = new IndexWriter(dir, iwc); this.writer.commit(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros; return this.writer; }
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
private void upgradeIndex(Directory dir) throws IOException { boolean doUpgrade = false; IndexWriterConfig iwc = new IndexWriterConfig(null); CheckIndex chkIndex = new CheckIndex(dir); try {//from www .java 2 s .c o m for (CheckIndex.Status.SegmentInfoStatus segmentInfo : chkIndex.checkIndex().segmentInfos) { if (!segmentInfo.version.equals(Version.LATEST)) { logInfo("Found Index version %s", segmentInfo.version.toString()); doUpgrade = true; break; } } } finally { chkIndex.close(); } if (doUpgrade) { logInfo("Upgrading index to %s", Version.LATEST.toString()); new IndexUpgrader(dir, iwc, false).upgrade(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); } }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
public IndexWriter createWriter(File directory, boolean doUpgrade) throws Exception { Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); Long totalMBs = getHost().getServiceMemoryLimitMB(getSelfLink(), MemoryLimitType.EXACT); if (totalMBs != null) { long cacheSizeMB = (totalMBs * 3) / 4; cacheSizeMB = Math.max(1, cacheSizeMB); iwc.setRAMBufferSizeMB(cacheSizeMB); this.linkAccessMemoryLimitMB = totalMBs / 4; }//from w w w . j a va 2 s. co m Directory dir = MMapDirectory.open(directory.toPath()); // Upgrade the index in place if necessary. if (doUpgrade && DirectoryReader.indexExists(dir)) { upgradeIndex(dir); } iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); IndexWriter w = new IndexWriter(dir, iwc); w.commit(); synchronized (this.searchSync) { this.writer = w; this.linkAccessTimes.clear(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); this.indexWriterCreationTimeMicros = this.indexUpdateTimeMicros; } return this.writer; }