List of usage examples for the org.apache.lucene.index.IndexWriterConfig(Analyzer) constructor
public IndexWriterConfig(Analyzer analyzer)
From source file:BlockBuilding.AbstractBlockBuilding.java
License:Apache License
/**
 * Opens an {@link IndexWriter} on the supplied directory, configured with a
 * {@link SimpleAnalyzer}.
 *
 * @param directory the Lucene directory the writer should operate on
 * @return a freshly opened writer, or {@code null} when opening fails
 *         (the {@link IOException} is logged at SEVERE level)
 */
protected IndexWriter openWriter(Directory directory) {
    try {
        final Analyzer simpleAnalyzer = new SimpleAnalyzer();
        final IndexWriterConfig writerConfig = new IndexWriterConfig(simpleAnalyzer);
        return new IndexWriter(directory, writerConfig);
    } catch (IOException ex) {
        LOGGER.log(Level.SEVERE, null, ex);
        return null;
    }
}
From source file:book.Indexer.java
License:Apache License
/**
 * Creates an indexer that writes a Lucene index into the given directory,
 * analyzing text with a {@link StandardAnalyzer}.
 *
 * @param indexDir filesystem path of the directory that will hold the index
 * @throws IOException if the index directory cannot be opened
 */
public Indexer(String indexDir) throws IOException {
    Directory indexDirectory = FSDirectory.open(new File(indexDir).toPath());
    IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    writer = new IndexWriter(indexDirectory, writerConfig);
}
From source file:br.bireme.ngrams.NGIndex.java
/**
 * Opens an {@link IndexWriter} over the index located at {@code indexPath}.
 * <p>
 * When {@code append} is true the writer is opened in {@code APPEND} mode.
 * Otherwise any leftover {@code write.lock} file is removed first and the
 * index is rebuilt from scratch in {@code CREATE} mode.
 *
 * @param indexPath filesystem path of the index directory (must not be null)
 * @param analyzer  analyzer the writer will use (must not be null)
 * @param append    whether to append to an existing index instead of recreating it
 * @return a writer configured with the requested open mode
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(final String indexPath, final Analyzer analyzer, final boolean append)
        throws IOException {
    assert indexPath != null;
    assert analyzer != null;

    final File indexDirFile = new File(indexPath);
    final Directory luceneDirectory = FSDirectory.open(indexDirFile.toPath());
    final IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);

    if (append) {
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    } else {
        // Remove a stale lock possibly left behind by a crashed run,
        // then recreate the index from scratch.
        new File(indexDirFile, "write.lock").delete();
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }
    return new IndexWriter(luceneDirectory, writerConfig);
}
From source file:br.pucminas.ri.jsearch.queryexpansion.RocchioQueryExpansion.java
License:Open Source License
/**
 * Builds a transient in-memory index from the supplied relevant documents,
 * analyzed with a {@link PorterStemAnalyzer}.
 *
 * @param relevantDocs documents to write into the temporary index
 * @return the RAM-backed directory containing the freshly written index
 * @throws CorruptIndexException     if the index is found to be corrupt
 * @throws LockObtainFailedException if the index write lock cannot be obtained
 * @throws IOException               on any other I/O failure
 */
private Directory createIndex(ArrayList<Document> relevantDocs)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    Directory ramIndex = new RAMDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(new PorterStemAnalyzer());
    // try-with-resources commits and closes the writer before the directory is returned
    try (IndexWriter writer = new IndexWriter(ramIndex, writerConfig)) {
        for (Document relevantDoc : relevantDocs) {
            writer.addDocument(relevantDoc);
        }
    }
    return ramIndex;
}
From source file:bzh.terrevirtuelle.navisu.gazetteer.impl.lucene.GeoNameResolver.java
License:Apache License
/**
 * Build the gazetteer index line by line.
 * <p>
 * Does nothing when an index already exists at {@code indexerPath}.
 * Rows that fail to parse are logged and skipped rather than aborting
 * the whole build.
 *
 * @param gazetteerPath path of the gazetteer file
 * @param indexerPath path to the created Lucene index directory.
 * @param reverseGeocodingEnabled whether documents are indexed for reverse geocoding
 * @throws IOException on read or index-write failure
 * @throws RuntimeException never escapes per line; per-line errors are caught and logged
 */
public void buildIndex(String gazetteerPath, String indexerPath, boolean reverseGeocodingEnabled)
        throws IOException {
    File indexfile = new File(indexerPath);
    indexDir = FSDirectory.open(indexfile.toPath());
    if (DirectoryReader.indexExists(indexDir)) {
        // Index already built; keep the existing one.
        return;
    }
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    indexWriter = new IndexWriter(indexDir, config);
    Logger logger = Logger.getLogger(this.getClass().getName());
    logger.log(Level.WARNING, "Start Building Index for Gazatteer");
    try (BufferedReader filereader = new BufferedReader(
            new InputStreamReader(new FileInputStream(gazetteerPath), "UTF-8"))) {
        String line;
        int count = 0;
        while ((line = filereader.readLine()) != null) {
            try {
                count += 1;
                if (count % 100000 == 0) {
                    logger.log(Level.INFO, "Indexed Row Count: " + count);
                }
                addDoc(indexWriter, line, reverseGeocodingEnabled);
            } catch (RuntimeException re) {
                // Best-effort: skip malformed gazetteer rows instead of failing the build.
                logger.log(Level.WARNING, "Skipping... Error on line: {0}", line);
                re.printStackTrace();
            }
        }
        logger.log(Level.WARNING, "Building Finished");
    } finally {
        // Original leaked the reader and writer (and the index write lock)
        // when an IOException escaped mid-read; always release them.
        indexWriter.close();
    }
}
From source file:ch.algotrader.rest.index.SecurityIndexer.java
License:Open Source License
private void buildIndex(Collection<Security> securities) { Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); try (IndexWriter iwriter = new IndexWriter(index, config)) { Collection<Collection<IndexableField>> securityDocuments = securities.stream() .map(Security::convertToVO).map(this::createDocument).collect(Collectors.toList()); iwriter.addDocuments(securityDocuments); } catch (IOException ex) { throw new UnrecoverableCoreException("Unexpected I/O error building security index", ex); }//from w w w .j a v a 2s.c o m }
From source file:ch.ksfx.web.services.lucene.AsynchronousIndexer.java
License:Open Source License
/**
 * Worker loop of the asynchronous indexer thread; runs forever.
 * <p>
 * Takes {@code IndexEvent}s off the blocking queue one at a time. Delete
 * events remove either all documents of a series or one observation
 * document; all other events (re)index a single observation fetched from
 * the DAO. The writer is opened lazily and closed whenever the queue
 * drains; any exception is logged and the loop continues with the next
 * event.
 */
public void run() {
    while (true) {
        try {
            // Lazily (re)open the writer: it is closed each time the queue
            // drains below, so it may need reopening for the next event.
            if (writer == null || !writer.isOpen()) {
                IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
                writer = new IndexWriter(dir, iwc);
            }
            // Blocks until an index event is available.
            IndexEvent indexEvent = queuedIndexEvents.take();
            String sortableDateTime = null;
            if (indexEvent.getDeleteEvent()) {
                if (indexEvent instanceof DeleteSeriesObservationsEvent) {
                    // Remove every observation document of the series.
                    writer.deleteDocuments(new Term("series_id", indexEvent.getSeriesId().toString()));
                } else {
                    // Remove one observation, addressed by the composite key
                    // internal_id = seriesId + sortable time + sourceId.
                    sortableDateTime = DateFormatUtil.formatToLexicographicallySortableTimeAndDateString(
                            indexEvent.getObservationTime());
                    writer.deleteDocuments(new Term("internal_id", indexEvent.getSeriesId().toString()
                            + sortableDateTime + indexEvent.getSourceId()));
                }
                if (queuedIndexEvents.size() == 0) {
                    writer.close();
                }
                continue;
            }
            Observation obs = observationDAO.getObservationForTimeSeriesIdObservationTimeAndSourceId(
                    indexEvent.getSeriesId(), indexEvent.getObservationTime(), indexEvent.getSourceId());
            Document doc = new Document();
            sortableDateTime = DateFormatUtil
                    .formatToLexicographicallySortableTimeAndDateString(obs.getObservationTime());
            String isoDateTime = DateFormatUtil.formatToISO8601TimeAndDateString(obs.getObservationTime());
            doc.add(new StringField("internal_id",
                    obs.getTimeSeriesId().toString() + sortableDateTime + obs.getSourceId(), Field.Store.NO));
            doc.add(new StringField("series_id", obs.getTimeSeriesId().toString(), Field.Store.YES));
            doc.add(new StringField("observation_time", isoDateTime, Field.Store.YES));
            doc.add(new StringField("sortable_observation_time", sortableDateTime, Field.Store.NO));
            doc.add(new StringField("source_id", obs.getSourceId(), Field.Store.YES));
            addField(doc, "source_uri", obs.getSourceId());
            addField(doc, "scalar_value", obs.getScalarValue());
            for (String key : obs.getComplexValue().keySet()) {
                addField(doc, key, obs.getComplexValue().get(key));
            }
            //System.out.println("Meta data to index: " + obs.getMetaData() + " Size: " + obs.getMetaData().size());
            for (String key : obs.getMetaData().keySet()) {
                System.out.println("Indexing meta data: " + key + " --> " + obs.getMetaData().get(key));
                addField(doc, key, obs.getMetaData().get(key));
            }
            // updateDocument = delete-then-add keyed on internal_id, so
            // re-indexing the same observation replaces the old document.
            writer.updateDocument(new Term("internal_id",
                    obs.getTimeSeriesId().toString() + sortableDateTime + obs.getSourceId()), doc);
            // NOTE(review): closing when the queue is empty presumably
            // releases the index lock between bursts — confirm intent.
            if (queuedIndexEvents.size() == 0) {
                writer.close();
            }
        } catch (Exception e) {
            logger.error("Error while Asynchronous Indexing", e);
        }
    }
}
From source file:cn.codepub.redis.directory.Main.java
License:Apache License
/**
 * Benchmark/smoke test: writes ten million small documents into a
 * Redis-backed Lucene directory through a {@code ShardedJedisPool}, then
 * reads them back with per-key term queries, logging wall-clock timings.
 *
 * @throws IOException if the index cannot be written or read
 */
public static void testRedisDirectoryWithShardedJedisPool() throws IOException {
    long start = System.currentTimeMillis();
    // CREATE mode: any existing index content in Redis is discarded.
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new WhitespaceAnalyzer())
            .setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    // Tuning knobs kept for reference, currently disabled:
    //indexWriterConfig.setInfoStream(System.out);
    //indexWriterConfig.setRAMBufferSizeMB(2048);
    //LogByteSizeMergePolicy logByteSizeMergePolicy = new LogByteSizeMergePolicy();
    //logByteSizeMergePolicy.setMinMergeMB(1);
    //logByteSizeMergePolicy.setMaxMergeMB(64);
    //logByteSizeMergePolicy.setMaxCFSSegmentSizeMB(64);
    //indexWriterConfig.setRAMBufferSizeMB(1024).setMergePolicy(logByteSizeMergePolicy).setUseCompoundFile(false);
    //GenericObjectPoolConfig genericObjectPoolConfig = new GenericObjectPoolConfig();
    //genericObjectPoolConfig.setMaxWaitMillis(3000); //10s
    // Single local shard; a second shard is left commented out.
    List<JedisShardInfo> shards = new ArrayList<>();
    JedisShardInfo si = new JedisShardInfo("localhost", 6379, Constants.TIME_OUT);
    //JedisShardInfo si2 = new JedisShardInfo("localhost", 6380);
    shards.add(si);
    //shards.add(si2);
    JedisPoolConfig jedisPoolConfig = new JedisPoolConfig();
    ShardedJedisPool shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
    RedisDirectory redisDirectory = new RedisDirectory(new ShardedJedisPoolStream(shardedJedisPool));
    IndexWriter indexWriter = new IndexWriter(redisDirectory, indexWriterConfig);
    for (int i = 0; i < 10000000; i++) {
        indexWriter.addDocument(addDocument(i));
    }
    indexWriter.commit();
    indexWriter.close();
    redisDirectory.close();
    long end = System.currentTimeMillis();
    log.error("RedisDirectoryWithShardedJedisPool consumes {}s!", (end - start) / 1000);
    // Fresh pool for the read path (the writer's directory was closed above).
    shardedJedisPool = new ShardedJedisPool(jedisPoolConfig, shards);
    start = System.currentTimeMillis();
    IndexSearcher indexSearcher = new IndexSearcher(
            DirectoryReader.open(new RedisDirectory(new ShardedJedisPoolStream(shardedJedisPool))));
    int total = 0;
    for (int i = 0; i < 10000000; i++) {
        TermQuery key1 = new TermQuery(new Term("key1", "key" + i));
        TopDocs search = indexSearcher.search(key1, 10);
        total += search.totalHits;
    }
    System.out.println(total);
    end = System.currentTimeMillis();
    log.error("RedisDirectoryWithShardedJedisPool search consumes {}ms!", (end - start));
}
From source file:cn.larry.search.book.index.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//w w w. j a v a2 s. c om boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmxm or -Xmx1g): // // iwc.setRAMBufferSizeMB(.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.adanac.module.blog.search.LuceneHelper.java
License:Apache License
/**
 * (Re)builds the Lucene index under {@code INDEX_PATH + path} from the given
 * rows. Each row contributes an "id" field and an "indexedContent" field
 * combining its title and content values separated by {@code SEPARATOR}.
 * The index is always recreated ({@code OpenMode.CREATE}); any failure is
 * logged and swallowed, preserving the original best-effort contract.
 *
 * @param path     index subdirectory appended to {@code INDEX_PATH}
 * @param id       map key holding the numeric document id
 * @param title    map key holding the title text
 * @param content  map key holding the body text
 * @param dataList rows to index
 */
private static void generateIndex(String path, String id, String title, String content,
        List<Map<String, String>> dataList) {
    // try-with-resources: the original leaked the Directory and IndexWriter
    // (holding the index write lock) whenever a row threw mid-loop.
    try (Directory dir = FSDirectory.open(Paths.get(INDEX_PATH + path))) {
        Analyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE);
        try (IndexWriter writer = new IndexWriter(dir, indexWriterConfig)) {
            for (Map<String, String> data : dataList) {
                Document document = new Document();
                Field idField = new IntField("id", Integer.valueOf(data.get(id)), Field.Store.YES);
                Field indexedContentField = new TextField("indexedContent",
                        data.get(title) + SEPARATOR + data.get(content), Field.Store.YES);
                document.add(idField);
                document.add(indexedContentField);
                writer.addDocument(document);
                if (logger.isInfoEnabled()) {
                    logger.info("add index for : [" + data.get(title) + "]");
                }
            }
        }
    } catch (Exception e) {
        logger.error("add index failed ...", e);
    }
}