List of usage examples for org.apache.lucene.index.IndexWriterConfig#setInfoStream
public IndexWriterConfig setInfoStream(PrintStream printStream)
From source file:IndexAndSearchOpenStreetMaps1D.java
License:Apache License
private static void createIndex() throws IOException { long t0 = System.nanoTime(); CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder().onMalformedInput(CodingErrorAction.REPORT) .onUnmappableCharacter(CodingErrorAction.REPORT); int BUFFER_SIZE = 1 << 16; // 64K InputStream is = Files .newInputStream(Paths.get("/lucenedata/open-street-maps/latlon.subsetPlusAllLondon.txt")); BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE); Directory dir = FSDirectory.open(Paths.get("/c/tmp/bkdtest1d" + (USE_NF ? "_nf" : ""))); IndexWriterConfig iwc = new IndexWriterConfig(null); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); //iwc.setMaxBufferedDocs(109630); //iwc.setRAMBufferSizeMB(IndexWriterConfig.DISABLE_AUTO_FLUSH); iwc.setRAMBufferSizeMB(256.0);/* w w w . jav a 2 s . c o m*/ iwc.setMergePolicy(new LogDocMergePolicy()); iwc.setMergeScheduler(new SerialMergeScheduler()); iwc.setInfoStream(new PrintStreamInfoStream(System.out)); IndexWriter w = new IndexWriter(dir, iwc); int count = 0; byte[] scratch = new byte[4]; while (true) { String line = reader.readLine(); if (line == null) { break; } String[] parts = line.split(","); //long id = Long.parseLong(parts[0]); int lat = (int) (1000000. * Double.parseDouble(parts[1])); //int lon = (int) (1000000. * Double.parseDouble(parts[2])); Document doc = new Document(); if (USE_NF) { doc.add(new LegacyIntField("latnum", lat, Field.Store.NO)); //doc.add(new LongField("lonnum", lon, Field.Store.NO)); } else { doc.add(new IntPoint("lat", lat)); //doc.add(new SortedNumericDocValuesField("lon", lon)); } w.addDocument(doc); count++; if (count % 1000000 == 0) { System.out.println(count + "..."); } } //w.forceMerge(1); w.commit(); System.out.println(w.maxDoc() + " total docs"); w.close(); long t1 = System.nanoTime(); System.out.println(((t1 - t0) / 1000000000.0) + " sec to build index"); }
From source file:IndexTaxis.java
License:Apache License
/**
 * Indexes a CSV file using several threads.
 *
 * <p>Arguments: {@code args[0]} is the index directory, {@code args[1]} the number of
 * indexing threads, {@code args[2]} the CSV file. The first line of the file is read as
 * a comma-separated header; the remainder is consumed chunk by chunk by the worker
 * threads via {@code readChunk} and {@code indexOneChunk}.
 *
 * <p>The directory, writer and input stream are managed by try-with-resources so that
 * they are closed even if header parsing or thread start-up fails.
 *
 * @param args index path, thread count, CSV path (in that order)
 * @throws IllegalArgumentException if the file ends before the header line is complete
 * @throws Exception on I/O failure or if the main thread is interrupted while joining
 */
public static void main(String[] args) throws Exception {
    Path indexPath = Paths.get(args[0]);
    int threadCount = Integer.parseInt(args[1]);
    Path docsPath = Paths.get(args[2]);

    IndexWriterConfig iwc = new IndexWriterConfig();
    iwc.setRAMBufferSizeMB(1024.);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    //((ConcurrentMergeScheduler) iwc.getMergeScheduler()).disableAutoIOThrottle();

    try (Directory dir = FSDirectory.open(indexPath);
         IndexWriter w = new IndexWriter(dir, iwc);
         BufferedInputStream docs = new BufferedInputStream(
                 Files.newInputStream(docsPath, StandardOpenOption.READ))) {

        // Parse the header fields.
        List<String> fieldsList = new ArrayList<>();
        StringBuilder builder = new StringBuilder();
        while (true) {
            int x = docs.read();
            if (x == -1) {
                throw new IllegalArgumentException(
                        "hit EOF while trying to read CSV header; are you sure you have the right CSV file!");
            }
            byte b = (byte) x;
            if (b == NEWLINE) {
                fieldsList.add(builder.toString());
                break;
            } else if (b == COMMA) {
                fieldsList.add(builder.toString());
                builder.setLength(0);
            } else {
                // This is OK because headers are all ASCII.
                builder.append((char) b);
            }
        }
        final String[] fields = fieldsList.toArray(new String[0]);

        Thread[] threads = new Thread[threadCount];
        final AtomicInteger docCounter = new AtomicInteger();
        final AtomicLong bytesCounter = new AtomicLong();
        startNS = System.nanoTime();

        for (int i = 0; i < threadCount; i++) {
            threads[i] = new Thread() {
                @Override
                public void run() {
                    try {
                        indexChunks();
                    } catch (Exception e) {
                        // NOTE(review): this only terminates the worker; main() still prints
                        // "Indexing done" and closes the writer. Confirm whether a worker
                        // failure should fail the whole run.
                        throw new RuntimeException(e);
                    }
                }

                /** Pulls chunks from the shared stream until readChunk reports none are left. */
                private void indexChunks() throws IOException {
                    while (true) {
                        Chunk chunk = readChunk(docs);
                        if (chunk == null) {
                            break;
                        }
                        indexOneChunk(fields, chunk, w, docCounter, bytesCounter);
                    }
                }
            };
            threads[i].start();
        }

        for (int i = 0; i < threadCount; i++) {
            threads[i].join();
        }

        System.out.println("Indexing done; now close");
    }
}
From source file:com.mathworks.xzheng.indexing.Fragments.java
License:Apache License
public void setInfoStream() throws Exception { Directory dir = null;//ww w .j ava 2 s . com Analyzer analyzer = null; IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer); // START IndexWriter writer = new IndexWriter(dir, config); config.setInfoStream(System.out); // END }
From source file:com.mathworks.xzheng.indexing.VerboseIndexing.java
License:Apache License
private void index() throws IOException { Directory dir = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new WhitespaceAnalyzer(Version.LUCENE_46)); IndexWriter writer = new IndexWriter(dir, config); config.setInfoStream(System.out); for (int i = 0; i < 100; i++) { Document doc = new Document(); doc.add(new Field("keyword", "goober", Field.Store.YES, Field.Index.NOT_ANALYZED)); writer.addDocument(doc);/*from w ww . ja va 2 s . c o m*/ } //writer.optimize(); writer.forceMerge(Integer.MAX_VALUE); writer.close(); }
From source file:com.study.lucene.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from w w w. j a va2s.c o m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setInfoStream(System.out); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:io.datalayer.lucene.helper.AosUtil.java
License:Apache License
/**
 * Fragment demonstrating how to direct IndexWriter diagnostics to {@code System.out}:
 * the info stream is set on the config, and the config is then used to build a writer.
 *
 * <p>The directory is a {@code null} placeholder, so this is presumably illustrative
 * rather than something to run as-is.
 *
 * @throws Exception if the writer cannot be created
 */
public void setInfoStream() throws Exception {
    Directory directory = null;

    // setInfoStream returns the config, so it can be chained onto construction.
    IndexWriterConfig writerConfig =
            new IndexWriterConfig(Version.LUCENE_46, AosAnalyser.NO_LIMIT_TOKEN_COUNT_SIMPLE_ANALYSER)
                    .setInfoStream(System.out);

    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);
}
From source file:io.datalayer.lucene.index.VerboseIndexing.java
License:Apache License
private void index() throws IOException { Directory dir = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, AosAnalyser.NO_LIMIT_TOKEN_COUNT_SIMPLE_ANALYSER); config.setInfoStream(System.out); IndexWriter writer = new IndexWriter(dir, config); for (int i = 0; i < 100; i++) { Document doc = new Document(); doc.add(new StoredField("keyword", "goober")); writer.addDocument(doc);//from w w w . j a v a 2s.co m } writer.close(); }
From source file:org.apache.blur.lucene.index.BlurIndexWriterTest.java
License:Apache License
/**
 * Runs two writers against the same directory from separate threads, each with its own
 * {@code BlurLockFactory} ("node1" and "node2"), and checks the resulting index.
 *
 * <p>Thread 1 adds index "1", calls {@code waitToLooseLock()} and then attempts
 * {@code prepareCommit()}; if that call returns normally, {@code fail1} is set. Thread 2
 * waits for thread 1's directory to be added, obtains the lock, adds index "2" and
 * commits; any {@code IOException} there sets {@code fail2}. After both threads finish,
 * the index must contain exactly one segment and one document whose field "f" is "2".
 */
@Test
public void testIndexRelocationFencing()
        throws CorruptIndexException, LockObtainFailedException, IOException, InterruptedException {
    final AtomicBoolean fail1 = new AtomicBoolean();
    final AtomicBoolean fail2 = new AtomicBoolean();

    final Path hdfsDirPath = new Path(toUri("./target/tmp/BlurIndexWriterTest"));

    final Directory directory = new RAMDirectory();

    Thread thread1 = new Thread(new Runnable() {
        @Override
        public void run() {
            BlurIndexWriter writer = null;
            try {
                BlurLockFactory blurLockFactory = new BlurLockFactory(_configuration, hdfsDirPath, "node1", "1");
                directory.setLockFactory(blurLockFactory);
                IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
                conf.setInfoStream(getInfoStream());
                writer = new BlurIndexWriter(directory, conf);
                writer.addIndexes(addDir("1"));
                waitToLooseLock();
                writer.prepareCommit();
                // Reaching this line means prepareCommit() did not throw.
                fail1.set(true);
            } catch (IOException e) {
                e.printStackTrace();
                if (writer != null) {
                    // Roll back and close independently so a failure in one
                    // does not prevent the other from being attempted.
                    try {
                        writer.rollback();
                    } catch (IOException e1) {
                        e1.printStackTrace();
                    }
                    try {
                        writer.close();
                    } catch (IOException e1) {
                        e1.printStackTrace();
                    }
                }
            }
        }
    });

    Thread thread2 = new Thread(new Runnable() {
        @Override
        public void run() {
            try {
                waitForDirInThread1ToBeAdded(directory);
                BlurLockFactory blurLockFactory = new BlurLockFactory(_configuration, hdfsDirPath, "node2", "2");
                directory.setLockFactory(blurLockFactory);
                IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_43, new KeywordAnalyzer());
                conf.setInfoStream(getInfoStream());
                BlurIndexWriter writer = new BlurIndexWriter(directory, conf);
                obtainLock();
                writer.addIndexes(addDir("2"));
                writer.commit();
                writer.close();
            } catch (IOException e) {
                e.printStackTrace();
                fail2.set(true);
            }
        }
    });

    thread1.start();
    thread2.start();
    thread1.join();
    thread2.join();

    if (fail1.get()) {
        fail();
    }
    if (fail2.get()) {
        fail();
    }

    DirectoryReader reader = DirectoryReader.open(directory);
    try {
        List<AtomicReaderContext> leaves = reader.leaves();
        // JUnit's assertEquals takes (expected, actual); keeping that order makes
        // failure messages read correctly.
        assertEquals(1, leaves.size());
        assertEquals(1, reader.numDocs());
        Document document = reader.document(0);
        assertEquals("2", document.get("f"));
    } finally {
        // Close the reader even when one of the assertions above fails.
        reader.close();
    }
}
From source file:org.apache.solr.update.SolrIndexConfig.java
License:Apache License
public IndexWriterConfig toIndexWriterConfig(IndexSchema schema) { // so that we can update the analyzer on core reload, we pass null // for the default analyzer, and explicitly pass an analyzer on // appropriate calls to IndexWriter IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, null); if (maxBufferedDocs != -1) iwc.setMaxBufferedDocs(maxBufferedDocs); if (ramBufferSizeMB != -1) iwc.setRAMBufferSizeMB(ramBufferSizeMB); if (termIndexInterval != -1) iwc.setTermIndexInterval(termIndexInterval); if (writeLockTimeout != -1) iwc.setWriteLockTimeout(writeLockTimeout); iwc.setSimilarity(schema.getSimilarity()); iwc.setMergePolicy(buildMergePolicy(schema)); iwc.setMergeScheduler(buildMergeScheduler(schema)); iwc.setInfoStream(infoStream); // do this after buildMergePolicy since the backcompat logic // there may modify the effective useCompoundFile iwc.setUseCompoundFile(getUseCompoundFile()); if (maxIndexingThreads != -1) { iwc.setMaxThreadStates(maxIndexingThreads); }/*from w w w . jav a 2 s. c om*/ if (mergedSegmentWarmerInfo != null) { // TODO: add infostream -> normal logging system (there is an issue somewhere) IndexReaderWarmer warmer = schema.getResourceLoader().newInstance(mergedSegmentWarmerInfo.className, IndexReaderWarmer.class, null, new Class[] { InfoStream.class }, new Object[] { iwc.getInfoStream() }); iwc.setMergedSegmentWarmer(warmer); } return iwc; }
From source file:org.elasticsearch.index.engine.internal.AsynchronousEngine.java
License:Apache License
private IndexWriter createWriter() throws IOException { try {/*ww w. j av a 2 s.c o m*/ boolean create = !Lucene.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); config.setInfoStream(new LoggerInfoStream(indexSettings, shardId)); config.setMergeScheduler(mergeScheduler.newMergeScheduler()); MergePolicy mergePolicy = mergePolicyProvider.getMergePolicy(); // Give us the opportunity to upgrade old segments while performing // background merges mergePolicy = new ElasticsearchMergePolicy(mergePolicy); config.setMergePolicy(mergePolicy); config.setSimilarity(similarityService.similarity()); config.setRAMBufferSizeMB(indexingBufferSize.mbFrac()); config.setMaxThreadStates(indexConcurrency); config.setCodec(codecService.codec(codecName)); /* We set this timeout to a highish value to work around * the default poll interval in the Lucene lock that is * 1000ms by default. We might need to poll multiple times * here but with 1s poll this is only executed twice at most * in combination with the default writelock timeout*/ config.setWriteLockTimeout(5000); config.setUseCompoundFile(this.compoundOnFlush); config.setCheckIntegrityAtMerge(checksumOnMerge); // Warm-up hook for newly-merged segments. 
Warming up segments here is better since it will be performed at the end // of the merge operation and won't slow down _refresh config.setMergedSegmentWarmer(new IndexReaderWarmer() { @Override public void warm(AtomicReader reader) throws IOException { try { assert isMergedSegment(reader); if (warmer != null) { final Engine.Searcher searcher = new SimpleSearcher("warmer", new IndexSearcher(reader)); final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher); warmer.warmNewReaders(context); } } catch (Throwable t) { // Don't fail a merge if the warm-up failed if (!closed) { logger.warn("Warm-up failed", t); } if (t instanceof Error) { // assertion/out-of-memory error, don't ignore those throw (Error) t; } } } }); return new IndexWriter(store.directory(), config); } catch (LockObtainFailedException ex) { boolean isLocked = IndexWriter.isLocked(store.directory()); logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked); throw ex; } }