List of usage examples for org.apache.lucene.index.IndexWriterConfig#setInfoStream
public IndexWriterConfig setInfoStream(PrintStream printStream)
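Before the source-file examples, a minimal self-contained sketch of the usual wiring. The index path, analyzer, and use of System.out here are illustrative assumptions, not taken from any example below. The info stream receives IndexWriter's internal diagnostics (flush, merge, and delete activity); besides the PrintStream overload shown above, IndexWriterConfig also accepts an org.apache.lucene.util.InfoStream, which several of the examples below use.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SetInfoStreamSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/tmp/infostream-demo")); // hypothetical path
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setInfoStream(System.out); // IndexWriter diagnostics go to stdout from here on
        try (IndexWriter w = new IndexWriter(dir, iwc)) {
            w.addDocument(new Document()); // any write produces info-stream output
            w.commit();
        }
    }
}

The PrintStream overload is a convenience: internally the stream is wrapped in a PrintStreamInfoStream, so iwc.setInfoStream(new PrintStreamInfoStream(System.out)) is effectively equivalent, as the perf.* examples below show.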
From source file:org.elasticsearch.index.engine.InternalEngine.java
License:Apache License
private IndexWriter createWriter(boolean create) throws IOException {
    try {
        final IndexWriterConfig iwc = new IndexWriterConfig(engineConfig.getAnalyzer());
        iwc.setCommitOnClose(false); // we by default don't commit on close
        iwc.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND);
        iwc.setIndexDeletionPolicy(deletionPolicy);
        // with tests.verbose, lucene sets this up: plumb to align with filesystem stream
        boolean verbose = false;
        try {
            verbose = Boolean.parseBoolean(System.getProperty("tests.verbose"));
        } catch (Throwable ignore) {
        }
        iwc.setInfoStream(verbose ? InfoStream.getDefault() : new LoggerInfoStream(logger));
        iwc.setMergeScheduler(mergeScheduler);
        MergePolicy mergePolicy = config().getMergePolicy();
        // Give us the opportunity to upgrade old segments while performing
        // background merges
        mergePolicy = new ElasticsearchMergePolicy(mergePolicy);
        iwc.setMergePolicy(mergePolicy);
        iwc.setSimilarity(engineConfig.getSimilarity());
        iwc.setRAMBufferSizeMB(engineConfig.getIndexingBufferSize().mbFrac());
        iwc.setCodec(engineConfig.getCodec());
        /* We set this timeout to a highish value to work around
         * the default poll interval in the Lucene lock that is
         * 1000ms by default. We might need to poll multiple times
         * here but with 1s poll this is only executed twice at most
         * in combination with the default writelock timeout */
        iwc.setWriteLockTimeout(5000);
        iwc.setUseCompoundFile(this.engineConfig.isCompoundOnFlush());
        // Warm-up hook for newly-merged segments. Warming up segments here is better since it will be
        // performed at the end of the merge operation and won't slow down _refresh
        iwc.setMergedSegmentWarmer(new IndexReaderWarmer() {
            @Override
            public void warm(LeafReader reader) throws IOException {
                try {
                    LeafReader esLeafReader = new ElasticsearchLeafReader(reader, shardId);
                    assert isMergedSegment(esLeafReader);
                    if (warmer != null) {
                        final Engine.Searcher searcher = new Searcher("warmer",
                                searcherFactory.newSearcher(esLeafReader, null));
                        final IndicesWarmer.WarmerContext context = new IndicesWarmer.WarmerContext(shardId, searcher);
                        warmer.warmNewReaders(context);
                    }
                } catch (Throwable t) {
                    // Don't fail a merge if the warm-up failed
                    if (isClosed.get() == false) {
                        logger.warn("Warm-up failed", t);
                    }
                    if (t instanceof Error) {
                        // assertion/out-of-memory error, don't ignore those
                        throw (Error) t;
                    }
                }
            }
        });
        return new IndexWriter(store.directory(), iwc);
    } catch (LockObtainFailedException ex) {
        boolean isLocked = IndexWriter.isLocked(store.directory());
        logger.warn("Could not lock IndexWriter isLocked [{}]", ex, isLocked);
        throw ex;
    }
}
From source file:org.eu.bitzone.Leia.java
License:Apache License
/**
 * Optimize the index.
 */
public void optimize(final Object dialog) {
    final Thread t = new Thread() {
        @Override
        public void run() {
            IndexWriter iw = null;
            final Object optimizeButton = find(dialog, "optimizeButton");
            setBoolean(optimizeButton, "enabled", false);
            final Object closeButton = find(dialog, "closeButton");
            setBoolean(closeButton, "enabled", false);
            final Object msg = find(dialog, "msg");
            final Object stat = find(dialog, "stat");
            setString(stat, "text", "Running ...");
            final PanelPrintWriter ppw = new PanelPrintWriter(Leia.this, msg);
            final boolean useCompound = getBoolean(find(dialog, "optCompound"), "selected");
            final boolean expunge = getBoolean(find(dialog, "optExpunge"), "selected");
            final boolean keep = getBoolean(find(dialog, "optKeepAll"), "selected");
            final boolean useLast = getBoolean(find(dialog, "optLastCommit"), "selected");
            final Object tiiSpin = find(dialog, "tii");
            final Object segnumSpin = find(dialog, "segnum");
            final int tii = Integer.parseInt(getString(tiiSpin, "text"));
            final int segnum = Integer.parseInt(getString(segnumSpin, "text"));
            try {
                if (is != null) {
                    is = null;
                }
                if (ir != null) {
                    ir.close();
                }
                if (ar != null) {
                    ar.close();
                }
                IndexDeletionPolicy policy;
                if (keep) {
                    policy = new KeepAllIndexDeletionPolicy();
                } else {
                    policy = new KeepLastIndexDeletionPolicy();
                }
                final IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
                if (!useLast) {
                    final IndexCommit ic = ((DirectoryReader) ir).getIndexCommit();
                    if (ic != null) {
                        cfg.setIndexCommit(ic);
                    }
                }
                cfg.setIndexDeletionPolicy(policy);
                cfg.setTermIndexInterval(tii);
                final MergePolicy p = cfg.getMergePolicy();
                cfg.setUseCompoundFile(useCompound);
                if (useCompound) {
                    p.setNoCFSRatio(1.0);
                }
                cfg.setInfoStream(ppw);
                iw = new IndexWriter(dir, cfg);
                final long startSize = Util.calcTotalFileSize(pName, dir);
                final long startTime = System.currentTimeMillis();
                if (expunge) {
                    iw.forceMergeDeletes();
                } else {
                    if (segnum > 1) {
                        iw.forceMerge(segnum, true);
                    } else {
                        iw.forceMerge(1, true);
                    }
                }
                iw.commit();
                final long endTime = System.currentTimeMillis();
                final long endSize = Util.calcTotalFileSize(pName, dir);
                final long deltaSize = startSize - endSize;
                final String sign = deltaSize < 0 ? " Increased " : " Reduced ";
                final String sizeMsg = sign + Util.normalizeSize(Math.abs(deltaSize))
                        + Util.normalizeUnit(Math.abs(deltaSize));
                final String timeMsg = String.valueOf(endTime - startTime) + " ms";
                showStatus(sizeMsg + " in " + timeMsg);
                iw.close();
                setString(stat, "text", "Finished OK.");
            } catch (final Exception e) {
                e.printStackTrace(ppw);
                setString(stat, "text", "ERROR - aborted.");
                errorMsg("ERROR optimizing: " + e.toString());
                if (iw != null) {
                    try {
                        iw.close();
                    } catch (final Exception e1) {
                    }
                }
            } finally {
                setBoolean(closeButton, "enabled", true);
            }
            try {
                actionReopen();
                is = new IndexSearcher(ir);
                // add dialog again
                add(dialog);
            } catch (final Exception e) {
                e.printStackTrace(ppw);
                errorMsg("ERROR reopening after optimize:\n" + e.getMessage());
            }
        }
    };
    t.start();
}
From source file:org.getopt.luke.Luke.java
License:Apache License
/**
 * Optimize the index.
 */
public void optimize(final Object dialog) {
    Thread t = new Thread() {
        public void run() {
            IndexWriter iw = null;
            Object optimizeButton = find(dialog, "optimizeButton");
            setBoolean(optimizeButton, "enabled", false);
            Object closeButton = find(dialog, "closeButton");
            setBoolean(closeButton, "enabled", false);
            Object msg = find(dialog, "msg");
            Object stat = find(dialog, "stat");
            setString(stat, "text", "Running ...");
            PanelPrintWriter ppw = new PanelPrintWriter(Luke.this, msg);
            boolean useCompound = getBoolean(find(dialog, "optCompound"), "selected");
            boolean expunge = getBoolean(find(dialog, "optExpunge"), "selected");
            boolean keep = getBoolean(find(dialog, "optKeepAll"), "selected");
            boolean useLast = getBoolean(find(dialog, "optLastCommit"), "selected");
            Object tiiSpin = find(dialog, "tii");
            Object segnumSpin = find(dialog, "segnum");
            int tii = Integer.parseInt(getString(tiiSpin, "text"));
            int segnum = Integer.parseInt(getString(segnumSpin, "text"));
            try {
                if (is != null)
                    is = null;
                if (ir != null)
                    ir.close();
                if (ar != null)
                    ar.close();
                IndexDeletionPolicy policy;
                if (keep) {
                    policy = new KeepAllIndexDeletionPolicy();
                } else {
                    policy = new KeepLastIndexDeletionPolicy();
                }
                IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
                if (!useLast) {
                    IndexCommit ic = ((DirectoryReader) ir).getIndexCommit();
                    if (ic != null) {
                        cfg.setIndexCommit(ic);
                    }
                }
                cfg.setIndexDeletionPolicy(policy);
                cfg.setTermIndexInterval(tii);
                cfg.setUseCompoundFile(useCompound);
                cfg.setInfoStream(ppw);
                iw = new IndexWriter(dir, cfg);
                long startSize = Util.calcTotalFileSize(pName, dir);
                long startTime = System.currentTimeMillis();
                if (expunge) {
                    iw.forceMergeDeletes();
                } else {
                    if (segnum > 1) {
                        iw.forceMerge(segnum, true);
                    } else {
                        iw.forceMerge(1, true);
                    }
                }
                iw.commit();
                long endTime = System.currentTimeMillis();
                long endSize = Util.calcTotalFileSize(pName, dir);
                long deltaSize = startSize - endSize;
                String sign = deltaSize < 0 ? " Increased " : " Reduced ";
                String sizeMsg = sign + Util.normalizeSize(Math.abs(deltaSize))
                        + Util.normalizeUnit(Math.abs(deltaSize));
                String timeMsg = String.valueOf(endTime - startTime) + " ms";
                showStatus(sizeMsg + " in " + timeMsg);
                iw.close();
                setString(stat, "text", "Finished OK.");
            } catch (Exception e) {
                e.printStackTrace(ppw);
                setString(stat, "text", "ERROR - aborted.");
                errorMsg("ERROR optimizing: " + e.toString());
                if (iw != null)
                    try {
                        iw.close();
                    } catch (Exception e1) {
                    }
            } finally {
                setBoolean(closeButton, "enabled", true);
            }
            try {
                actionReopen();
                is = new IndexSearcher(ir);
                // add dialog again
                add(dialog);
            } catch (Exception e) {
                e.printStackTrace(ppw);
                errorMsg("ERROR reopening after optimize:\n" + e.getMessage());
            }
        }
    };
    t.start();
}
From source file:org.hibernate.search.test.util.logging.LoggerInfoStreamTest.java
License:LGPL
@Test
public void testEnableInfoStream() throws Exception {
    LoggerInfoStream infoStream = new LoggerInfoStream();
    RAMDirectory directory = new RAMDirectory();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(new StandardAnalyzer());
    indexWriterConfig.setInfoStream(infoStream);
    IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
    Document doc = new Document();
    doc.add(new StringField("f1", "value1", Field.Store.YES));
    indexWriter.addDocument(doc);
    indexWriter.commit();
    indexWriter.close();
    List<LoggingEvent> loggingEvents = testAppender
            .searchByLoggerAndMessage(LogCategory.INFOSTREAM_LOGGER_CATEGORY.getName(), "IW:");
    assertFalse(loggingEvents.isEmpty());
}
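The test above uses Hibernate Search's LoggerInfoStream, an adapter from Lucene's InfoStream to a logger. When no ready-made adapter is available, a small custom subclass suffices. The sketch below is a hypothetical adapter (class name and log category are assumptions) targeting java.util.logging; Lucene calls isEnabled(component) before message(component, message), where component is a short tag such as "IW" for IndexWriter or "MP" for the merge policy.

import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.lucene.util.InfoStream;

// Hypothetical adapter: forwards IndexWriter diagnostics to java.util.logging.
public class JulInfoStream extends InfoStream {
    private static final Logger LOG = Logger.getLogger("lucene.infostream");

    @Override
    public void message(String component, String message) {
        LOG.fine("[" + component + "] " + message); // e.g. "[IW] now flush at close"
    }

    @Override
    public boolean isEnabled(String component) {
        // Cheap gate: Lucene skips building the message string when this returns false.
        return LOG.isLoggable(Level.FINE);
    }

    @Override
    public void close() throws IOException {
        // Nothing to release for a logger-backed stream.
    }
}

It would be installed through the InfoStream overload: indexWriterConfig.setInfoStream(new JulInfoStream()).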
From source file:perf.AutoPrefixPerf.java
License:Apache License
public static void main(String[] args) throws Exception {
    String numbersFile = args[0];
    String queriesFile = args[1];
    Path indexPath = Paths.get(args[2]);
    int precStep = Integer.parseInt(args[3]);
    boolean useNumericField = (precStep != 0);
    int maxTermsInPrefix;
    int minTermsInPrefix;
    if (useNumericField == false) {
        minTermsInPrefix = Integer.parseInt(args[4]);
        maxTermsInPrefix = Integer.parseInt(args[5]);
    } else {
        minTermsInPrefix = 0;
        maxTermsInPrefix = 0;
    }
    BytesRefBuilder binaryToken = new BytesRefBuilder();
    binaryToken.grow(8);
    binaryToken.setLength(8);
    Directory dir = FSDirectory.open(indexPath);
    if (Files.notExists(indexPath) == false) {
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        iwc.setMaxBufferedDocs(30000);
        iwc.setRAMBufferSizeMB(-1);
        iwc.setMergePolicy(new LogDocMergePolicy());
        final PostingsFormat pf;
        if (useNumericField) {
            // Disable auto-prefix when testing NumericField!
            if (minTermsInPrefix != 0) {
                throw new IllegalArgumentException("only precStep or minTermsInPrefix should be non-zero");
            }
            pf = new Lucene50PostingsFormat(25, 48, 0, 0);
        } else {
            /*
            if (minTermsInPrefix == 0) {
                throw new IllegalArgumentException("one of precStep or minTermsInPrefix must be non-zero");
            }
            */
            pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, maxTermsInPrefix);
            //pf = new Lucene50PostingsFormat(25, 48, minTermsInPrefix, Integer.MAX_VALUE);
        }
        iwc.setCodec(new Lucene53Codec() {
            @Override
            public PostingsFormat getPostingsFormatForField(String field) {
                return pf;
            }
        });
        iwc.setInfoStream(new PrintStreamInfoStream(System.out));
        iwc.setMergeScheduler(new SerialMergeScheduler());
        //TieredMergePolicy tmp = (TieredMergePolicy) iwc.getMergePolicy();
        //tmp.setFloorSegmentMB(.1);
        //ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) iwc.getMergeScheduler();
        // More concurrency (for SSD)
        //cms.setMaxMergesAndThreads(5, 3);
        final IndexWriter w = new IndexWriter(dir, iwc);
        Document doc = new Document();
        Field field;
        if (useNumericField) {
            FieldType longFieldType = new FieldType(LongField.TYPE_NOT_STORED);
            longFieldType.setNumericPrecisionStep(precStep);
            longFieldType.freeze();
            field = new LongField("number", 0L, longFieldType);
            doc.add(field);
        } else {
            FieldType longFieldType = new FieldType(TextField.TYPE_NOT_STORED);
            longFieldType.setIndexOptions(IndexOptions.DOCS_ONLY);
            longFieldType.setOmitNorms(true);
            longFieldType.setIndexRanges(true);
            longFieldType.freeze();
            field = new Field("number", new BinaryTokenStream(binaryToken.get()), longFieldType);
            doc.add(field);
        }
        long startMS = System.currentTimeMillis();
        // 64K buffer:
        InputStream is = new FileInputStream(numbersFile);
        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
        int count = 0;
        while (true) {
            String line = reader.readLine();
            if (line == null) {
                break;
            }
            Long v = Long.parseLong(line.trim());
            if (useNumericField) {
                field.setLongValue(v);
            } else {
                //NumericUtils.longToPrefixCoded(v, 0, binaryToken);
                longToBytes(v, binaryToken);
                //if (bytesToLong(binaryToken.get()) != v) {
                //    throw new RuntimeException("wrong long: v=" + v + " vs " + bytesToLong(binaryToken.get()));
                //}
            }
            w.addDocument(doc);
            count++;
            if (count % 200000 == 0) {
                long ms = System.currentTimeMillis();
                System.out.println("Indexed " + count + ": " + ((ms - startMS) / 1000.0) + " sec");
            }
        }
        reader.close();
        System.out.println("Final Indexed " + count + ": " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec");
        // nocommit just to make debugging easier:
        //System.out.println("Optimize...");
        //w.forceMerge(1);
        System.out.println("Close...");
        w.close();
        System.out.println("After close: " + ((System.currentTimeMillis() - startMS) / 1000.0) + " sec");
        // Print CheckIndex:
        ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
        CheckIndex checker = new CheckIndex(dir);
        checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8), true);
        CheckIndex.Status status = checker.checkIndex();
        System.out.println("Done CheckIndex:");
        System.out.println(bos.toString(IOUtils.UTF_8));
        if (status.clean == false) {
            throw new IllegalStateException("CheckIndex failed");
        }
        SegmentInfos infos = new SegmentInfos();
        infos.read(dir);
        long totBytes = 0;
        for (SegmentCommitInfo info : infos) {
            totBytes += info.sizeInBytes();
        }
        System.out.println("\nTotal index size: " + totBytes + " bytes");
    } else {
        System.out.println("Skip indexing: index already exists");
    }
    List<Query> queries = new ArrayList<>();
    InputStream is = new FileInputStream(queriesFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    while (true) {
        String line = reader.readLine();
        if (line == null) {
            break;
        }
        String[] numbers = line.trim().split(" ");
        if (numbers.length != 2) {
            throw new IllegalArgumentException("could not parse query line: " + line);
        }
        long minValue = Long.parseLong(numbers[0]);
        long maxValue = Long.parseLong(numbers[1]);
        if (useNumericField) {
            queries.add(NumericRangeQuery.newLongRange("number", precStep, minValue, maxValue, true, true));
        } else {
            longToBytes(minValue, binaryToken);
            BytesRef minTerm = binaryToken.toBytesRef();
            longToBytes(maxValue, binaryToken);
            BytesRef maxTerm = binaryToken.toBytesRef();
            queries.add(new TermRangeQuery("number", minTerm, maxTerm, true, true));
        }
        if (queries.size() == 200) {
            break;
        }
    }
    DirectoryReader r = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(r);
    s.setQueryCache(null); // don't bench the cache
    printQueryTerms((MultiTermQuery) queries.get(0), s);
    long bestMS = Long.MAX_VALUE;
    for (int iter = 0; iter < 10; iter++) {
        long startMS = System.currentTimeMillis();
        long totalHits = 0;
        long hash = 0;
        for (Query query : queries) {
            TopDocs hits = s.search(query, 10);
            totalHits += hits.totalHits;
            hash = hash * 31 + hits.totalHits;
        }
        long ms = System.currentTimeMillis() - startMS;
        System.out.println("iter " + iter + ": " + ms + " msec; totalHits=" + totalHits + " hash=" + hash);
        if (ms < bestMS) {
            System.out.println("  **");
            bestMS = ms;
        }
    }
    /*
    long t0 = System.currentTimeMillis();
    long bytesUsed = 0;
    for (int i = 0; i < 1000; i++) {
        for (AtomicReaderContext ctx : r.leaves()) {
            bytesUsed += ((SegmentReader) ctx.reader()).ramBytesUsed();
        }
    }
    System.out.println((System.currentTimeMillis() - t0) + " msec for 1000 ramBytesUsed: " + (bytesUsed / 1000));
    */
    r.close();
    dir.close();
}
From source file:perf.IndexAndSearchOpenStreetMaps.java
License:Apache License
private static void createIndex(boolean fast, boolean doForceMerge, boolean doDistanceSort)
        throws IOException, InterruptedException {
    CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);
    int BUFFER_SIZE = 1 << 16; // 64K
    InputStream is;
    if (SMALL) {
        is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.subsetPlusAllLondon.txt"));
    } else {
        is = Files.newInputStream(Paths.get(DATA_LOCATION, "latlon.txt"));
    }
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, decoder), BUFFER_SIZE);
    int NUM_THREADS;
    if (fast) {
        NUM_THREADS = 4;
    } else {
        NUM_THREADS = 1;
    }
    int CHUNK = 10000;
    long t0 = System.nanoTime();
    AtomicLong totalCount = new AtomicLong();
    for (int part = 0; part < NUM_PARTS; part++) {
        Directory dir = FSDirectory.open(Paths.get(getName(part, doDistanceSort)));
        IndexWriterConfig iwc = new IndexWriterConfig(null);
        iwc.setCodec(getCodec(fast));
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        if (fast) {
            ((TieredMergePolicy) iwc.getMergePolicy()).setMaxMergedSegmentMB(Double.POSITIVE_INFINITY);
            iwc.setRAMBufferSizeMB(1024);
        } else {
            iwc.setMaxBufferedDocs(109630);
            iwc.setMergePolicy(new LogDocMergePolicy());
            iwc.setMergeScheduler(new SerialMergeScheduler());
        }
        iwc.setInfoStream(new PrintStreamInfoStream(System.out));
        IndexWriter w = new IndexWriter(dir, iwc);
        Thread[] threads = new Thread[NUM_THREADS];
        AtomicBoolean finished = new AtomicBoolean();
        Object lock = new Object();
        final int finalPart = part;
        for (int t = 0; t < NUM_THREADS; t++) {
            threads[t] = new Thread() {
                @Override
                public void run() {
                    String[] lines = new String[CHUNK];
                    int chunkCount = 0;
                    while (finished.get() == false) {
                        try {
                            int count = CHUNK;
                            synchronized (lock) {
                                for (int i = 0; i < CHUNK; i++) {
                                    String line = reader.readLine();
                                    if (line == null) {
                                        count = i;
                                        finished.set(true);
                                        break;
                                    }
                                    lines[i] = line;
                                }
                                if (finalPart == 0 && totalCount.get() + count >= 2000000000) {
                                    finished.set(true);
                                }
                            }
                            for (int i = 0; i < count; i++) {
                                String[] parts = lines[i].split(",");
                                //long id = Long.parseLong(parts[0]);
                                double lat = Double.parseDouble(parts[1]);
                                double lon = Double.parseDouble(parts[2]);
                                Document doc = new Document();
                                if (useGeoPoint) {
                                    doc.add(new GeoPointField("point", lat, lon, Field.Store.NO));
                                } else if (useGeo3D || useGeo3DLarge) {
                                    doc.add(new Geo3DPoint("point", lat, lon));
                                } else {
                                    doc.add(new LatLonPoint("point", lat, lon));
                                    if (doDistanceSort) {
                                        doc.add(new LatLonDocValuesField("point", lat, lon));
                                    }
                                }
                                w.addDocument(doc);
                                long x = totalCount.incrementAndGet();
                                if (x % 1000000 == 0) {
                                    System.out.println(x + "...");
                                }
                            }
                            chunkCount++;
                            if (false && SMALL == false && chunkCount == 20000) {
                                System.out.println("NOW BREAK EARLY");
                                break;
                            }
                        } catch (IOException ioe) {
                            throw new RuntimeException(ioe);
                        }
                    }
                }
            };
            threads[t].start();
        }
        for (Thread thread : threads) {
            thread.join();
        }
        System.out.println("Part " + part + " is done: w.maxDoc()=" + w.maxDoc());
        w.commit();
        System.out.println("done commit");
        long t1 = System.nanoTime();
        System.out.println(((t1 - t0) / 1000000000.0) + " sec to index part " + part);
        if (doForceMerge) {
            w.forceMerge(1);
            long t2 = System.nanoTime();
            System.out.println(((t2 - t1) / 1000000000.0) + " sec to force merge part " + part);
        }
        w.close();
    }
    //System.out.println(totalCount.get() + " total docs");
    //System.out.println("Force merge...");
    //w.forceMerge(1);
    //long t2 = System.nanoTime();
    //System.out.println(((t2-t1)/1000000000.0) + " sec to force merge");
    //w.close();
    //long t3 = System.nanoTime();
    //System.out.println(((t3-t2)/1000000000.0) + " sec to close");
}
From source file:perf.IndexGeoNames.java
License:Apache License
public static void main(String[] args) throws Exception {
    String geoNamesFile = args[0];
    File indexPath = new File(args[1]);
    int numThreads = Integer.parseInt(args[2]);
    int precStep = Integer.parseInt(args[3]);
    if (indexPath.exists()) {
        throw new IllegalArgumentException("please remove indexPath \"" + indexPath + "\" before running");
    }
    Directory dir = FSDirectory.open(indexPath);
    //IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    //iwc.setRAMBufferSizeMB(350);
    iwc.setInfoStream(new PrintStreamInfoStream(System.out));
    if (normal == false) {
        iwc.setRAMBufferSizeMB(1024);
        iwc.setMergePolicy(NoMergePolicy.INSTANCE);
        //iwc.setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES);
    } else {
        // 5/5 segments:
        iwc.setMaxBufferedDocs(157234);
        iwc.setRAMBufferSizeMB(-1);
    }
    //((ConcurrentMergeScheduler) iwc.getMergeScheduler()).setMaxMergesAndThreads(3, 1);
    final IndexWriter w = new IndexWriter(dir, iwc);
    final Field.Store store = Field.Store.NO;
    final FieldType doubleFieldType = new FieldType(
            store == Field.Store.NO ? DoubleField.TYPE_NOT_STORED : DoubleField.TYPE_STORED);
    doubleFieldType.setNumericPrecisionStep(precStep);
    doubleFieldType.freeze();
    final FieldType longFieldType = new FieldType(
            store == Field.Store.NO ? LongField.TYPE_NOT_STORED : LongField.TYPE_STORED);
    longFieldType.setNumericPrecisionStep(precStep);
    longFieldType.freeze();
    final FieldType intFieldType = new FieldType(
            store == Field.Store.NO ? IntField.TYPE_NOT_STORED : IntField.TYPE_STORED);
    intFieldType.setNumericPrecisionStep(precStep);
    intFieldType.freeze();
    // 64K buffer:
    InputStream is = new FileInputStream(geoNamesFile);
    final BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    final AtomicInteger docsIndexed = new AtomicInteger();
    final long startMS = System.currentTimeMillis();
    Thread[] threads = new Thread[numThreads];
    // With reuse it's ~ 38% faster (41.8 sec vs 67.0 sec):
    final boolean reuseDocAndFields = false;
    for (int i = 0; i < numThreads; i++) {
        threads[i] = new Thread() {
            @Override
            public void run() {
                ParsePosition datePos = new ParsePosition(0);
                SimpleDateFormat dateParser = new SimpleDateFormat("yyyy-MM-dd", Locale.US);
                if (reuseDocAndFields) {
                    Document doc = new Document();
                    IntField geoNameID = new IntField("geoNameID", 0, intFieldType);
                    doc.add(geoNameID);
                    TextField nameField = new TextField("name", "", store);
                    doc.add(nameField);
                    TextField asciiNameField = new TextField("asciiName", "", store);
                    doc.add(asciiNameField);
                    TextField alternateNameField = new TextField("alternateNames", "", store);
                    doc.add(alternateNameField);
                    StringField featureClassField = new StringField("featureClass", "", store);
                    doc.add(featureClassField);
                    StringField featureCodeField = new StringField("featureCode", "", store);
                    doc.add(featureCodeField);
                    StringField countryCodeField = new StringField("countryCode", "", store);
                    doc.add(countryCodeField);
                    StringField cc2Field = new StringField("cc2", "", store);
                    doc.add(cc2Field);
                    StringField admin1Field = new StringField("admin1", "", store);
                    doc.add(admin1Field);
                    StringField admin2Field = new StringField("admin2", "", store);
                    doc.add(admin2Field);
                    StringField admin3Field = new StringField("admin3", "", store);
                    doc.add(admin3Field);
                    StringField admin4Field = new StringField("admin4", "", store);
                    doc.add(admin4Field);
                    StringField tzField = new StringField("timezone", "", store);
                    doc.add(tzField);
                    while (true) {
                        try {
                            // Curiously BufferedReader.readLine seems to be thread-safe...
                            String line = reader.readLine();
                            if (line == null) {
                                break;
                            }
                            String[] values = line.split("\t");
                            geoNameID.setIntValue(Integer.parseInt(values[0]));
                            nameField.setStringValue(values[1]);
                            asciiNameField.setStringValue(values[2]);
                            alternateNameField.setStringValue(values[3]);
                            /*
                            if (values[4].isEmpty() == false) {
                                double v = Double.parseDouble(values[4]);
                                doc.add(new DoubleField("latitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("latitude", v));
                            }
                            if (values[5].isEmpty() == false) {
                                double v = Double.parseDouble(values[5]);
                                doc.add(new DoubleField("longitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("longitude", v));
                            }
                            */
                            featureClassField.setStringValue(values[6]);
                            featureCodeField.setStringValue(values[7]);
                            countryCodeField.setStringValue(values[8]);
                            cc2Field.setStringValue(values[9]);
                            admin1Field.setStringValue(values[10]);
                            admin2Field.setStringValue(values[11]);
                            admin3Field.setStringValue(values[12]);
                            admin4Field.setStringValue(values[13]);
                            /*
                            if (values[14].isEmpty() == false) {
                                long v = Long.parseLong(values[14]);
                                doc.add(new LongField("population", v, longFieldType));
                                doc.add(new NumericDocValuesField("population", v));
                            }
                            if (values[15].isEmpty() == false) {
                                long v = Long.parseLong(values[15]);
                                doc.add(new LongField("elevation", v, longFieldType));
                                doc.add(new NumericDocValuesField("elevation", v));
                            }
                            if (values[16].isEmpty() == false) {
                                doc.add(new IntField("dem", Integer.parseInt(values[16]), intFieldType));
                            }
                            */
                            tzField.setStringValue(values[17]);
                            /*
                            if (values[18].isEmpty() == false) {
                                datePos.setIndex(0);
                                Date date = dateParser.parse(values[18], datePos);
                                doc.add(new LongField("modified", date.getTime(), longFieldType));
                            }
                            */
                            w.addDocument(doc);
                            int count = docsIndexed.incrementAndGet();
                            if (count % 200000 == 0) {
                                long ms = System.currentTimeMillis();
                                System.out.println(count + ": " + ((ms - startMS) / 1000.0) + " sec");
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                } else {
                    while (true) {
                        try {
                            // Curiously BufferedReader.readLine seems to be thread-safe...
                            String line = reader.readLine();
                            if (line == null) {
                                break;
                            }
                            String[] values = line.split("\t");
                            Document doc = new Document();
                            doc.add(new IntField("geoNameID", Integer.parseInt(values[0]), intFieldType));
                            doc.add(new TextField("name", values[1], store));
                            doc.add(new TextField("asciiName", values[2], store));
                            doc.add(new TextField("alternateNames", values[3], store));
                            if (values[4].isEmpty() == false) {
                                double v = Double.parseDouble(values[4]);
                                doc.add(new DoubleField("latitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("latitude", v));
                            }
                            if (values[5].isEmpty() == false) {
                                double v = Double.parseDouble(values[5]);
                                doc.add(new DoubleField("longitude", v, doubleFieldType));
                                doc.add(new DoubleDocValuesField("longitude", v));
                            }
                            doc.add(new StringField("featureClass", values[6], store));
                            doc.add(new StringField("featureCode", values[7], store));
                            doc.add(new StringField("countryCode", values[8], store));
                            doc.add(new StringField("cc2", values[9], store));
                            doc.add(new StringField("admin1Code", values[10], store));
                            doc.add(new StringField("admin2Code", values[11], store));
                            doc.add(new StringField("admin3Code", values[12], store));
                            doc.add(new StringField("admin4Code", values[13], store));
                            if (values[14].isEmpty() == false) {
                                long v = Long.parseLong(values[14]);
                                doc.add(new LongField("population", v, longFieldType));
                                doc.add(new NumericDocValuesField("population", v));
                            }
                            if (values[15].isEmpty() == false) {
                                long v = Long.parseLong(values[15]);
                                doc.add(new LongField("elevation", v, longFieldType));
                                doc.add(new NumericDocValuesField("elevation", v));
                            }
                            if (values[16].isEmpty() == false) {
                                doc.add(new IntField("dem", Integer.parseInt(values[16]), intFieldType));
                            }
                            doc.add(new StringField("timezone", values[17], store));
                            if (values[18].isEmpty() == false) {
                                datePos.setIndex(0);
                                Date date = dateParser.parse(values[18], datePos);
                                doc.add(new LongField("modified", date.getTime(), longFieldType));
                            }
                            w.addDocument(doc);
                            int count = docsIndexed.incrementAndGet();
                            if (count % 200000 == 0) {
                                long ms = System.currentTimeMillis();
                                System.out.println(count + ": " + ((ms - startMS) / 1000.0) + " sec");
                            }
                        } catch (Exception e) {
                            throw new RuntimeException(e);
                        }
                    }
                }
            }
        };
        threads[i].start();
    }
    DirectoryReader r = DirectoryReader.open(w, true);
    for (int i = 0; i < 100; i++) {
        DirectoryReader r2 = DirectoryReader.openIfChanged(r);
        if (r2 != null) {
            r.close();
            r = r2;
        }
        Thread.sleep(500);
    }
    if (r != null) {
        r.close();
        r = null;
    }
    for (int i = 0; i < numThreads; i++) {
        threads[i].join();
    }
    long ms = System.currentTimeMillis();
    System.out.println(docsIndexed + ": " + ((ms - startMS) / 1000.0) + " sec");
    //System.out.println("tot conflicts: " + BytesRefHash.totConflict);
    //w.shutdown(normal);
    w.close();
    dir.close();
}
From source file:stroom.index.server.IndexShardWriterImpl.java
License:Apache License
private synchronized boolean doOpen(final boolean create) {
    boolean success = false;
    try {
        // Never open deleted index shards.
        if (IndexShardStatus.DELETED.equals(indexShard.getStatus())) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Shard is deleted " + indexShard);
            }
            return false;
        }
        // Don't open old index shards for writing.
        final Version currentVersion = LuceneVersionUtil
                .getLuceneVersion(LuceneVersionUtil.getCurrentVersion());
        final Version shardVersion = LuceneVersionUtil.getLuceneVersion(indexShard.getIndexVersion());
        if (!shardVersion.equals(currentVersion)) {
            if (LOGGER.isDebugEnabled()) {
                LOGGER.debug("Shard version is different to current version " + indexShard);
            }
            return false;
        }
        final long startMs = System.currentTimeMillis();
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("Opening " + indexShard);
        }
        if (create) {
            // Make sure the index directory does not exist. If one does
            // then throw an exception as we don't want to overwrite an index.
            if (Files.isDirectory(dir)) {
                // This is a workaround for lingering .nfs files.
                Files.list(dir).forEach(file -> {
                    if (Files.isDirectory(file) || !file.getFileName().startsWith(".")) {
                        throw new IndexException("Attempting to create a new index in \""
                                + dir.toAbsolutePath().toString() + "\" but one already exists.");
                    }
                });
            } else {
                // Try and make all required directories.
                try {
                    Files.createDirectories(dir);
                } catch (final IOException e) {
                    throw new IndexException("Unable to create directories for new index in \""
                            + dir.toAbsolutePath().toString() + "\"");
                }
            }
        }
        // Create lucene directory object.
        directory = new NIOFSDirectory(dir, SimpleFSLockFactory.INSTANCE);
        analyzerWrapper.setVersion(shardVersion);
        final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzerWrapper);
        // In debug mode we do extra trace in LUCENE and we also count
        // certain logging info like merge and flush counts, so you can
        // get this later using the trace method.
        if (LOGGER.isDebugEnabled()) {
            loggerPrintStream = new LoggerPrintStream(LOGGER);
            for (final String term : LOG_WATCH_TERMS.values()) {
                loggerPrintStream.addWatchTerm(term);
            }
            indexWriterConfig.setInfoStream(loggerPrintStream);
        }
        // IndexWriter to use for adding data to the index.
        indexWriter = new IndexWriter(directory, indexWriterConfig);
        final LiveIndexWriterConfig liveIndexWriterConfig = indexWriter.getConfig();
        liveIndexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB);
        // TODO : We might still want to write separate segments, I'm not
        // sure on pros/cons?
        liveIndexWriterConfig.setUseCompoundFile(false);
        liveIndexWriterConfig.setMaxBufferedDocs(Integer.MAX_VALUE);
        // Check the number of committed docs in this shard.
        documentCount.set(indexWriter.numDocs());
        lastDocumentCount = documentCount.get();
        if (create) {
            if (lastDocumentCount != 0) {
                LOGGER.error("Index should be new but already contains docs: " + lastDocumentCount);
            }
        } else if (indexShard.getDocumentCount() != lastDocumentCount) {
            LOGGER.error("Mismatch document count. Index says " + lastDocumentCount + " DB says "
                    + indexShard.getDocumentCount());
        }
        // We have opened the index so update the DB object.
        setStatus(IndexShardStatus.OPEN);
        // Output some debug.
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("getIndexWriter() - Opened " + indexShard + " in "
                    + (System.currentTimeMillis() - startMs) + "ms");
        }
        success = true;
    } catch (final LockObtainFailedException t) {
        LOGGER.warn(t.getMessage());
    } catch (final Throwable t) {
        LOGGER.error(t.getMessage(), t);
    }
    return success;
}
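Conversely, when no diagnostics are wanted nothing needs to be configured: the default is InfoStream.getDefault(), which is the no-op InfoStream.NO_OUTPUT unless a process-wide default has been installed via InfoStream.setDefault(...). To silence a config explicitly, a one-line sketch (against an IndexWriterConfig named iwc, as in the examples above):

    iwc.setInfoStream(InfoStream.NO_OUTPUT); // no-op sink; discards all diagnostics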