// Java tutorial
/* * Copyright 2011 Peter Karich info@jetsli.de * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.jetsli.lumeo; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import de.jetsli.lumeo.util.Helper; import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.SearcherWarmer; import org.apache.lucene.search.TotalHitCountCollector; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import java.util.Map; import java.io.File; import org.apache.lucene.store.Directory; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.NRTManager; import org.apache.lucene.search.NRTManagerReopenThread; import de.jetsli.lumeo.util.StopWatch; import java.util.LinkedHashMap; import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.search.SearcherManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Execute via * bash$ export JAVA_OPTS="-Xms512m -Xmx512m" * bash$ mvn clean install * 
bash$ mvn exec:java -Dexec.mainClass="de.jetsli.lumeo.LucPerfTest" * * @author Peter Karich, info@jetsli.de */ public class LucPerfTest { Random rand; String exception; Logger logger = LoggerFactory.getLogger(getClass()); NRTManager nrtManager; long latestGen; Analyzer keyAna = new KeywordAnalyzer(); Map<String, Analyzer> anas = new LinkedHashMap<String, Analyzer>() { { put("test", keyAna); } }; NRTManagerReopenThread reopenThread; Directory dir; IndexWriter writer; int docs = 0; Version version = Version.LUCENE_40; File file = new File("/tmp/luc-perf"); public static void main(String[] args) { new LucPerfTest().start(); } public void start() { setUp(); testPerf(); } public void setUp() { rand = new Random(1); docs = 0; } public void testPerf() { new PerfRunner(1000000, 26f) { @Override public void reinit() throws Exception { super.reinit(); if (nrtManager != null) { nrtManager.close(); reopenThread.close(); writer.waitForMerges(); writer.close(); dir.close(); } Helper.deleteDir(file); docs = 0; IndexWriterConfig cfg = new IndexWriterConfig(version, keyAna); cfg.setRAMBufferSizeMB(128); // cfg.setCodec(new Lucene40Codec() { // // @Override public PostingsFormat getPostingsFormatForField(String field) { // if ("_id".equals(field)) // return new Pulsing40PostingsFormat(); // else // return new Lucene40PostingsFormat(); // } // }); LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy(); mp.setUseCompoundFile(false); cfg.setMergePolicy(mp); dir = FSDirectory.open(file); cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE); writer = new IndexWriter(dir, cfg); nrtManager = new NRTManager(writer, new SearcherWarmer() { @Override public void warm(IndexSearcher s) throws IOException { // TODO get some random vertices via getVertices? 
} }); int priority = Math.min(Thread.currentThread().getPriority() + 2, Thread.MAX_PRIORITY); reopenThread = new NRTManagerReopenThread(nrtManager, 5.0, 0.03); reopenThread.setName("NRT Reopen Thread"); reopenThread.setPriority(priority); reopenThread.setDaemon(true); reopenThread.start(); } final BytesRef bytes = new BytesRef(); @Override public void innerRun(int trial, int i) { long id = i; Document newDoc = new Document(); NumericField idField = new NumericField("_id", 6, NumericField.TYPE_STORED).setLongValue(id); Field uIdField = new Field("_uid", "" + id, StringField.TYPE_STORED); Field typeField = new Field("_type", "test", StringField.TYPE_STORED); newDoc.add(idField); newDoc.add(uIdField); newDoc.add(typeField); // Analyzer ana = anas.get(newDoc.get("_type")); try { NumericUtils.longToPrefixCoded(id, 0, bytes); latestGen = nrtManager.updateDocument(new Term("_id", bytes), newDoc, keyAna); docs++; } catch (IOException ex) { logger.error("Cannot update " + i, ex); } } @Override protected void finalAssert() throws Exception { // logger.info("wait for " + latestGen + ", current:" + nrtManager.getCurrentSearchingGen(true)); nrtManager.waitForGeneration(latestGen, true); // writer.commit(); // writer.waitForMerges(); SearcherManager mng = nrtManager.getSearcherManager(true); // mng.maybeReopen(); IndexSearcher searcher = mng.acquire(); try { TotalHitCountCollector coll = new TotalHitCountCollector(); searcher.search(new MatchAllDocsQuery(), coll); long total = coll.getTotalHits(); if (docs != total) throw new IllegalStateException(total + " vs. 
" + docs); } finally { nrtManager.getSearcherManager(true).release(searcher); } } }.run(); } abstract class PerfRunner implements Runnable { private StopWatch sw = new StopWatch(); protected final int TRIALS = 6; protected final int items; protected final float expectedTime; PerfRunner(int items, float expectedTime) { this.expectedTime = expectedTime; this.items = items; } abstract void innerRun(int trial, int i); public void reinit() throws Exception { rand = new Random(1); } public void warmJvm() { logger.info(version + ": warming jvm. items:" + items); try { reinit(); } catch (Exception ex) { logger.error("Cannot reinit", ex); } sw.start(); for (int i = 0; i < items; i++) { innerRun(-1, i); } } @Override public void run() { warmJvm(); logger.info("starting benchmark " + sw.stop().getSeconds()); float allSecs = 0; for (int trial = 0; trial < TRIALS; trial++) { sw = new StopWatch().start(); try { reinit(); } catch (Exception ex) { logger.error("Cannot reinit", ex); } float reinitTime = sw.stop().getSeconds(); sw = new StopWatch("perf" + trial).start(); for (int i = 0; i < items; i++) { innerRun(trial, i); } float indexingTime = sw.stop().getSeconds(); sw = new StopWatch().start(); try { finalAssert(); } catch (Exception ex) { logger.info("couldn't call finalAssert", ex); } System.gc(); logger.info("indexing:" + indexingTime + ", finalAssert:" + sw.stop().getSeconds() + ", reinit:" + reinitTime + " freeMB:" + Runtime.getRuntime().freeMemory() / (1 << 20)); // allSecs += reinitTime; allSecs += indexingTime; // allSecs += sw.getSeconds(); } float res = allSecs / TRIALS; logger.info("finished benchmark with " + res + " seconds"); // assertTrue("mean of benchmark should be less than " + expectedTime + " seconds but was " + res, res < expectedTime); } protected void finalAssert() throws Exception { } } }