// Java tutorial
package org.apache.blur.lucene.warmup; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import org.apache.blur.log.Log; import org.apache.blur.log.LogFactory; import org.apache.blur.lucene.warmup.IndexTracerResult.FILE_TYPE; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.SegmentReader; import org.apache.lucene.index.Terms; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; /** * IndexWarmup is used to pre-read portions of the index by field. 
Usage:<br/> * <br/> * int maxSampleSize = 1000; <br/> * Directory dir = FSDirectory.open(new File("/path/index"));<br/> * dir = new TraceableDirectory(dir);<br/> * DirectoryReader reader = DirectoryReader.open(dir);<br/> * <br/> * IndexWarmup indexWarmup = new IndexWarmup(new AtomicBoolean());<br/> * Map<String, List<IndexTracerResult>> sampleIndex = * indexWarmup.sampleIndex(reader, "");<br/> * indexWarmup.warm(reader, sampleIndex, "uuid", null);<br/> * indexWarmup.warm(reader, sampleIndex, "test", "test");<br/> * indexWarmup.warm(reader, sampleIndex, "nothing", null);<br/> * indexWarmup.warm(reader, sampleIndex, "id2", "tst");<br/> */ public class IndexWarmup { private static final Log LOG = LogFactory.getLog(IndexWarmup.class); private static final String SAMPLE_EXT = ".sample"; private static final long _5_SECONDS = TimeUnit.SECONDS.toNanos(5); private static final int DEFAULT_THROTTLE = 2000000; private final AtomicBoolean _isClosed; private final int _maxSampleSize; private final long _maxBytesPerSec; public IndexWarmup(AtomicBoolean isClosed, int maxSampleSize, long maxBytesPerSec) { _isClosed = isClosed; _maxSampleSize = maxSampleSize; _maxBytesPerSec = maxBytesPerSec; } public IndexWarmup(AtomicBoolean isClosed, int maxSampleSize) { this(isClosed, maxSampleSize, DEFAULT_THROTTLE); } private static ThreadLocal<Boolean> runTrace = new ThreadLocal<Boolean>() { @Override protected Boolean initialValue() { return Boolean.FALSE; } }; public static boolean isRunTrace() { return runTrace.get(); } public static void enableRunTrace() { runTrace.set(Boolean.TRUE); } public static void disableRunTrace() { runTrace.set(Boolean.FALSE); } public void warm(IndexReader reader, Map<String, List<IndexTracerResult>> sampleIndex, String fieldName, String context) throws IOException { for (Entry<String, List<IndexTracerResult>> segment : sampleIndex.entrySet()) { warm(reader, segment.getValue(), fieldName, context); } } public void warm(IndexReader reader, 
List<IndexTracerResult> traces, String fieldName, String context) throws IOException { int index = find(traces, fieldName); if (index < 0) { // not found return; } IndexTracerResult trace = traces.get(index); for (FILE_TYPE type : FILE_TYPE.values()) { if (trace.isFilePositionCaptured(type)) { long startingPosition = trace.getPosition(type); long endingPosition = Long.MAX_VALUE; int nextIndex = findNextSetTrace(traces, index + 1, type); if (nextIndex >= 0) { IndexTracerResult next = traces.get(nextIndex); endingPosition = next.getPosition(type); } String fileName = trace.getFileName(type); String segmentName = trace.getSegmentName(); warm(reader, segmentName, fileName, fieldName, startingPosition, endingPosition, context); } } } private void warm(IndexReader reader, String segmentName, String fileName, String fieldName, long startingPosition, long endingPosition, String context) throws IOException { Directory dir = getDirectory(reader, segmentName, context); if (dir == null) { LOG.info("Context [{0}] cannot find segment [{1}]", context, segmentName); return; } if (endingPosition == Long.MAX_VALUE) { endingPosition = dir.fileLength(fileName) - 1; } if (_isClosed.get()) { LOG.info("Context [{0}] index closed", context); return; } long length = endingPosition - startingPosition; final long totalLength = length; IndexInput input = new ThrottledIndexInput(dir.openInput(fileName, IOContext.READ), _maxBytesPerSec); try { input.seek(startingPosition); byte[] buf = new byte[8192]; long start = System.nanoTime(); long bytesReadPerPass = 0; while (length > 0) { long now = System.nanoTime(); if (start + _5_SECONDS < now) { double seconds = (now - start) / 1000000000.0; double rateMbPerSec = (bytesReadPerPass / seconds) / 1000 / 1000; double complete = (((double) totalLength - (double) length) / (double) totalLength) * 100.0; LOG.debug( "Context [{3}] warming field [{0}] in file [{1}] is [{2}%] complete at rate of [{4} MB/s]", fieldName, fileName, complete, context, 
rateMbPerSec); start = System.nanoTime(); bytesReadPerPass = 0; if (_isClosed.get()) { LOG.info("Context [{0}] index closed", context); return; } } int len = (int) Math.min(length, buf.length); input.readBytes(buf, 0, len); length -= len; bytesReadPerPass += len; } long now = System.nanoTime(); double seconds = (now - start) / 1000000000.0; if (seconds < 1) { seconds = 1; } double rateMbPerSec = (bytesReadPerPass / seconds) / 1000 / 1000; LOG.debug("Context [{3}] warming field [{0}] in file [{1}] is [{2}%] complete at rate of [{4} MB/s]", fieldName, fileName, 100, context, rateMbPerSec); } finally { input.close(); } } private Directory getDirectory(IndexReader reader, String segmentName, String context) { if (reader instanceof AtomicReader) { return getDirectory((AtomicReader) reader, segmentName, context); } for (IndexReaderContext ctext : reader.getContext().leaves()) { if (_isClosed.get()) { LOG.info("Context [{0}] index closed", context); return null; } AtomicReaderContext atomicReaderContext = (AtomicReaderContext) ctext; AtomicReader atomicReader = atomicReaderContext.reader(); if (atomicReader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) atomicReader; if (segmentReader.getSegmentName().equals(segmentName)) { return segmentReader.directory(); } } } return null; } private Directory getDirectory(AtomicReader atomicReader, String segmentName, String context) { if (atomicReader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) atomicReader; if (segmentReader.getSegmentName().equals(segmentName)) { return segmentReader.directory(); } } return null; } private int findNextSetTrace(List<IndexTracerResult> traces, int startingIndex, FILE_TYPE type) { int size = traces.size(); for (int i = startingIndex; i < size; i++) { IndexTracerResult trace = traces.get(i); if (trace.isFilePositionCaptured(type)) { return i; } } return -1; } private int find(List<IndexTracerResult> traces, String fieldName) { int index = 0; for 
(IndexTracerResult trace : traces) { if (trace.getField().equals(fieldName)) { return index; } index++; } return -1; } public Map<String, List<IndexTracerResult>> sampleIndex(IndexReader reader, String context) throws IOException { Map<String, List<IndexTracerResult>> results = new HashMap<String, List<IndexTracerResult>>(); for (IndexReaderContext ctext : reader.getContext().leaves()) { if (_isClosed.get()) { LOG.info("Context [{0}] index closed", context); return null; } AtomicReaderContext atomicReaderContext = (AtomicReaderContext) ctext; AtomicReader atomicReader = atomicReaderContext.reader(); results.putAll(sampleIndex(atomicReader, context)); } return results; } public Map<String, List<IndexTracerResult>> sampleIndex(AtomicReader atomicReader, String context) throws IOException { Map<String, List<IndexTracerResult>> results = new HashMap<String, List<IndexTracerResult>>(); if (atomicReader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) atomicReader; Directory directory = segmentReader.directory(); if (!(directory instanceof TraceableDirectory)) { LOG.info("Context [{1}] cannot warmup directory [{0}] needs to be a TraceableDirectory.", directory, context); return results; } IndexTracer tracer = new IndexTracer((TraceableDirectory) directory, _maxSampleSize); String fileName = getSampleFileName(segmentReader.getSegmentName()); List<IndexTracerResult> segmentTraces = new ArrayList<IndexTracerResult>(); if (directory.fileExists(fileName)) { IndexInput input = directory.openInput(fileName, IOContext.READONCE); segmentTraces = read(input); input.close(); } else { Fields fields = atomicReader.fields(); for (String field : fields) { LOG.debug("Context [{1}] sampling field [{0}].", field, context); Terms terms = fields.terms(field); boolean hasOffsets = terms.hasOffsets(); boolean hasPayloads = terms.hasPayloads(); boolean hasPositions = terms.hasPositions(); tracer.initTrace(segmentReader, field, hasPositions, hasPayloads, hasOffsets); 
IndexTracerResult result = tracer.runTrace(terms); segmentTraces.add(result); } if (_isClosed.get()) { LOG.info("Context [{0}] index closed", context); return null; } IndexOutput output = directory.createOutput(fileName, IOContext.DEFAULT); write(segmentTraces, output); output.close(); } results.put(segmentReader.getSegmentName(), segmentTraces); } return results; } public static String getSampleFileName(String segmentName) { return segmentName + SAMPLE_EXT; } private List<IndexTracerResult> read(IndexInput input) throws IOException { int count = input.readVInt(); List<IndexTracerResult> results = new ArrayList<IndexTracerResult>(count); for (int i = 0; i < count; i++) { results.add(IndexTracerResult.read(input)); } return results; } private void write(List<IndexTracerResult> segmentTraces, IndexOutput output) throws IOException { output.writeVInt(segmentTraces.size()); for (IndexTracerResult r : segmentTraces) { r.write(output); } } }