Java tutorial
/* * Copyright (C) 2014 Indeed Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except * in compliance with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either * express or implied. See the License for the specific language governing permissions and * limitations under the License. */ package com.indeed.lsmtree.recordcache; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.indeed.lsmtree.core.StorageType; import com.indeed.lsmtree.core.Store; import com.indeed.lsmtree.core.StoreBuilder; import com.indeed.util.compress.SnappyCodec; import com.indeed.util.core.Either; import com.indeed.util.serialization.Serializer; import com.indeed.util.varexport.Export; import fj.P; import fj.P2; import org.apache.commons.collections.comparators.ComparableComparator; import org.apache.log4j.Logger; import java.io.File; import java.io.IOException; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; import static com.indeed.util.core.Either.Left; import static com.indeed.util.core.Either.Right; /** * This class creates an LSM-tree based cache from recordLog files. We call it "standalone" because * it does not need to access the recordLog files after initially loading them. * * This cache stores the <V> value in the underlying cache directly. This contrasts with * {@link PersistentRecordCache}, which creates an underlying cache * of key -> (segment number of the recordLog file where the value is located) and then looks up the * value in the relevant recordLog file using the segment number. * * NOTE: Since the dataset will occasionally double in size during compaction, this class is only * recommended for use on recordLog datasets of under ~100GB. * * @param <K> the type of keys maintained by this cache * @param <V> the type of cached values * * @author jplaisance * @author dcahill */ public final class StandalonePersistentRecordCache<K, V> implements RecordCache<K, V> { private static final Logger log = Logger.getLogger(StandalonePersistentRecordCache.class); private final Store<K, V> index; private final RecordLogDirectoryPoller.Functions indexUpdateFunctions; private StandalonePersistentRecordCache(final Store<K, V> index, final File checkpointDir) throws IOException { this.index = index; indexUpdateFunctions = new RecordLogDirectoryPoller.Functions() { AtomicLong indexPutTime = new AtomicLong(0); AtomicLong indexDeleteTime = new AtomicLong(0); AtomicInteger indexPuts = new AtomicInteger(0); AtomicInteger indexDeletes = new AtomicInteger(0); AtomicInteger count = new AtomicInteger(0); @Override public void process(final long position, Operation op) throws IOException { count.incrementAndGet(); if (count.get() % 1000 == 0) { final int puts = indexPuts.get(); if (log.isDebugEnabled() && puts > 0) { log.debug("avg index put time: " + indexPutTime.get() / puts / 1000d + " us"); } final int deletes = indexDeletes.get(); if (log.isDebugEnabled() && deletes > 0) { log.debug("avg index delete time: " + indexDeleteTime.get() / deletes / 1000d + " us"); } } if (op.getClass() == Put.class) { final Put<K, V> put = (Put) op; final long start = System.nanoTime(); synchronized (index) { index.put(put.getKey(), put.getValue()); } indexPutTime.addAndGet(System.nanoTime() - start); indexPuts.incrementAndGet(); } else if (op.getClass() == Delete.class) { final Delete<K> delete = (Delete) op; for (K k : delete.getKeys()) { final long start = System.nanoTime(); synchronized (index) { index.delete(k); } indexDeleteTime.addAndGet(System.nanoTime() - start); indexDeletes.incrementAndGet(); } } else if (op.getClass() == Checkpoint.class) { final Checkpoint checkpoint = (Checkpoint) op; if (checkpointDir != null) { sync(); index.checkpoint(new File(checkpointDir, String.valueOf(checkpoint.getTimestamp()))); } } else { log.warn("operation class unknown"); } } @Override public void sync() throws IOException { final long start = System.nanoTime(); index.sync(); log.debug("sync time: " + (System.nanoTime() - start) / 1000d + " us"); } }; } @Export(name = "index-active-space-usage") public long getIndexActiveSpaceUsage() throws IOException { return index.getActiveSpaceUsage(); } @Export(name = "index-total-space-usage") public long getIndexTotalSpaceUsage() throws IOException { return index.getTotalSpaceUsage(); } @Export(name = "index-reserverd-space-usage") public long getIndexReservedSpaceUsage() { return index.getReservedSpaceUsage(); } @Export(name = "index-free-space") public long getIndexFreeSpace() throws IOException { return index.getFreeSpace(); } public V get(K key, CacheStats cacheStats) { final Map<K, V> results = getAll(Collections.singleton(key), cacheStats); if (results.size() > 0) { return results.get(key); } return null; } public Map<K, V> getAll(Collection<K> keys, CacheStats cacheStats) { final Map<K, V> results = Maps.newHashMap(); for (K key : keys) { final long start = System.nanoTime(); try { V value = index.get(key); results.put(key, value); } catch (Exception e) { log.error("index read error while fetching key " + key, e); cacheStats.indexReadErrors++; } cacheStats.indexTime += System.nanoTime() - start; } cacheStats.misses = keys.size() - results.size(); log.debug("misses: " + (keys.size() - results.size())); return results; } public Iterator<Either<Exception, P2<K, V>>> getStreaming(Iterator<K> keys, AtomicInteger progress, AtomicInteger skipped) { log.info("starting store lookups"); final List<Either<Exception, P2<K, V>>> ret = Lists.newArrayList(); int notFound = 0; while (keys.hasNext()) { final K key = keys.next(); final V value; try { value = index.get(key); } catch (IOException e) { log.error("error", e); return Iterators.singletonIterator(Left.<Exception, P2<K, V>>of(new IndexReadException(e))); } if (value != null) { ret.add(Right.<Exception, P2<K, V>>of(P.p(key, value))); } else { notFound++; } } if (progress != null) progress.addAndGet(notFound); if (skipped != null) skipped.addAndGet(notFound); log.info("store lookups complete"); return ret.iterator(); } @Override public RecordLogDirectoryPoller.Functions getFunctions() { return indexUpdateFunctions; } @Override public void close() throws IOException { index.close(); } public void waitForCompactions() throws InterruptedException { index.waitForCompactions(); } public static class Builder<K, V> { private File indexDir; private File checkpointDir; private Serializer<K> keySerializer; private Serializer<V> valueSerializer; private boolean dedicatedIndexPartition; private Comparator<K> comparator = new ComparableComparator(); private boolean mlockIndex = false; private boolean mlockBloomFilters = false; private long bloomFilterMemory = -1; public StandalonePersistentRecordCache<K, V> build() throws IOException { if (indexDir == null) throw new IllegalArgumentException("indexDir must be set"); if (keySerializer == null) throw new IllegalArgumentException("keySerializer must be set"); if (valueSerializer == null) throw new IllegalArgumentException("valueSerializer must be set"); SnappyCodec codec = new SnappyCodec(); StoreBuilder<K, V> indexBuilder = new StoreBuilder<K, V>(indexDir, keySerializer, valueSerializer); indexBuilder.setMaxVolatileGenerationSize(8 * 1024 * 1024); indexBuilder.setCodec(codec); indexBuilder.setStorageType(StorageType.BLOCK_COMPRESSED); indexBuilder.setComparator(comparator); indexBuilder.setDedicatedPartition(dedicatedIndexPartition); indexBuilder.setMlockFiles(mlockIndex); indexBuilder.setMlockBloomFilters(mlockBloomFilters); if (bloomFilterMemory >= 0) indexBuilder.setBloomFilterMemory(bloomFilterMemory); final Store<K, V> index = indexBuilder.build(); return new StandalonePersistentRecordCache<K, V>(index, checkpointDir); } public Builder<K, V> setIndexDir(final File indexDir) { this.indexDir = indexDir; return this; } public Builder<K, V> setKeySerializer(final Serializer<K> keySerializer) { this.keySerializer = keySerializer; return this; } public Builder<K, V> setValueSerializer(final Serializer<V> valueSerializer) { this.valueSerializer = valueSerializer; return this; } public Builder<K, V> setComparator(final Comparator<K> comparator) { this.comparator = comparator; return this; } public Builder<K, V> setCheckpointDir(final File checkpointDir) { this.checkpointDir = checkpointDir; return this; } public boolean isDedicatedIndexPartition() { return dedicatedIndexPartition; } public Builder<K, V> setDedicatedIndexPartition(final boolean dedicatedIndexPartition) { this.dedicatedIndexPartition = dedicatedIndexPartition; return this; } public boolean isMlockIndex() { return mlockIndex; } public Builder<K, V> setMlockIndex(final boolean mlockIndex) { this.mlockIndex = mlockIndex; return this; } public boolean isMlockBloomFilters() { return mlockBloomFilters; } public Builder<K, V> setMlockBloomFilters(final boolean mlockBloomFilters) { this.mlockBloomFilters = mlockBloomFilters; return this; } public long getBloomFilterMemory() { return bloomFilterMemory; } public Builder<K, V> setBloomFilterMemory(final long bloomFilterMemory) { this.bloomFilterMemory = bloomFilterMemory; return this; } } }