// Java tutorial
/* * Druid - a distributed column store. * Copyright (C) 2012 Metamarkets Group Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package com.metamx.druid.kv; import com.google.common.base.Charsets; import com.google.common.collect.Ordering; import com.google.common.io.Closeables; import com.google.common.primitives.Ints; import com.metamx.common.IAE; import java.io.ByteArrayOutputStream; import java.io.Closeable; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.channels.WritableByteChannel; import java.util.Arrays; import java.util.Iterator; /** * A generic, flat storage mechanism. Use static methods fromArray() or fromIterable() to construct. If input * is sorted, supports binary search index lookups. If input is not sorted, only supports array-like index lookups. 
* <p/> * V1 Storage Format: * <p/> * byte 1: version (0x1) * byte 2 == 0x1 => allowReverseLookup * bytes 3-6 => numBytesUsed * bytes 7-10 => numElements * bytes 10-((numElements * 4) + 10): integers representing *end* offsets of byte serialized values * bytes ((numElements * 4) + 10)-(numBytesUsed + 2): 4-byte integer representing length of value, followed by bytes for value */ public class GenericIndexed<T> implements Indexed<T> { private static final byte version = 0x1; public static <T> GenericIndexed<T> fromArray(T[] objects, ObjectStrategy<T> strategy) { return fromIterable(Arrays.asList(objects), strategy); } public static <T> GenericIndexed<T> fromIterable(Iterable<T> objectsIterable, ObjectStrategy<T> strategy) { Iterator<T> objects = objectsIterable.iterator(); if (!objects.hasNext()) { final ByteBuffer buffer = ByteBuffer.allocate(4).putInt(0); buffer.flip(); return new GenericIndexed<T>(buffer, strategy, true); } boolean allowReverseLookup = true; int count = 1; T prevVal = objects.next(); while (objects.hasNext()) { T next = objects.next(); if (!(strategy.compare(prevVal, next) < 0)) { allowReverseLookup = false; } if (prevVal instanceof Closeable) { Closeables.closeQuietly((Closeable) prevVal); } prevVal = next; ++count; } if (prevVal instanceof Closeable) { Closeables.closeQuietly((Closeable) prevVal); } ByteArrayOutputStream headerBytes = new ByteArrayOutputStream(4 + (count * 4)); ByteArrayOutputStream valueBytes = new ByteArrayOutputStream(); int offset = 0; try { headerBytes.write(Ints.toByteArray(count)); for (T object : objectsIterable) { final byte[] bytes = strategy.toBytes(object); offset += 4 + bytes.length; headerBytes.write(Ints.toByteArray(offset)); valueBytes.write(Ints.toByteArray(bytes.length)); valueBytes.write(bytes); if (object instanceof Closeable) { Closeables.closeQuietly((Closeable) object); } } } catch (IOException e) { throw new RuntimeException(e); } ByteBuffer theBuffer = ByteBuffer.allocate(headerBytes.size() + 
valueBytes.size()); theBuffer.put(headerBytes.toByteArray()); theBuffer.put(valueBytes.toByteArray()); theBuffer.flip(); return new GenericIndexed<T>(theBuffer.asReadOnlyBuffer(), strategy, allowReverseLookup); } private final ByteBuffer theBuffer; private final ObjectStrategy<T> strategy; private final boolean allowReverseLookup; private final int size; private final int valuesOffset; GenericIndexed(ByteBuffer buffer, ObjectStrategy<T> strategy, boolean allowReverseLookup) { this.theBuffer = buffer; this.strategy = strategy; this.allowReverseLookup = allowReverseLookup; size = theBuffer.getInt(); valuesOffset = theBuffer.position() + (size << 2); } @Override public Class<? extends T> getClazz() { return strategy.getClazz(); } @Override public int size() { return size; } @Override public T get(int index) { if (index < 0) { throw new IAE("Index[%s] < 0", index); } if (index >= size) { throw new IAE(String.format("Index[%s] >= size[%s]", index, size)); } ByteBuffer myBuffer = theBuffer.asReadOnlyBuffer(); int startOffset = 4; int endOffset; if (index == 0) { endOffset = myBuffer.getInt(); } else { myBuffer.position(myBuffer.position() + ((index - 1) * 4)); startOffset = myBuffer.getInt() + 4; endOffset = myBuffer.getInt(); } if (startOffset == endOffset) { return null; } myBuffer.position(valuesOffset + startOffset); return strategy.fromByteBuffer(myBuffer, endOffset - startOffset); } @Override public int indexOf(T value) { if (!allowReverseLookup) { throw new UnsupportedOperationException("Reverse lookup not allowed."); } value = (value != null && value.equals("")) ? 
null : value; int minIndex = 0; int maxIndex = size - 1; while (minIndex <= maxIndex) { int currIndex = (minIndex + maxIndex) >>> 1; T currValue = get(currIndex); int comparison = strategy.compare(currValue, value); if (comparison == 0) { return currIndex; } if (comparison < 0) { minIndex = currIndex + 1; } else { maxIndex = currIndex - 1; } } return -(minIndex + 1); } public long getSerializedSize() { return theBuffer.remaining() + 2 + 4 + 4; } public void writeToChannel(WritableByteChannel channel) throws IOException { channel.write(ByteBuffer.wrap(new byte[] { version, allowReverseLookup ? (byte) 0x1 : (byte) 0x0 })); channel.write(ByteBuffer.wrap(Ints.toByteArray(theBuffer.remaining() + 4))); channel.write(ByteBuffer.wrap(Ints.toByteArray(size))); channel.write(theBuffer.asReadOnlyBuffer()); } public static <T> GenericIndexed<T> read(ByteBuffer buffer, ObjectStrategy<T> strategy) { byte versionFromBuffer = buffer.get(); if (version == versionFromBuffer) { boolean allowReverseLookup = buffer.get() == 0x1; int size = buffer.getInt(); ByteBuffer bufferToUse = buffer.asReadOnlyBuffer(); bufferToUse.limit(bufferToUse.position() + size); buffer.position(bufferToUse.limit()); return new GenericIndexed<T>(bufferToUse, strategy, allowReverseLookup); } throw new IAE("Unknown version[%s]", versionFromBuffer); } public static ObjectStrategy<String> stringStrategy = new ObjectStrategy<String>() { @Override public Class<? extends String> getClazz() { return String.class; } @Override public String fromByteBuffer(ByteBuffer buffer, int numBytes) { byte[] bytes = new byte[numBytes]; buffer.get(bytes); return new String(bytes, Charsets.UTF_8); } @Override public byte[] toBytes(String val) { if (val == null) { return new byte[] {}; } return val.getBytes(Charsets.UTF_8); } @Override public int compare(String o1, String o2) { return Ordering.natural().nullsFirst().compare(o1, o2); } }; @Override public Iterator<T> iterator() { return IndexedIterable.create(this).iterator(); } }