Java tutorial
/* * Druid - a distributed column store. * Copyright (C) 2012 Metamarkets Group Inc. * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ package com.metamx.druid; import com.google.common.base.Function; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.io.Closeables; import com.google.common.primitives.Ints; import com.metamx.common.IAE; import com.metamx.common.ISE; import com.metamx.common.guava.BaseSequence; import com.metamx.common.guava.FunctionalIterator; import com.metamx.common.guava.Sequence; import com.metamx.druid.aggregation.AggregatorFactory; import com.metamx.druid.aggregation.BufferAggregator; import com.metamx.druid.aggregation.post.PostAggregator; import com.metamx.druid.collect.ResourceHolder; import com.metamx.druid.collect.StupidPool; import com.metamx.druid.index.brita.Filters; import com.metamx.druid.index.v1.processing.Cursor; import com.metamx.druid.index.v1.processing.DimensionSelector; import com.metamx.druid.input.MapBasedRow; import com.metamx.druid.input.Row; import com.metamx.druid.query.dimension.DimensionSpec; import com.metamx.druid.query.group.GroupByQuery; import org.joda.time.Interval; import javax.annotation.Nullable; import java.nio.ByteBuffer; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.TreeMap; /** */ public class GroupByQueryEngine { private final GroupByQueryEngineConfig config; private final StupidPool<ByteBuffer> intermediateResultsBufferPool; public GroupByQueryEngine(GroupByQueryEngineConfig config, StupidPool<ByteBuffer> intermediateResultsBufferPool) { this.config = config; this.intermediateResultsBufferPool = intermediateResultsBufferPool; } public Sequence<Row> process(final GroupByQuery query, StorageAdapter storageAdapter) { final List<Interval> intervals = query.getQuerySegmentSpec().getIntervals(); if (intervals.size() != 1) { throw new IAE("Should only have one interval, got[%s]", intervals); } final Iterable<Cursor> cursors = storageAdapter.makeCursors( Filters.convertDimensionFilters(query.getDimFilter()), intervals.get(0), query.getGranularity()); final ResourceHolder<ByteBuffer> bufferHolder = intermediateResultsBufferPool.take(); return new BaseSequence<Row, Iterator<Row>>(new BaseSequence.IteratorMaker<Row, Iterator<Row>>() { @Override public Iterator<Row> make() { return FunctionalIterator.create(cursors.iterator()) .transformCat(new Function<Cursor, Iterator<Row>>() { @Override public Iterator<Row> apply(@Nullable final Cursor cursor) { return new RowIterator(query, cursor, bufferHolder.get()); } }); } @Override public void cleanup(Iterator<Row> iterFromMake) { Closeables.closeQuietly(bufferHolder); } }); } private static class RowUpdater { private final ByteBuffer metricValues; private final BufferAggregator[] aggregators; private final PositionMaintainer positionMaintainer; private final TreeMap<ByteBuffer, Integer> positions; public RowUpdater(ByteBuffer metricValues, BufferAggregator[] aggregators, PositionMaintainer positionMaintainer) { this.metricValues = metricValues; this.aggregators = aggregators; this.positionMaintainer = positionMaintainer; this.positions = Maps.newTreeMap(); } public int getNumRows() { return positions.size(); } public TreeMap<ByteBuffer, Integer> getPositions() { return positions; } private List<ByteBuffer> updateValues(ByteBuffer key, List<DimensionSelector> dims) { if (dims.size() > 0) { List<ByteBuffer> retVal = null; for (Integer dimValue : dims.get(0).getRow()) { ByteBuffer newKey = key.duplicate(); newKey.putInt(dimValue); final List<ByteBuffer> unaggregatedBuffers = updateValues(newKey, dims.subList(1, dims.size())); if (unaggregatedBuffers != null) { if (retVal == null) { retVal = Lists.newArrayList(); } retVal.addAll(unaggregatedBuffers); } } return retVal; } else { key.clear(); Integer position = positions.get(key); int[] increments = positionMaintainer.getIncrements(); int thePosition; if (position == null) { ByteBuffer keyCopy = ByteBuffer.allocate(key.limit()); keyCopy.put(key.asReadOnlyBuffer()); keyCopy.clear(); position = positionMaintainer.getNext(); if (position == null) { return Lists.newArrayList(keyCopy); } positions.put(keyCopy, position); thePosition = position; for (int i = 0; i < aggregators.length; ++i) { aggregators[i].init(metricValues, thePosition); thePosition += increments[i]; } } thePosition = position; for (int i = 0; i < aggregators.length; ++i) { aggregators[i].aggregate(metricValues, thePosition); thePosition += increments[i]; } return null; } } } private class PositionMaintainer { private final int[] increments; private final int increment; private final int max; private long nextVal; public PositionMaintainer(int start, int[] increments, int max) { this.nextVal = (long) start; this.increments = increments; int theIncrement = 0; for (int i = 0; i < increments.length; i++) { theIncrement += increments[i]; } increment = theIncrement; this.max = max - increment; // Make sure there is enough room for one more increment } public Integer getNext() { if (nextVal > max) { return null; } else { int retVal = (int) nextVal; nextVal += increment; return retVal; } } public int[] getIncrements() { return increments; } } private class RowIterator implements Iterator<Row> { private final GroupByQuery query; private final Cursor cursor; private final ByteBuffer metricsBuffer; private final List<DimensionSpec> dimensionSpecs; private final List<DimensionSelector> dimensions; private final String[] dimNames; private final List<AggregatorFactory> aggregatorSpecs; private final BufferAggregator[] aggregators; private final String[] metricNames; private final int[] sizesRequired; private List<ByteBuffer> unprocessedKeys; private Iterator<Row> delegate; public RowIterator(GroupByQuery query, Cursor cursor, ByteBuffer metricsBuffer) { this.query = query; this.cursor = cursor; this.metricsBuffer = metricsBuffer; unprocessedKeys = null; delegate = Iterators.emptyIterator(); dimensionSpecs = query.getDimensions(); dimensions = Lists.newArrayListWithExpectedSize(dimensionSpecs.size()); dimNames = new String[dimensionSpecs.size()]; for (int i = 0; i < dimensionSpecs.size(); ++i) { final DimensionSpec dimSpec = dimensionSpecs.get(i); dimensions.add(cursor.makeDimensionSelector(dimSpec.getDimension())); dimNames[i] = dimSpec.getOutputName(); } aggregatorSpecs = query.getAggregatorSpecs(); aggregators = new BufferAggregator[aggregatorSpecs.size()]; metricNames = new String[aggregatorSpecs.size()]; sizesRequired = new int[aggregatorSpecs.size()]; for (int i = 0; i < aggregatorSpecs.size(); ++i) { AggregatorFactory aggregatorSpec = aggregatorSpecs.get(i); aggregators[i] = aggregatorSpec.factorizeBuffered(cursor); metricNames[i] = aggregatorSpec.getName(); sizesRequired[i] = aggregatorSpec.getMaxIntermediateSize(); } } @Override public boolean hasNext() { return delegate.hasNext() || !cursor.isDone(); } @Override public Row next() { if (delegate.hasNext()) { return delegate.next(); } if (cursor.isDone()) { throw new NoSuchElementException(); } final PositionMaintainer positionMaintainer = new PositionMaintainer(0, sizesRequired, metricsBuffer.limit()); final RowUpdater rowUpdater = new RowUpdater(metricsBuffer, aggregators, positionMaintainer); if (unprocessedKeys != null) { for (ByteBuffer key : unprocessedKeys) { final List<ByteBuffer> unprocUnproc = rowUpdater.updateValues(key, ImmutableList.<DimensionSelector>of()); if (unprocUnproc != null) { throw new ISE("Not enough memory to process the request."); } } cursor.advance(); } while (!cursor.isDone()) { ByteBuffer key = ByteBuffer.allocate(dimensions.size() * Ints.BYTES); unprocessedKeys = rowUpdater.updateValues(key, dimensions); if (unprocessedKeys != null || rowUpdater.getNumRows() > config.getMaxIntermediateRows()) { break; } cursor.advance(); } delegate = FunctionalIterator.create(rowUpdater.getPositions().entrySet().iterator()) .transform(new Function<Map.Entry<ByteBuffer, Integer>, Row>() { private final long timestamp = cursor.getTime().getMillis(); private final int[] increments = positionMaintainer.getIncrements(); @Override public Row apply(@Nullable Map.Entry<ByteBuffer, Integer> input) { Map<String, Object> theEvent = Maps.newLinkedHashMap(); ByteBuffer keyBuffer = input.getKey().duplicate(); for (int i = 0; i < dimensions.size(); ++i) { theEvent.put(dimNames[i], dimensions.get(i).lookupName(keyBuffer.getInt())); } int position = input.getValue(); for (int i = 0; i < aggregators.length; ++i) { theEvent.put(metricNames[i], aggregators[i].get(metricsBuffer, position)); position += increments[i]; } for (PostAggregator postAggregator : query.getPostAggregatorSpecs()) { theEvent.put(postAggregator.getName(), postAggregator.compute(theEvent)); } return new MapBasedRow(timestamp, theEvent); } }); return delegate.next(); } @Override public void remove() { throw new UnsupportedOperationException(); } } }