Java tutorial: Druid's SchemalessIndex test helper

The listing below is io.druid.segment.SchemalessIndex, a static test helper that builds in-memory (incremental) indexes from schemaless JSON sample data and then persists, merges, and appends them into memory-mapped QueryableIndex segments.
/*
 * Licensed to Metamarkets Group Inc. (Metamarkets) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Metamarkets licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package io.druid.segment;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import com.google.common.hash.Hashing;
import com.metamx.common.Pair;
import com.metamx.common.logger.Logger;
import io.druid.data.input.MapBasedInputRow;
import io.druid.granularity.QueryGranularity;
import io.druid.jackson.DefaultObjectMapper;
import io.druid.query.aggregation.AggregatorFactory;
import io.druid.query.aggregation.CountAggregatorFactory;
import io.druid.query.aggregation.DoubleSumAggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory;
import io.druid.query.aggregation.hyperloglog.HyperUniquesSerde;
import io.druid.segment.incremental.IncrementalIndex;
import io.druid.segment.incremental.IndexSizeExceededException;
import io.druid.segment.incremental.OnheapIncrementalIndex;
import io.druid.segment.serde.ComplexMetrics;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.PartitionChunk;
import io.druid.timeline.partition.ShardSpec;
import org.joda.time.DateTime;
import org.joda.time.Interval;

import javax.annotation.Nullable;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Static test helper that builds incremental indexes from the schemaless
 * druid.sample.json resources and persists, merges, and appends them into
 * memory-mapped QueryableIndex segments. Results are cached in static fields.
 */
public class SchemalessIndex
{
  private static final Logger log = new Logger(SchemalessIndex.class);
  private static final ObjectMapper jsonMapper = new DefaultObjectMapper();

  private static final String testFile = "druid.sample.json";
  private static final String TIMESTAMP = "timestamp";
  private static final List<String> METRICS = Arrays.asList("index");
  private static final AggregatorFactory[] METRIC_AGGS = new AggregatorFactory[]{
      new DoubleSumAggregatorFactory("index", "index"),
      new CountAggregatorFactory("count"),
      new HyperUniquesAggregatorFactory("quality_uniques", "quality")
  };
  private static final AggregatorFactory[] METRIC_AGGS_NO_UNIQ = new AggregatorFactory[]{
      new DoubleSumAggregatorFactory("index", "index"),
      new CountAggregatorFactory("count")
  };

  private static final IndexSpec indexSpec = new IndexSpec();

  private static final List<Map<String, Object>> events = Lists.newArrayList();

  private static final Map<Integer, Map<Integer, QueryableIndex>> incrementalIndexes = Maps.newHashMap();
  private static final Map<Integer, Map<Integer, QueryableIndex>> mergedIndexes = Maps.newHashMap();
  private static final List<QueryableIndex> rowPersistedIndexes = Lists.newArrayList();

  private static IncrementalIndex index = null;
  private static QueryableIndex mergedIndex = null;

  static {
    if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
      ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(Hashing.murmur3_128()));
    }
  }

  /**
   * Builds (once) and caches an incremental index over the full sample file.
   */
  public static IncrementalIndex getIncrementalIndex()
  {
    synchronized (log) {
      if (index != null) {
        return index;
      }

      index = makeIncrementalIndex(testFile, METRIC_AGGS);

      return index;
    }
  }

  /**
   * Builds and caches a queryable index containing only the events at positions index1 and index2.
   */
  public static QueryableIndex getIncrementalIndex(int index1, int index2)
  {
    synchronized (log) {
      if (events.isEmpty()) {
        makeEvents();
      }

      Map<Integer, QueryableIndex> entry = incrementalIndexes.get(index1);
      if (entry != null) {
        QueryableIndex index = entry.get(index2);
        if (index != null) {
          return index;
        }
      } else {
        entry = Maps.<Integer, QueryableIndex>newHashMap();
        incrementalIndexes.put(index1, entry);
      }

      IncrementalIndex theIndex = null;

      int count = 0;
      for (final Map<String, Object> event : events) {
        if (count != index1 && count != index2) {
          count++;
          continue;
        }

        final long timestamp = new DateTime(event.get(TIMESTAMP)).getMillis();

        if (theIndex == null) {
          theIndex = new OnheapIncrementalIndex(timestamp, QueryGranularity.MINUTE, METRIC_AGGS, 1000);
        }

        // Every non-timestamp, non-metric field of the event is treated as a dimension.
        final List<String> dims = Lists.newArrayList();
        for (final Map.Entry<String, Object> val : event.entrySet()) {
          if (!val.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(val.getKey())) {
            dims.add(val.getKey());
          }
        }

        try {
          theIndex.add(new MapBasedInputRow(timestamp, dims, event));
        }
        catch (IndexSizeExceededException e) {
          Throwables.propagate(e);
        }

        count++;
      }

      QueryableIndex retVal = TestIndex.persistRealtimeAndLoadMMapped(theIndex);
      entry.put(index2, retVal);
      return retVal;
    }
  }

  /**
   * Persists the top/bottom sample files as separate segments, merges them on disk,
   * and caches the loaded merged segment.
   */
  public static QueryableIndex getMergedIncrementalIndex()
  {
    synchronized (log) {
      if (mergedIndex != null) {
        return mergedIndex;
      }

      try {
        IncrementalIndex top = makeIncrementalIndex("druid.sample.json.top", METRIC_AGGS);
        IncrementalIndex bottom = makeIncrementalIndex("druid.sample.json.bottom", METRIC_AGGS);

        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();

        File topFile = new File(tmpFile, "top");
        File bottomFile = new File(tmpFile, "bottom");
        File mergedFile = new File(tmpFile, "merged");

        topFile.mkdirs();
        topFile.deleteOnExit();
        bottomFile.mkdirs();
        bottomFile.deleteOnExit();
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();

        IndexMerger.persist(top, topFile, null, indexSpec);
        IndexMerger.persist(bottom, bottomFile, null, indexSpec);

        mergedIndex = io.druid.segment.IndexIO.loadIndex(
            IndexMerger.mergeQueryableIndex(
                Arrays.asList(IndexIO.loadIndex(topFile), IndexIO.loadIndex(bottomFile)),
                METRIC_AGGS,
                mergedFile,
                indexSpec
            )
        );

        return mergedIndex;
      }
      catch (IOException e) {
        mergedIndex = null;
        throw Throwables.propagate(e);
      }
    }
  }

  /**
   * Merges two of the per-row persisted segments and caches the result.
   */
  public static QueryableIndex getMergedIncrementalIndex(int index1, int index2)
  {
    synchronized (log) {
      if (rowPersistedIndexes.isEmpty()) {
        makeRowPersistedIndexes();
      }

      Map<Integer, QueryableIndex> entry = mergedIndexes.get(index1);
      if (entry != null) {
        QueryableIndex index = entry.get(index2);
        if (index != null) {
          return index;
        }
      } else {
        entry = Maps.<Integer, QueryableIndex>newHashMap();
        mergedIndexes.put(index1, entry);
      }

      try {
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();

        File mergedFile = new File(tmpFile, "merged");
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();

        QueryableIndex index = IndexIO.loadIndex(
            IndexMerger.mergeQueryableIndex(
                Arrays.asList(rowPersistedIndexes.get(index1), rowPersistedIndexes.get(index2)),
                METRIC_AGGS,
                mergedFile,
                indexSpec
            )
        );

        entry.put(index2, index);

        return index;
      }
      catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  }

  /**
   * Merges an arbitrary set of the per-row persisted segments.
   */
  public static QueryableIndex getMergedIncrementalIndex(int[] indexes)
  {
    synchronized (log) {
      if (rowPersistedIndexes.isEmpty()) {
        makeRowPersistedIndexes();
      }

      try {
        File tmpFile = File.createTempFile("yay", "who");
        tmpFile.delete();

        File mergedFile = new File(tmpFile, "merged");
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();

        List<QueryableIndex> indexesToMerge = Lists.newArrayList();
        for (int i = 0; i < indexes.length; i++) {
          indexesToMerge.add(rowPersistedIndexes.get(indexes[i]));
        }

        QueryableIndex index = IndexIO.loadIndex(
            IndexMerger.mergeQueryableIndex(indexesToMerge, METRIC_AGGS, mergedFile, indexSpec)
        );

        return index;
      }
      catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  }

  public static QueryableIndex getAppendedIncrementalIndex(
      Iterable<Pair<String, AggregatorFactory[]>> files,
      List<Interval> intervals
  )
  {
    return makeAppendedMMappedIndex(files, intervals);
  }

  public static QueryableIndex getMergedIncrementalIndexDiffMetrics()
  {
    return getMergedIncrementalIndex(
        Arrays.<Pair<String, AggregatorFactory[]>>asList(
            new Pair<String, AggregatorFactory[]>("druid.sample.json.top", METRIC_AGGS_NO_UNIQ),
            new Pair<String, AggregatorFactory[]>("druid.sample.json.bottom", METRIC_AGGS)
        )
    );
  }

  public static QueryableIndex getMergedIncrementalIndex(Iterable<Pair<String, AggregatorFactory[]>> files)
  {
    return makeMergedMMappedIndex(files);
  }

  // Loads the raw sample events from the JSON resource into the events list.
  private static void makeEvents()
  {
    URL resource = TestIndex.class.getClassLoader().getResource(testFile);
    String filename = resource.getFile();
    log.info("Realtime loading index file[%s]", filename);
    try {
      for (Object obj : jsonMapper.readValue(new File(filename), List.class)) {
        final Map<String, Object> event = jsonMapper.convertValue(obj, Map.class);
        events.add(event);
      }
    }
    catch (Exception e) {
      Throwables.propagate(e);
    }
  }

  // Persists each sample event as its own one-row segment and loads it back.
  private static void makeRowPersistedIndexes()
  {
    synchronized (log) {
      try {
        if (events.isEmpty()) {
          makeEvents();
        }

        for (final Map<String, Object> event : events) {
          final long timestamp = new DateTime(event.get(TIMESTAMP)).getMillis();
          final List<String> dims = Lists.newArrayList();
          for (Map.Entry<String, Object> entry : event.entrySet()) {
            if (!entry.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(entry.getKey())) {
              dims.add(entry.getKey());
            }
          }

          final IncrementalIndex rowIndex = new OnheapIncrementalIndex(
              timestamp, QueryGranularity.MINUTE, METRIC_AGGS, 1000
          );

          rowIndex.add(new MapBasedInputRow(timestamp, dims, event));

          File tmpFile = File.createTempFile("billy", "yay");
          tmpFile.delete();
          tmpFile.mkdirs();
          tmpFile.deleteOnExit();

          IndexMerger.persist(rowIndex, tmpFile, null, indexSpec);
          rowPersistedIndexes.add(IndexIO.loadIndex(tmpFile));
        }
      }
      catch (IOException e) {
        throw Throwables.propagate(e);
      }
    }
  }

  // Builds an on-heap incremental index from a JSON resource, treating every
  // non-timestamp, non-metric field as a dimension.
  private static IncrementalIndex makeIncrementalIndex(final String resourceFilename, AggregatorFactory[] aggs)
  {
    URL resource = TestIndex.class.getClassLoader().getResource(resourceFilename);
    log.info("Realtime loading resource[%s]", resource);
    String filename = resource.getFile();
    log.info("Realtime loading index file[%s]", filename);

    final IncrementalIndex retVal = new OnheapIncrementalIndex(
        new DateTime("2011-01-12T00:00:00.000Z").getMillis(), QueryGranularity.MINUTE, aggs, 1000
    );

    try {
      final List<Object> events = jsonMapper.readValue(new File(filename), List.class);
      for (Object obj : events) {
        final Map<String, Object> event = jsonMapper.convertValue(obj, Map.class);

        final List<String> dims = Lists.newArrayList();
        for (Map.Entry<String, Object> entry : event.entrySet()) {
          if (!entry.getKey().equalsIgnoreCase(TIMESTAMP) && !METRICS.contains(entry.getKey())) {
            dims.add(entry.getKey());
          }
        }

        retVal.add(new MapBasedInputRow(new DateTime(event.get(TIMESTAMP)).getMillis(), dims, event));
      }
    }
    catch (IOException e) {
      index = null;
      throw Throwables.propagate(e);
    }

    return retVal;
  }

  // Persists one segment per (resource, aggregators) pair and returns the segment directories.
  private static List<File> makeFilesToMap(File tmpFile, Iterable<Pair<String, AggregatorFactory[]>> files) throws IOException
  {
    List<File> filesToMap = Lists.newArrayList();
    for (Pair<String, AggregatorFactory[]> file : files) {
      IncrementalIndex index = makeIncrementalIndex(file.lhs, file.rhs);
      File theFile = new File(tmpFile, file.lhs);
      theFile.mkdirs();
      theFile.deleteOnExit();
      filesToMap.add(theFile);
      IndexMerger.persist(index, theFile, null, indexSpec);
    }
    return filesToMap;
  }

  // Appends segments laid out on a versioned interval timeline, filtering each
  // segment's rows down to its assigned interval.
  private static QueryableIndex makeAppendedMMappedIndex(
      Iterable<Pair<String, AggregatorFactory[]>> files,
      final List<Interval> intervals
  )
  {
    try {
      File tmpFile = File.createTempFile("yay", "boo");
      tmpFile.delete();
      File mergedFile = new File(tmpFile, "merged");
      mergedFile.mkdirs();
      mergedFile.deleteOnExit();

      List<File> filesToMap = makeFilesToMap(tmpFile, files);

      VersionedIntervalTimeline<Integer, File> timeline = new VersionedIntervalTimeline<Integer, File>(
          Ordering.natural().nullsFirst()
      );

      ShardSpec noneShardSpec = new NoneShardSpec();

      for (int i = 0; i < intervals.size(); i++) {
        timeline.add(intervals.get(i), i, noneShardSpec.createChunk(filesToMap.get(i)));
      }

      final List<IndexableAdapter> adapters = Lists.newArrayList(
          Iterables.concat(
              // TimelineObjectHolder is actually an iterable of iterable of indexable adapters
              Iterables.transform(
                  timeline.lookup(new Interval("1000-01-01/3000-01-01")),
                  new Function<TimelineObjectHolder<Integer, File>, Iterable<IndexableAdapter>>()
                  {
                    @Override
                    public Iterable<IndexableAdapter> apply(final TimelineObjectHolder<Integer, File> timelineObjectHolder)
                    {
                      return Iterables.transform(
                          timelineObjectHolder.getObject(),
                          // Each chunk can be used to build the actual IndexableAdapter
                          new Function<PartitionChunk<File>, IndexableAdapter>()
                          {
                            @Override
                            public IndexableAdapter apply(PartitionChunk<File> chunk)
                            {
                              try {
                                return new RowboatFilteringIndexAdapter(
                                    new QueryableIndexIndexableAdapter(IndexIO.loadIndex(chunk.getObject())),
                                    new Predicate<Rowboat>()
                                    {
                                      @Override
                                      public boolean apply(Rowboat input)
                                      {
                                        return timelineObjectHolder.getInterval().contains(input.getTimestamp());
                                      }
                                    }
                                );
                              }
                              catch (IOException e) {
                                throw Throwables.propagate(e);
                              }
                            }
                          }
                      );
                    }
                  }
              )
          )
      );

      return IndexIO.loadIndex(IndexMerger.append(adapters, mergedFile, indexSpec));
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }

  // Loads the persisted segments and merges them into a single mmapped index.
  private static QueryableIndex makeMergedMMappedIndex(Iterable<Pair<String, AggregatorFactory[]>> files)
  {
    try {
      File tmpFile = File.createTempFile("yay", "who");
      tmpFile.delete();
      File mergedFile = new File(tmpFile, "merged");
      mergedFile.mkdirs();
      mergedFile.deleteOnExit();

      List<File> filesToMap = makeFilesToMap(tmpFile, files);

      return IndexIO.loadIndex(
          IndexMerger.mergeQueryableIndex(
              Lists.newArrayList(
                  Iterables.transform(
                      filesToMap,
                      new Function<File, QueryableIndex>()
                      {
                        @Override
                        public QueryableIndex apply(@Nullable File input)
                        {
                          try {
                            return IndexIO.loadIndex(input);
                          }
                          catch (IOException e) {
                            throw Throwables.propagate(e);
                          }
                        }
                      }
                  )
              ),
              METRIC_AGGS,
              mergedFile,
              indexSpec
          )
      );
    }
    catch (IOException e) {
      throw Throwables.propagate(e);
    }
  }
}
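A minimal usage sketch follows, assuming the class is used the way Druid's own tests use it: JUnit 4 on the test classpath and the druid.sample.json* resources (plus the TestIndex helper) available. The test class name and the assertions are illustrative assumptions; only the SchemalessIndex, IncrementalIndex, and QueryableIndex calls come from the listing above.

package io.druid.segment;

import org.junit.Assert;
import org.junit.Test;

public class SchemalessIndexUsageExample
{
  @Test
  public void buildsAndMergesSampleIndexes() throws Exception
  {
    // Builds an incremental index from druid.sample.json (cached after the first call).
    IncrementalIndex incremental = SchemalessIndex.getIncrementalIndex();
    Assert.assertNotNull(incremental);

    // Persists druid.sample.json.top and druid.sample.json.bottom to temp segment
    // directories, merges them, and loads the merged segment as a QueryableIndex.
    QueryableIndex merged = SchemalessIndex.getMergedIncrementalIndex();
    Assert.assertNotNull(merged);

    // Merges the per-row persisted segments at positions 0 and 1.
    QueryableIndex pairMerged = SchemalessIndex.getMergedIncrementalIndex(0, 1);
    Assert.assertNotNull(pairMerged);
  }
}

Because every getter synchronizes on the class logger and caches its result in a static field, repeated calls across tests reuse the same persisted and memory-mapped segments instead of rebuilding them.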