List of usage examples for the org.joda.time.Interval constructor
public Interval(Object interval, Chronology chronology)
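The examples below exercise several Interval constructor overloads, not only the one above. As a minimal, self-contained sketch (not taken from any of the source files below; the class name and dates are illustrative only), here are the overloads that recur on this page:

import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.joda.time.Period;
import org.joda.time.chrono.ISOChronology;

public class IntervalConstructorExamples {
  public static void main(String[] args) {
    // Interval(long, long): millisecond instants, used heavily in the Druid examples
    Interval fromMillis = new Interval(0L, 86400000L);

    // Interval(ReadableInstant, ReadableInstant)
    Interval fromInstants = new Interval(new DateTime("2016-01-01T00:00:00Z"),
        new DateTime("2016-02-01T00:00:00Z"));

    // Interval(ReadablePeriod, ReadableInstant): spans the period backwards from
    // the end instant (see the filterSegments example below)
    Interval fromPeriod = new Interval(Period.weeks(1), new DateTime("2016-02-01T00:00:00Z"));

    // Interval(Object, Chronology), the overload this page documents: here the
    // Object is an ISO-8601 interval string
    Interval parsed = new Interval("2016-01-01/2016-02-01", ISOChronology.getInstanceUTC());

    System.out.println(fromMillis + "\n" + fromInstants + "\n" + fromPeriod + "\n" + parsed);
  }
}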
From source file: io.druid.java.util.common.JodaUtils.java
License: Apache License

public static ArrayList<Interval> condenseIntervals(Iterable<Interval> intervals) {
  ArrayList<Interval> retVal = Lists.newArrayList();

  final SortedSet<Interval> sortedIntervals;
  if (intervals instanceof SortedSet) {
    sortedIntervals = (SortedSet<Interval>) intervals;
  } else {
    sortedIntervals = Sets.newTreeSet(Comparators.intervalsByStartThenEnd());
    for (Interval interval : intervals) {
      sortedIntervals.add(interval);
    }
  }

  if (sortedIntervals.isEmpty()) {
    return Lists.newArrayList();
  }

  Iterator<Interval> intervalsIter = sortedIntervals.iterator();
  Interval currInterval = intervalsIter.next();
  while (intervalsIter.hasNext()) {
    Interval next = intervalsIter.next();

    if (currInterval.abuts(next)) {
      currInterval = new Interval(currInterval.getStart(), next.getEnd());
    } else if (currInterval.overlaps(next)) {
      DateTime nextEnd = next.getEnd();
      DateTime currEnd = currInterval.getEnd();
      currInterval = new Interval(currInterval.getStart(), nextEnd.isAfter(currEnd) ? nextEnd : currEnd);
    } else {
      retVal.add(currInterval);
      currInterval = next;
    }
  }
  retVal.add(currInterval);

  return retVal;
}
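A hypothetical usage sketch (not from the Druid source; assumes the usual org.joda.time imports): abutting intervals merge, overlapping intervals merge to the later end, and disjoint intervals stay separate:

List<Interval> condensed = JodaUtils.condenseIntervals(Arrays.asList(
    new Interval("2016-01-01T00:00/2016-01-01T01:00"),    // abuts the next one
    new Interval("2016-01-01T01:00/2016-01-01T02:00"),
    new Interval("2016-01-01T01:30/2016-01-01T03:00"),    // overlaps the merged pair
    new Interval("2016-01-02T00:00/2016-01-02T01:00"))); // disjoint
// => [2016-01-01T00:00/2016-01-01T03:00, 2016-01-02T00:00/2016-01-02T01:00]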
From source file: io.druid.query.groupby.strategy.GroupByStrategyV2.java
License: Apache License

/**
 * If "query" has a single universal timestamp, return it. Otherwise return null. This is useful
 * for keeping timestamps in sync across partial queries that may have different intervals.
 *
 * @param query the query
 *
 * @return universal timestamp, or null
 */
public static DateTime getUniversalTimestamp(final GroupByQuery query) {
  final Granularity gran = query.getGranularity();
  final String timestampStringFromContext = query.getContextValue(CTX_KEY_FUDGE_TIMESTAMP, "");

  if (!timestampStringFromContext.isEmpty()) {
    return new DateTime(Long.parseLong(timestampStringFromContext));
  } else if (Granularities.ALL.equals(gran)) {
    final long timeStart = query.getIntervals().get(0).getStartMillis();
    return gran.getIterable(new Interval(timeStart, timeStart + 1)).iterator().next().getStart();
  } else {
    return null;
  }
}
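Worth noting: new Interval(timeStart, timeStart + 1) is a deliberate one-millisecond probe, just wide enough for the granularity iterator to emit the single bucket containing timeStart. A hedged miniature of the trick (Granularity here is Druid's, not Joda's):

long timeStart = new DateTime("2016-01-01T12:34:56Z").getMillis();
Interval probe = new Interval(timeStart, timeStart + 1); // one millisecond wide
// gran.getIterable(probe) then yields exactly one bucket; with Granularities.ALL
// that bucket is the probe itself, so getStart() recovers timeStart.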
From source file: io.druid.query.IntervalChunkingQueryRunner.java
License: Apache License

private Iterable<Interval> splitInterval(Interval interval, Period period) {
  if (interval.getEndMillis() == interval.getStartMillis()) {
    return Lists.newArrayList(interval);
  }

  List<Interval> intervals = Lists.newArrayList();
  Iterator<Long> timestamps = new PeriodGranularity(period, null, null)
      .iterable(interval.getStartMillis(), interval.getEndMillis()).iterator();

  long start = Math.max(timestamps.next(), interval.getStartMillis());
  while (timestamps.hasNext()) {
    long end = timestamps.next();
    intervals.add(new Interval(start, end));
    start = end;
  }

  if (start < interval.getEndMillis()) {
    intervals.add(new Interval(start, interval.getEndMillis()));
  }

  return intervals;
}
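A hypothetical trace (splitInterval is private, so this is illustrative only): chunking a six-hour interval by a four-hour period yields period-aligned pieces whose first and last chunks are clipped to the original bounds:

Interval interval = new Interval(new DateTime("2016-01-01T03:00:00Z"),
    new DateTime("2016-01-01T09:00:00Z"));
// splitInterval(interval, Period.hours(4)) would produce, assuming UTC,
// epoch-aligned four-hour buckets:
//   2016-01-01T03:00Z/2016-01-01T04:00Z   (head clipped to the interval start)
//   2016-01-01T04:00Z/2016-01-01T08:00Z
//   2016-01-01T08:00Z/2016-01-01T09:00Z   (tail clipped to the interval end)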
From source file: io.druid.query.metadata.SegmentAnalyzer.java
License: Apache License

private ColumnAnalysis analyzeStringColumn(final ColumnCapabilities capabilities,
    final StorageAdapter storageAdapter, final String columnName) {
  int cardinality = 0;
  long size = 0;

  Comparable min = null;
  Comparable max = null;

  if (analyzingCardinality()) {
    cardinality = storageAdapter.getDimensionCardinality(columnName);
  }

  if (analyzingSize()) {
    final long start = storageAdapter.getMinTime().getMillis();
    final long end = storageAdapter.getMaxTime().getMillis();

    final Sequence<Cursor> cursors = storageAdapter.makeCursors(null, new Interval(start, end),
        VirtualColumns.EMPTY, Granularities.ALL, false, null);

    size = cursors.accumulate(0L, new Accumulator<Long, Cursor>() {
      @Override
      public Long accumulate(Long accumulated, Cursor cursor) {
        DimensionSelector selector = cursor
            .makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName));
        if (selector == null) {
          return accumulated;
        }
        long current = accumulated;
        while (!cursor.isDone()) {
          final IndexedInts vals = selector.getRow();
          for (int i = 0; i < vals.size(); ++i) {
            final String dimVal = selector.lookupName(vals.get(i));
            if (dimVal != null && !dimVal.isEmpty()) {
              current += StringUtils.estimatedBinaryLengthAsUTF8(dimVal);
            }
          }
          cursor.advance();
        }
        return current;
      }
    });
  }

  if (analyzingMinMax()) {
    min = storageAdapter.getMinValue(columnName);
    max = storageAdapter.getMaxValue(columnName);
  }

  return new ColumnAnalysis(capabilities.getType().name(), capabilities.hasMultipleValues(), size,
      cardinality, min, max, null);
}
From source file: io.druid.query.metadata.SegmentMetadataQueryQueryToolChest.java
License: Apache License

@Override
public <T extends LogicalSegment> List<T> filterSegments(SegmentMetadataQuery query, List<T> segments) {
  if (!query.isUsingDefaultInterval()) {
    return segments;
  }

  if (segments.size() <= 1) {
    return segments;
  }

  final T max = segments.get(segments.size() - 1);

  DateTime targetEnd = max.getInterval().getEnd();
  final Interval targetInterval = new Interval(config.getDefaultHistory(), targetEnd);

  return Lists.newArrayList(Iterables.filter(segments, new Predicate<T>() {
    @Override
    public boolean apply(T input) {
      return (input.getInterval().overlaps(targetInterval));
    }
  }));
}
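The targetInterval above uses the Interval(ReadablePeriod, ReadableInstant) overload: the interval ends at targetEnd and reaches one period back from it. A minimal sketch with an assumed one-week default history:

DateTime targetEnd = new DateTime("2016-02-01T00:00:00Z");
Interval targetInterval = new Interval(Period.weeks(1), targetEnd);
// targetInterval.getStart() => 2016-01-25T00:00:00.000Z
// Segments overlapping this trailing window survive the filter above.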
From source file: io.druid.query.TimewarpOperator.java
License: Apache License

public QueryRunner<T> postProcess(final QueryRunner<T> baseRunner, final long now) {
  return new QueryRunner<T>() {
    @Override
    public Sequence<T> run(final Query<T> query, final Map<String, Object> responseContext) {
      final long offset = computeOffset(now);

      final Interval interval = query.getIntervals().get(0);
      final Interval modifiedInterval = new Interval(
          Math.min(interval.getStartMillis() + offset, now + offset),
          Math.min(interval.getEndMillis() + offset, now + offset));
      return Sequences.map(
          baseRunner.run(
              query.withQuerySegmentSpec(
                  new MultipleIntervalSegmentSpec(Arrays.asList(modifiedInterval))),
              responseContext),
          new Function<T, T>() {
            @Override
            public T apply(T input) {
              if (input instanceof Result) {
                Result res = (Result) input;
                Object value = res.getValue();
                if (value instanceof TimeBoundaryResultValue) {
                  TimeBoundaryResultValue boundary = (TimeBoundaryResultValue) value;

                  DateTime minTime = null;
                  try {
                    minTime = boundary.getMinTime();
                  } catch (IllegalArgumentException e) {
                    // the boundary may not carry a min time; leave minTime null
                  }

                  final DateTime maxTime = boundary.getMaxTime();

                  return (T) ((TimeBoundaryQuery) query).buildResult(
                      new DateTime(Math.min(res.getTimestamp().getMillis() - offset, now)),
                      minTime != null ? minTime.minus(offset) : null,
                      maxTime != null
                          ? new DateTime(Math.min(maxTime.getMillis() - offset, now))
                          : null)
                      .iterator().next();
                }
                return (T) new Result(res.getTimestamp().minus(offset), value);
              } else if (input instanceof MapBasedRow) {
                MapBasedRow row = (MapBasedRow) input;
                return (T) new MapBasedRow(row.getTimestamp().minus(offset), row.getEvent());
              }

              // default to noop for unknown result types
              return input;
            }
          });
    }
  };
}
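The interval arithmetic at the top of run() is the heart of the warp; a hedged standalone sketch of just that step (offset and now are made-up values):

long offset = 86400000L; // pretend computeOffset() shifts queries one day forward
long now = new DateTime("2016-01-02T00:00:00Z").getMillis();
Interval interval = new Interval(new DateTime("2016-01-01T00:00:00Z"),
    new DateTime("2016-01-03T00:00:00Z"));
Interval modifiedInterval = new Interval(
    Math.min(interval.getStartMillis() + offset, now + offset),
    Math.min(interval.getEndMillis() + offset, now + offset));
// => 2016-01-02/2016-01-03: both endpoints shift forward, but neither may pass
//    the warped "now"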
From source file: io.druid.segment.incremental.IncrementalIndexStorageAdapter.java
License: Apache License

@Override
public Sequence<Cursor> makeCursors(final Filter filter, final Interval interval,
    final QueryGranularity gran) {
  if (index.isEmpty()) {
    return Sequences.empty();
  }

  Interval actualIntervalTmp = interval;

  final Interval dataInterval = new Interval(getMinTime().getMillis(),
      gran.next(gran.truncate(getMaxTime().getMillis())));

  if (!actualIntervalTmp.overlaps(dataInterval)) {
    return Sequences.empty();
  }

  if (actualIntervalTmp.getStart().isBefore(dataInterval.getStart())) {
    actualIntervalTmp = actualIntervalTmp.withStart(dataInterval.getStart());
  }
  if (actualIntervalTmp.getEnd().isAfter(dataInterval.getEnd())) {
    actualIntervalTmp = actualIntervalTmp.withEnd(dataInterval.getEnd());
  }

  final Interval actualInterval = actualIntervalTmp;

  return Sequences.map(
      Sequences.simple(gran.iterable(actualInterval.getStartMillis(), actualInterval.getEndMillis())),
      new Function<Long, Cursor>() {
        EntryHolder currEntry = new EntryHolder();
        private final ValueMatcher filterMatcher;

        {
          filterMatcher = makeFilterMatcher(filter, currEntry);
        }

        @Override
        public Cursor apply(@Nullable final Long input) {
          final long timeStart = Math.max(input, actualInterval.getStartMillis());

          return new Cursor() {
            private Iterator<Map.Entry<IncrementalIndex.TimeAndDims, Integer>> baseIter;
            private ConcurrentNavigableMap<IncrementalIndex.TimeAndDims, Integer> cursorMap;
            final DateTime time;
            int numAdvanced = -1;
            boolean done;

            {
              cursorMap = index.getSubMap(
                  new IncrementalIndex.TimeAndDims(timeStart, new String[][] {}),
                  new IncrementalIndex.TimeAndDims(
                      Math.min(actualInterval.getEndMillis(), gran.next(input)),
                      new String[][] {}));
              time = gran.toDateTime(input);

              reset();
            }

            @Override
            public DateTime getTime() {
              return time;
            }

            @Override
            public void advance() {
              if (!baseIter.hasNext()) {
                done = true;
                return;
              }

              while (baseIter.hasNext()) {
                if (Thread.interrupted()) {
                  throw new QueryInterruptedException();
                }

                currEntry.set(baseIter.next());

                if (filterMatcher.matches()) {
                  return;
                }
              }

              if (!filterMatcher.matches()) {
                done = true;
              }
            }

            @Override
            public void advanceTo(int offset) {
              int count = 0;
              while (count < offset && !isDone()) {
                advance();
                count++;
              }
            }

            @Override
            public boolean isDone() {
              return done;
            }

            @Override
            public void reset() {
              baseIter = cursorMap.entrySet().iterator();

              if (numAdvanced == -1) {
                numAdvanced = 0;
              } else {
                Iterators.advance(baseIter, numAdvanced);
              }

              if (Thread.interrupted()) {
                throw new QueryInterruptedException();
              }

              boolean foundMatched = false;
              while (baseIter.hasNext()) {
                currEntry.set(baseIter.next());
                if (filterMatcher.matches()) {
                  foundMatched = true;
                  break;
                }

                numAdvanced++;
              }

              done = !foundMatched && (cursorMap.size() == 0 || !baseIter.hasNext());
            }

            @Override
            public DimensionSelector makeDimensionSelector(final String dimension,
                @Nullable final ExtractionFn extractionFn) {
              if (dimension.equals(Column.TIME_COLUMN_NAME)) {
                return new SingleScanTimeDimSelector(makeLongColumnSelector(dimension), extractionFn);
              }

              final IncrementalIndex.DimDim dimValLookup = index.getDimension(dimension);
              if (dimValLookup == null) {
                return NULL_DIMENSION_SELECTOR;
              }

              final int maxId = dimValLookup.size();
              final int dimIndex = index.getDimensionIndex(dimension);

              return new DimensionSelector() {
                @Override
                public IndexedInts getRow() {
                  final ArrayList<Integer> vals = Lists.newArrayList();
                  if (dimIndex < currEntry.getKey().getDims().length) {
                    final String[] dimVals = currEntry.getKey().getDims()[dimIndex];
                    if (dimVals != null) {
                      for (String dimVal : dimVals) {
                        int id = dimValLookup.getId(dimVal);
                        if (id < maxId) {
                          vals.add(id);
                        }
                      }
                    }
                  }
                  // check for null entry
                  if (vals.isEmpty() && dimValLookup.contains(null)) {
                    int id = dimValLookup.getId(null);
                    if (id < maxId) {
                      vals.add(id);
                    }
                  }

                  return new IndexedInts() {
                    @Override
                    public int size() {
                      return vals.size();
                    }

                    @Override
                    public int get(int index) {
                      return vals.get(index);
                    }

                    @Override
                    public Iterator<Integer> iterator() {
                      return vals.iterator();
                    }

                    @Override
                    public void fill(int index, int[] toFill) {
                      throw new UnsupportedOperationException("fill not supported");
                    }

                    @Override
                    public void close() throws IOException {
                    }
                  };
                }

                @Override
                public int getValueCardinality() {
                  return maxId;
                }

                @Override
                public String lookupName(int id) {
                  final String value = dimValLookup.getValue(id);
                  return extractionFn == null ? value : extractionFn.apply(value);
                }

                @Override
                public int lookupId(String name) {
                  if (extractionFn != null) {
                    throw new UnsupportedOperationException(
                        "cannot perform lookup when applying an extraction function");
                  }
                  return dimValLookup.getId(name);
                }
              };
            }

            @Override
            public FloatColumnSelector makeFloatColumnSelector(String columnName) {
              final Integer metricIndexInt = index.getMetricIndex(columnName);
              if (metricIndexInt == null) {
                return new FloatColumnSelector() {
                  @Override
                  public float get() {
                    return 0.0f;
                  }
                };
              }

              final int metricIndex = metricIndexInt;
              return new FloatColumnSelector() {
                @Override
                public float get() {
                  return index.getMetricFloatValue(currEntry.getValue(), metricIndex);
                }
              };
            }

            @Override
            public LongColumnSelector makeLongColumnSelector(String columnName) {
              if (columnName.equals(Column.TIME_COLUMN_NAME)) {
                return new LongColumnSelector() {
                  @Override
                  public long get() {
                    return currEntry.getKey().getTimestamp();
                  }
                };
              }

              final Integer metricIndexInt = index.getMetricIndex(columnName);
              if (metricIndexInt == null) {
                return new LongColumnSelector() {
                  @Override
                  public long get() {
                    return 0L;
                  }
                };
              }

              final int metricIndex = metricIndexInt;
              return new LongColumnSelector() {
                @Override
                public long get() {
                  return index.getMetricLongValue(currEntry.getValue(), metricIndex);
                }
              };
            }

            @Override
            public ObjectColumnSelector makeObjectColumnSelector(String column) {
              if (column.equals(Column.TIME_COLUMN_NAME)) {
                return new ObjectColumnSelector<Long>() {
                  @Override
                  public Class classOfObject() {
                    return Long.TYPE;
                  }

                  @Override
                  public Long get() {
                    return currEntry.getKey().getTimestamp();
                  }
                };
              }

              final Integer metricIndexInt = index.getMetricIndex(column);
              if (metricIndexInt != null) {
                final int metricIndex = metricIndexInt;
                final ComplexMetricSerde serde = ComplexMetrics
                    .getSerdeForType(index.getMetricType(column));

                return new ObjectColumnSelector() {
                  @Override
                  public Class classOfObject() {
                    return serde.getObjectStrategy().getClazz();
                  }

                  @Override
                  public Object get() {
                    return index.getMetricObjectValue(currEntry.getValue(), metricIndex);
                  }
                };
              }

              final Integer dimensionIndexInt = index.getDimensionIndex(column);
              if (dimensionIndexInt != null) {
                final int dimensionIndex = dimensionIndexInt;
                return new ObjectColumnSelector<Object>() {
                  @Override
                  public Class classOfObject() {
                    return Object.class;
                  }

                  @Override
                  public Object get() {
                    IncrementalIndex.TimeAndDims key = currEntry.getKey();
                    if (key == null) {
                      return null;
                    }

                    String[][] dims = key.getDims();
                    if (dimensionIndex >= dims.length) {
                      return null;
                    }

                    final String[] dimVals = dims[dimensionIndex];
                    if (dimVals == null || dimVals.length == 0) {
                      return null;
                    }
                    if (dimVals.length == 1) {
                      return dimVals[0];
                    }
                    return dimVals;
                  }
                };
              }

              return null;
            }
          };
        }
      });
}
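The clamping of the query interval to dataInterval via withStart/withEnd near the top of makeCursors is worth isolating; a minimal sketch under assumed dates:

Interval dataInterval = new Interval(new DateTime("2016-01-01T00:00:00Z"),
    new DateTime("2016-01-31T00:00:00Z"));
Interval query = new Interval(new DateTime("2015-12-15T00:00:00Z"),
    new DateTime("2016-01-10T00:00:00Z"));
if (query.getStart().isBefore(dataInterval.getStart())) {
  query = query.withStart(dataInterval.getStart()); // clip the head
}
if (query.getEnd().isAfter(dataInterval.getEnd())) {
  query = query.withEnd(dataInterval.getEnd()); // clip the tail
}
// query => 2016-01-01/2016-01-10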
From source file: io.druid.segment.indexing.granularity.ArbitraryGranularitySpec.java
License: Apache License

@Override
public Optional<Interval> bucketInterval(DateTime dt) {
  // First interval with start time dt
  final Interval interval = intervals.floor(new Interval(dt, new DateTime(JodaUtils.MAX_INSTANT)));

  if (interval != null && interval.contains(dt)) {
    return Optional.of(interval);
  } else {
    return Optional.absent();
  }
}
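The probe interval [dt, MAX_INSTANT) makes TreeSet.floor() return the bucket with the greatest start time at or before dt; contains(dt) then rejects hits that fall in a gap. A hypothetical illustration:

TreeSet<Interval> intervals = Sets.newTreeSet(Comparators.intervalsByStartThenEnd());
intervals.add(new Interval(new DateTime("2016-01-01T00:00:00Z"),
    new DateTime("2016-01-01T01:00:00Z")));
intervals.add(new Interval(new DateTime("2016-01-01T02:00:00Z"),
    new DateTime("2016-01-01T03:00:00Z")));
// bucketInterval(00:30) floors to the 00:00/01:00 bucket, which contains it
//   => Optional.of(00:00/01:00)
// bucketInterval(01:30) also floors to 00:00/01:00, which does NOT contain it
//   => Optional.absent()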
From source file: io.druid.segment.IndexMaker.java
License: Apache License

private static void makeIndexBinary(final FileSmoosher v9Smoosher, final List<IndexableAdapter> adapters,
    final File outDir, final List<String> mergedDimensions, final List<String> mergedMetrics,
    final Set<String> skippedDimensions, final ProgressIndicator progress, final IndexSpec indexSpec)
    throws IOException {
  final String section = "building index.drd";
  progress.startSection(section);

  final Set<String> finalColumns = Sets.newTreeSet();
  finalColumns.addAll(mergedDimensions);
  finalColumns.addAll(mergedMetrics);
  finalColumns.removeAll(skippedDimensions);

  final Iterable<String> finalDimensions = Iterables.filter(mergedDimensions, new Predicate<String>() {
    @Override
    public boolean apply(String input) {
      return !skippedDimensions.contains(input);
    }
  });

  GenericIndexed<String> cols = GenericIndexed.fromIterable(finalColumns, GenericIndexed.STRING_STRATEGY);
  GenericIndexed<String> dims = GenericIndexed.fromIterable(finalDimensions, GenericIndexed.STRING_STRATEGY);

  final String bitmapSerdeFactoryType = mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory());
  final long numBytes = cols.getSerializedSize() + dims.getSerializedSize() + 16
      + serializerUtils.getSerializedStringByteSize(bitmapSerdeFactoryType);

  final SmooshedWriter writer = v9Smoosher.addWithSmooshedWriter("index.drd", numBytes);
  cols.writeToChannel(writer);
  dims.writeToChannel(writer);

  DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
  DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);

  for (IndexableAdapter index : adapters) {
    minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
    maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
  }
  final Interval dataInterval = new Interval(minTime, maxTime);

  serializerUtils.writeLong(writer, dataInterval.getStartMillis());
  serializerUtils.writeLong(writer, dataInterval.getEndMillis());

  serializerUtils.writeString(writer, bitmapSerdeFactoryType);
  writer.close();

  IndexIO.checkFileSize(new File(outDir, "index.drd"));

  progress.stopSection(section);
}
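The min/max fold that produces dataInterval seeds its accumulators with the MAX_INSTANT/MIN_INSTANT sentinels so any real adapter interval replaces them; a hedged miniature of the same pattern over made-up intervals:

DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);
for (Interval dataInterval : Arrays.asList(
    new Interval(new DateTime("2016-01-02T00:00:00Z"), new DateTime("2016-01-05T00:00:00Z")),
    new Interval(new DateTime("2016-01-01T00:00:00Z"), new DateTime("2016-01-03T00:00:00Z")))) {
  minTime = JodaUtils.minDateTime(minTime, dataInterval.getStart());
  maxTime = JodaUtils.maxDateTime(maxTime, dataInterval.getEnd());
}
Interval umbrella = new Interval(minTime, maxTime); // => 2016-01-01/2016-01-05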
From source file: io.druid.segment.IndexMerger.java
License: Apache License

private static File makeIndexFiles(final List<IndexableAdapter> indexes, final File outDir,
    final ProgressIndicator progress, final List<String> mergedDimensions,
    final List<String> mergedMetrics, final Map<String, Object> segmentMetadata,
    final Function<ArrayList<Iterable<Rowboat>>, Iterable<Rowboat>> rowMergerFn,
    final IndexSpec indexSpec) throws IOException {
  final Map<String, ValueType> valueTypes = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
  final Map<String, String> metricTypeNames = Maps.newTreeMap(Ordering.<String>natural().nullsFirst());
  final Map<String, ColumnCapabilitiesImpl> columnCapabilities = Maps.newHashMap();

  for (IndexableAdapter adapter : indexes) {
    for (String dimension : adapter.getDimensionNames()) {
      ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(dimension);
      ColumnCapabilities capabilities = adapter.getCapabilities(dimension);
      if (mergedCapabilities == null) {
        mergedCapabilities = new ColumnCapabilitiesImpl();
        mergedCapabilities.setType(ValueType.STRING);
      }
      columnCapabilities.put(dimension, mergedCapabilities.merge(capabilities));
    }
    for (String metric : adapter.getMetricNames()) {
      ColumnCapabilitiesImpl mergedCapabilities = columnCapabilities.get(metric);
      ColumnCapabilities capabilities = adapter.getCapabilities(metric);
      if (mergedCapabilities == null) {
        mergedCapabilities = new ColumnCapabilitiesImpl();
      }
      columnCapabilities.put(metric, mergedCapabilities.merge(capabilities));

      valueTypes.put(metric, capabilities.getType());
      metricTypeNames.put(metric, adapter.getMetricType(metric));
    }
  }

  final Interval dataInterval;
  File v8OutDir = new File(outDir, "v8-tmp");
  v8OutDir.mkdirs();

  /************* Main index.drd file **************/
  progress.progress();
  long startTime = System.currentTimeMillis();
  File indexFile = new File(v8OutDir, "index.drd");

  try (FileOutputStream fileOutputStream = new FileOutputStream(indexFile);
      FileChannel channel = fileOutputStream.getChannel()) {
    channel.write(ByteBuffer.wrap(new byte[] { IndexIO.V8_VERSION }));

    GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);
    GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY).writeToChannel(channel);

    DateTime minTime = new DateTime(JodaUtils.MAX_INSTANT);
    DateTime maxTime = new DateTime(JodaUtils.MIN_INSTANT);

    for (IndexableAdapter index : indexes) {
      minTime = JodaUtils.minDateTime(minTime, index.getDataInterval().getStart());
      maxTime = JodaUtils.maxDateTime(maxTime, index.getDataInterval().getEnd());
    }
    dataInterval = new Interval(minTime, maxTime);

    serializerUtils.writeString(channel, String.format("%s/%s", minTime, maxTime));
    serializerUtils.writeString(channel, mapper.writeValueAsString(indexSpec.getBitmapSerdeFactory()));
  }
  IndexIO.checkFileSize(indexFile);
  log.info("outDir[%s] completed index.drd in %,d millis.", v8OutDir,
      System.currentTimeMillis() - startTime);

  /************* Setup Dim Conversions **************/
  progress.progress();
  startTime = System.currentTimeMillis();

  IOPeon ioPeon = new TmpFileIOPeon();
  ArrayList<FileOutputSupplier> dimOuts = Lists.newArrayListWithCapacity(mergedDimensions.size());
  Map<String, Integer> dimensionCardinalities = Maps.newHashMap();
  ArrayList<Map<String, IntBuffer>> dimConversions = Lists.newArrayListWithCapacity(indexes.size());

  for (IndexableAdapter index : indexes) {
    dimConversions.add(Maps.<String, IntBuffer>newHashMap());
  }

  for (String dimension : mergedDimensions) {
    final GenericIndexedWriter<String> writer = new GenericIndexedWriter<String>(ioPeon, dimension,
        GenericIndexed.STRING_STRATEGY);
    writer.open();

    List<Indexed<String>> dimValueLookups = Lists.newArrayListWithCapacity(indexes.size());
    DimValueConverter[] converters = new DimValueConverter[indexes.size()];
    for (int i = 0; i < indexes.size(); i++) {
      Indexed<String> dimValues = indexes.get(i).getDimValueLookup(dimension);
      if (!isNullColumn(dimValues)) {
        dimValueLookups.add(dimValues);
        converters[i] = new DimValueConverter(dimValues);
      }
    }

    Iterable<String> dimensionValues = CombiningIterable.createSplatted(
        Iterables.transform(dimValueLookups, new Function<Indexed<String>, Iterable<String>>() {
          @Override
          public Iterable<String> apply(@Nullable Indexed<String> indexed) {
            return Iterables.transform(indexed, new Function<String, String>() {
              @Override
              public String apply(@Nullable String input) {
                return (input == null) ? "" : input;
              }
            });
          }
        }), Ordering.<String>natural().nullsFirst());

    int count = 0;
    for (String value : dimensionValues) {
      value = value == null ? "" : value;
      writer.write(value);

      for (int i = 0; i < indexes.size(); i++) {
        DimValueConverter converter = converters[i];
        if (converter != null) {
          converter.convert(value, count);
        }
      }

      ++count;
    }
    dimensionCardinalities.put(dimension, count);

    FileOutputSupplier dimOut = new FileOutputSupplier(IndexIO.makeDimFile(v8OutDir, dimension), true);
    dimOuts.add(dimOut);

    writer.close();
    serializerUtils.writeString(dimOut, dimension);
    ByteStreams.copy(writer.combineStreams(), dimOut);

    for (int i = 0; i < indexes.size(); ++i) {
      DimValueConverter converter = converters[i];
      if (converter != null) {
        dimConversions.get(i).put(dimension, converters[i].getConversionBuffer());
      }
    }

    ioPeon.cleanup();
  }
  log.info("outDir[%s] completed dim conversions in %,d millis.", v8OutDir,
      System.currentTimeMillis() - startTime);

  /************* Walk through data sets and merge them *************/
  progress.progress();
  startTime = System.currentTimeMillis();

  ArrayList<Iterable<Rowboat>> boats = Lists.newArrayListWithCapacity(indexes.size());

  for (int i = 0; i < indexes.size(); ++i) {
    final IndexableAdapter adapter = indexes.get(i);

    final int[] dimLookup = new int[mergedDimensions.size()];
    int count = 0;
    for (String dim : adapter.getDimensionNames()) {
      dimLookup[count] = mergedDimensions.indexOf(dim);
      count++;
    }

    final int[] metricLookup = new int[mergedMetrics.size()];
    count = 0;
    for (String metric : adapter.getMetricNames()) {
      metricLookup[count] = mergedMetrics.indexOf(metric);
      count++;
    }

    boats.add(new MMappedIndexRowIterable(
        Iterables.transform(indexes.get(i).getRows(), new Function<Rowboat, Rowboat>() {
          @Override
          public Rowboat apply(@Nullable Rowboat input) {
            int[][] newDims = new int[mergedDimensions.size()][];
            int j = 0;
            for (int[] dim : input.getDims()) {
              newDims[dimLookup[j]] = dim;
              j++;
            }

            Object[] newMetrics = new Object[mergedMetrics.size()];
            j = 0;
            for (Object met : input.getMetrics()) {
              newMetrics[metricLookup[j]] = met;
              j++;
            }

            return new Rowboat(input.getTimestamp(), newDims, newMetrics, input.getRowNum());
          }
        }), mergedDimensions, dimConversions.get(i), i));
  }

  Iterable<Rowboat> theRows = rowMergerFn.apply(boats);

  CompressedLongsSupplierSerializer timeWriter = CompressedLongsSupplierSerializer.create(ioPeon,
      "little_end_time", IndexIO.BYTE_ORDER, CompressedObjectStrategy.DEFAULT_COMPRESSION_STRATEGY);

  timeWriter.open();

  ArrayList<VSizeIndexedWriter> forwardDimWriters = Lists.newArrayListWithCapacity(mergedDimensions.size());
  for (String dimension : mergedDimensions) {
    VSizeIndexedWriter writer = new VSizeIndexedWriter(ioPeon, dimension,
        dimensionCardinalities.get(dimension));
    writer.open();
    forwardDimWriters.add(writer);
  }

  ArrayList<MetricColumnSerializer> metWriters = Lists.newArrayListWithCapacity(mergedMetrics.size());
  for (String metric : mergedMetrics) {
    ValueType type = valueTypes.get(metric);
    switch (type) {
      case LONG:
        metWriters.add(new LongMetricColumnSerializer(metric, v8OutDir, ioPeon));
        break;
      case FLOAT:
        metWriters.add(new FloatMetricColumnSerializer(metric, v8OutDir, ioPeon));
        break;
      case COMPLEX:
        final String typeName = metricTypeNames.get(metric);
        ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(typeName);
        if (serde == null) {
          throw new ISE("Unknown type[%s]", typeName);
        }
        metWriters.add(new ComplexMetricColumnSerializer(metric, v8OutDir, ioPeon, serde));
        break;
      default:
        throw new ISE("Unknown type[%s]", type);
    }
  }
  for (MetricColumnSerializer metWriter : metWriters) {
    metWriter.open();
  }

  int rowCount = 0;
  long time = System.currentTimeMillis();
  List<IntBuffer> rowNumConversions = Lists.newArrayListWithCapacity(indexes.size());
  for (IndexableAdapter index : indexes) {
    int[] arr = new int[index.getNumRows()];
    Arrays.fill(arr, INVALID_ROW);
    rowNumConversions.add(IntBuffer.wrap(arr));
  }

  for (Rowboat theRow : theRows) {
    progress.progress();
    timeWriter.add(theRow.getTimestamp());

    final Object[] metrics = theRow.getMetrics();
    for (int i = 0; i < metrics.length; ++i) {
      metWriters.get(i).serialize(metrics[i]);
    }

    int[][] dims = theRow.getDims();
    for (int i = 0; i < dims.length; ++i) {
      List<Integer> listToWrite = (i >= dims.length || dims[i] == null) ? null : Ints.asList(dims[i]);
      forwardDimWriters.get(i).write(listToWrite);
    }

    for (Map.Entry<Integer, TreeSet<Integer>> comprisedRow : theRow.getComprisedRows().entrySet()) {
      final IntBuffer conversionBuffer = rowNumConversions.get(comprisedRow.getKey());

      for (Integer rowNum : comprisedRow.getValue()) {
        while (conversionBuffer.position() < rowNum) {
          conversionBuffer.put(INVALID_ROW);
        }
        conversionBuffer.put(rowCount);
      }
    }

    if ((++rowCount % 500000) == 0) {
      log.info("outDir[%s] walked 500,000/%,d rows in %,d millis.", v8OutDir, rowCount,
          System.currentTimeMillis() - time);
      time = System.currentTimeMillis();
    }
  }

  for (IntBuffer rowNumConversion : rowNumConversions) {
    rowNumConversion.rewind();
  }

  final File timeFile = IndexIO.makeTimeFile(v8OutDir, IndexIO.BYTE_ORDER);
  timeFile.delete();
  OutputSupplier<FileOutputStream> out = Files.newOutputStreamSupplier(timeFile, true);
  timeWriter.closeAndConsolidate(out);
  IndexIO.checkFileSize(timeFile);

  for (int i = 0; i < mergedDimensions.size(); ++i) {
    forwardDimWriters.get(i).close();
    ByteStreams.copy(forwardDimWriters.get(i).combineStreams(), dimOuts.get(i));
  }

  for (MetricColumnSerializer metWriter : metWriters) {
    metWriter.close();
  }

  ioPeon.cleanup();
  log.info("outDir[%s] completed walk through of %,d rows in %,d millis.", v8OutDir, rowCount,
      System.currentTimeMillis() - startTime);

  /************ Create Inverted Indexes *************/
  startTime = System.currentTimeMillis();

  final File invertedFile = new File(v8OutDir, "inverted.drd");
  Files.touch(invertedFile);
  out = Files.newOutputStreamSupplier(invertedFile, true);

  final File geoFile = new File(v8OutDir, "spatial.drd");
  Files.touch(geoFile);
  OutputSupplier<FileOutputStream> spatialOut = Files.newOutputStreamSupplier(geoFile, true);

  for (int i = 0; i < mergedDimensions.size(); ++i) {
    long dimStartTime = System.currentTimeMillis();
    String dimension = mergedDimensions.get(i);

    File dimOutFile = dimOuts.get(i).getFile();
    final MappedByteBuffer dimValsMapped = Files.map(dimOutFile);

    if (!dimension.equals(serializerUtils.readString(dimValsMapped))) {
      throw new ISE("dimensions[%s] didn't equate!? This is a major WTF moment.", dimension);
    }
    Indexed<String> dimVals = GenericIndexed.read(dimValsMapped, GenericIndexed.STRING_STRATEGY);
    log.info("Starting dimension[%s] with cardinality[%,d]", dimension, dimVals.size());

    final BitmapSerdeFactory bitmapSerdeFactory = indexSpec.getBitmapSerdeFactory();
    GenericIndexedWriter<ImmutableBitmap> writer = new GenericIndexedWriter<>(ioPeon, dimension,
        bitmapSerdeFactory.getObjectStrategy());
    writer.open();

    boolean isSpatialDim = columnCapabilities.get(dimension).hasSpatialIndexes();
    ByteBufferWriter<ImmutableRTree> spatialWriter = null;
    RTree tree = null;
    IOPeon spatialIoPeon = new TmpFileIOPeon();
    if (isSpatialDim) {
      BitmapFactory bitmapFactory = bitmapSerdeFactory.getBitmapFactory();
      spatialWriter = new ByteBufferWriter<ImmutableRTree>(spatialIoPeon, dimension,
          new IndexedRTree.ImmutableRTreeObjectStrategy(bitmapFactory));
      spatialWriter.open();
      tree = new RTree(2, new LinearGutmanSplitStrategy(0, 50, bitmapFactory), bitmapFactory);
    }

    for (String dimVal : IndexedIterable.create(dimVals)) {
      progress.progress();
      List<Iterable<Integer>> convertedInverteds = Lists.newArrayListWithCapacity(indexes.size());
      for (int j = 0; j < indexes.size(); ++j) {
        convertedInverteds.add(new ConvertingIndexedInts(
            indexes.get(j).getBitmapIndex(dimension, dimVal), rowNumConversions.get(j)));
      }

      MutableBitmap bitset = bitmapSerdeFactory.getBitmapFactory().makeEmptyMutableBitmap();
      for (Integer row : CombiningIterable.createSplatted(convertedInverteds,
          Ordering.<Integer>natural().nullsFirst())) {
        if (row != INVALID_ROW) {
          bitset.add(row);
        }
      }

      writer.write(bitmapSerdeFactory.getBitmapFactory().makeImmutableBitmap(bitset));

      if (isSpatialDim && dimVal != null) {
        List<String> stringCoords = Lists.newArrayList(SPLITTER.split(dimVal));
        float[] coords = new float[stringCoords.size()];
        for (int j = 0; j < coords.length; j++) {
          coords[j] = Float.valueOf(stringCoords.get(j));
        }
        tree.insert(coords, bitset);
      }
    }
    writer.close();

    serializerUtils.writeString(out, dimension);
    ByteStreams.copy(writer.combineStreams(), out);
    ioPeon.cleanup();

    log.info("Completed dimension[%s] in %,d millis.", dimension,
        System.currentTimeMillis() - dimStartTime);

    if (isSpatialDim) {
      spatialWriter.write(ImmutableRTree.newImmutableFromMutable(tree));
      spatialWriter.close();

      serializerUtils.writeString(spatialOut, dimension);
      ByteStreams.copy(spatialWriter.combineStreams(), spatialOut);
      spatialIoPeon.cleanup();
    }
  }

  log.info("outDir[%s] completed inverted.drd in %,d millis.", v8OutDir,
      System.currentTimeMillis() - startTime);

  final ArrayList<String> expectedFiles = Lists.newArrayList(Iterables.concat(
      Arrays.asList("index.drd", "inverted.drd", "spatial.drd",
          String.format("time_%s.drd", IndexIO.BYTE_ORDER)),
      Iterables.transform(mergedDimensions, GuavaUtils.formatFunction("dim_%s.drd")),
      Iterables.transform(mergedMetrics,
          GuavaUtils.formatFunction(String.format("met_%%s_%s.drd", IndexIO.BYTE_ORDER)))));

  if (segmentMetadata != null && !segmentMetadata.isEmpty()) {
    writeMetadataToFile(new File(v8OutDir, "metadata.drd"), segmentMetadata);
    log.info("wrote metadata.drd in outDir[%s].", v8OutDir);

    expectedFiles.add("metadata.drd");
  }

  Map<String, File> files = Maps.newLinkedHashMap();
  for (String fileName : expectedFiles) {
    files.put(fileName, new File(v8OutDir, fileName));
  }

  File smooshDir = new File(v8OutDir, "smoosher");
  smooshDir.mkdir();

  for (Map.Entry<String, File> entry : Smoosh.smoosh(v8OutDir, smooshDir, files).entrySet()) {
    entry.getValue().delete();
  }

  for (File file : smooshDir.listFiles()) {
    Files.move(file, new File(v8OutDir, file.getName()));
  }

  if (!smooshDir.delete()) {
    log.info("Unable to delete temporary dir[%s], contains[%s]", smooshDir,
        Arrays.asList(smooshDir.listFiles()));
    throw new IOException(String.format("Unable to delete temporary dir[%s]", smooshDir));
  }

  createIndexDrdFile(IndexIO.V8_VERSION, v8OutDir,
      GenericIndexed.fromIterable(mergedDimensions, GenericIndexed.STRING_STRATEGY),
      GenericIndexed.fromIterable(mergedMetrics, GenericIndexed.STRING_STRATEGY), dataInterval,
      indexSpec.getBitmapSerdeFactory());

  IndexIO.DefaultIndexIOHandler.convertV8toV9(v8OutDir, outDir, indexSpec);
  FileUtils.deleteDirectory(v8OutDir);

  return outDir;
}