List of usage examples for the org.joda.time.Interval constructor
public Interval(Object interval)
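Every example below passes an ISO-8601 "start/end" string to this constructor, which Joda-Time routes through its converter framework. A minimal standalone sketch of that usage (the dates are arbitrary, chosen only for illustration):

import org.joda.time.DateTime;
import org.joda.time.Interval;

// Minimal sketch (not from any of the projects below): the Object overload accepts
// an ISO-8601 "start/end" string, which is how every example on this page uses it.
public class IntervalConstructorExample {
    public static void main(String[] args) {
        Interval interval = new Interval("2016-01-01T00:00:00.000Z/2016-02-01T00:00:00.000Z");

        System.out.println(interval.getStart());   // start instant of the interval
        System.out.println(interval.getEnd());     // end instant of the interval
        System.out.println(interval.contains(new DateTime("2016-01-15T00:00:00Z")));   // true
    }
}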
From source file:com.metamx.druid.realtime.RealtimePlumberSchool.java
License:Open Source License
@Override
public Plumber findPlumber(final Schema schema, final FireDepartmentMetrics metrics) {
    verifyState();
    initializeExecutors();

    computeBaseDir(schema).mkdirs();

    final Map<Long, Sink> sinks = Maps.newConcurrentMap();

    for (File sinkDir : computeBaseDir(schema).listFiles()) {
        Interval sinkInterval = new Interval(sinkDir.getName().replace("_", "/"));

        final File[] sinkFiles = sinkDir.listFiles();
        Arrays.sort(sinkFiles, new Comparator<File>() {
            @Override
            public int compare(File o1, File o2) {
                try {
                    return Ints.compare(Integer.parseInt(o1.getName()), Integer.parseInt(o2.getName()));
                } catch (NumberFormatException e) {
                    log.error(e, "Couldn't compare as numbers? [%s][%s]", o1, o2);
                    return o1.compareTo(o2);
                }
            }
        });

        try {
            List<FireHydrant> hydrants = Lists.newArrayList();
            for (File segmentDir : sinkFiles) {
                log.info("Loading previously persisted segment at [%s]", segmentDir);
                hydrants.add(new FireHydrant(
                        new QueryableIndexSegment(null, IndexIO.loadIndex(segmentDir)),
                        Integer.parseInt(segmentDir.getName())));
            }

            Sink currSink = new Sink(sinkInterval, schema, hydrants);
            sinks.put(sinkInterval.getStartMillis(), currSink);

            metadataUpdater.announceSegment(currSink.getSegment());
        } catch (IOException e) {
            log.makeAlert(e, "Problem loading sink[%s] from disk.", schema.getDataSource())
                    .addData("interval", sinkInterval).emit();
        }
    }

    serverView.registerSegmentCallback(persistExecutor, new ServerView.BaseSegmentCallback() {
        @Override
        public ServerView.CallbackAction segmentAdded(DruidServer server, DataSegment segment) {
            if ("realtime".equals(server.getType())) {
                return ServerView.CallbackAction.CONTINUE;
            }

            log.debug("Checking segment[%s] on server[%s]", segment, server);
            if (schema.getDataSource().equals(segment.getDataSource())) {
                final Interval interval = segment.getInterval();
                for (Map.Entry<Long, Sink> entry : sinks.entrySet()) {
                    final Long sinkKey = entry.getKey();
                    if (interval.contains(sinkKey)) {
                        final Sink sink = entry.getValue();
                        log.info("Segment matches sink[%s]", sink);

                        if (segment.getVersion().compareTo(sink.getSegment().getVersion()) >= 0) {
                            try {
                                metadataUpdater.unannounceSegment(sink.getSegment());
                                FileUtils.deleteDirectory(computePersistDir(schema, sink.getInterval()));
                                sinks.remove(sinkKey);
                            } catch (IOException e) {
                                log.makeAlert(e, "Unable to delete old segment for dataSource[%s].",
                                        schema.getDataSource()).addData("interval", sink.getInterval()).emit();
                            }
                        }
                    }
                }
            }

            return ServerView.CallbackAction.CONTINUE;
        }
    });

    final long truncatedNow = segmentGranularity.truncate(new DateTime()).getMillis();
    final long windowMillis = windowPeriod.toStandardDuration().getMillis();
    final RejectionPolicy rejectionPolicy = rejectionPolicyFactory.create(windowPeriod);
    log.info("Creating plumber using rejectionPolicy[%s]", rejectionPolicy);

    log.info("Expect to run at [%s]", new DateTime().plus(
            new Duration(System.currentTimeMillis(), segmentGranularity.increment(truncatedNow) + windowMillis)));

    ScheduledExecutors.scheduleAtFixedRate(scheduledExecutor,
            new Duration(System.currentTimeMillis(), segmentGranularity.increment(truncatedNow) + windowMillis),
            new Duration(truncatedNow, segmentGranularity.increment(truncatedNow)),
            new ThreadRenamingRunnable(String.format("%s-overseer", schema.getDataSource())) {
                @Override
                public void doRun() {
                    log.info("Starting merge and push.");

                    long minTimestamp = segmentGranularity.truncate(rejectionPolicy.getCurrMaxTime()).getMillis()
                            - windowMillis;

                    List<Map.Entry<Long, Sink>> sinksToPush = Lists.newArrayList();
                    for (Map.Entry<Long, Sink> entry : sinks.entrySet()) {
                        final Long intervalStart = entry.getKey();
                        if (intervalStart < minTimestamp) {
                            log.info("Adding entry[%s] for merge and push.", entry);
                            sinksToPush.add(entry);
                        }
                    }

                    for (final Map.Entry<Long, Sink> entry : sinksToPush) {
                        final Sink sink = entry.getValue();
                        final String threadName = String.format("%s-%s-persist-n-merge",
                                schema.getDataSource(), new DateTime(entry.getKey()));
                        persistExecutor.execute(new ThreadRenamingRunnable(threadName) {
                            @Override
                            public void doRun() {
                                final Interval interval = sink.getInterval();

                                for (FireHydrant hydrant : sink) {
                                    if (!hydrant.hasSwapped()) {
                                        log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                                        final int rowCount = persistHydrant(hydrant, schema, interval);
                                        metrics.incrementRowOutputCount(rowCount);
                                    }
                                }

                                final File mergedFile;
                                try {
                                    List<QueryableIndex> indexes = Lists.newArrayList();
                                    for (FireHydrant fireHydrant : sink) {
                                        Segment segment = fireHydrant.getSegment();
                                        final QueryableIndex queryableIndex = segment.asQueryableIndex();
                                        log.info("Adding hydrant[%s]", fireHydrant);
                                        indexes.add(queryableIndex);
                                    }

                                    mergedFile = IndexMerger.mergeQueryableIndex(indexes, schema.getAggregators(),
                                            new File(computePersistDir(schema, interval), "merged"));

                                    QueryableIndex index = IndexIO.loadIndex(mergedFile);

                                    DataSegment segment = segmentPusher.push(mergedFile,
                                            sink.getSegment().withDimensions(
                                                    Lists.newArrayList(index.getAvailableDimensions())));

                                    metadataUpdater.publishSegment(segment);
                                } catch (IOException e) {
                                    log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource())
                                            .addData("interval", interval).emit();
                                }
                            }
                        });
                    }
                }
            });

    return new Plumber() {
        @Override
        public Sink getSink(long timestamp) {
            if (!rejectionPolicy.accept(timestamp)) {
                return null;
            }

            final long truncatedTime = segmentGranularity.truncate(timestamp);

            Sink retVal = sinks.get(truncatedTime);
            if (retVal == null) {
                retVal = new Sink(new Interval(new DateTime(truncatedTime),
                        segmentGranularity.increment(new DateTime(truncatedTime))), schema);

                try {
                    metadataUpdater.announceSegment(retVal.getSegment());
                    sinks.put(truncatedTime, retVal);
                } catch (IOException e) {
                    log.makeAlert(e, "Failed to announce new segment[%s]", schema.getDataSource())
                            .addData("interval", retVal.getInterval()).emit();
                }
            }

            return retVal;
        }

        @Override
        public <T> QueryRunner<T> getQueryRunner(final Query<T> query) {
            final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
            final Function<Query<T>, ServiceMetricEvent.Builder> builderFn =
                    new Function<Query<T>, ServiceMetricEvent.Builder>() {
                        private final QueryToolChest<T, Query<T>> toolchest = factory.getToolchest();

                        @Override
                        public ServiceMetricEvent.Builder apply(@Nullable Query<T> input) {
                            return toolchest.makeMetricBuilder(query);
                        }
                    };

            return factory.mergeRunners(EXEC,
                    FunctionalIterable.create(sinks.values()).transform(new Function<Sink, QueryRunner<T>>() {
                        @Override
                        public QueryRunner<T> apply(@Nullable Sink input) {
                            return new MetricsEmittingQueryRunner<T>(emitter, builderFn, factory.mergeRunners(EXEC,
                                    Iterables.transform(input, new Function<FireHydrant, QueryRunner<T>>() {
                                        @Override
                                        public QueryRunner<T> apply(@Nullable FireHydrant input) {
                                            return factory.createRunner(input.getSegment());
                                        }
                                    })));
                        }
                    }));
        }

        @Override
        public void persist(final Runnable commitRunnable) {
            final List<Pair<FireHydrant, Interval>> indexesToPersist = Lists.newArrayList();
            for (Sink sink : sinks.values()) {
                if (sink.swappable()) {
                    indexesToPersist.add(Pair.of(sink.swap(), sink.getInterval()));
                }
            }

            log.info("Submitting persist runnable for dataSource[%s]", schema.getDataSource());

            persistExecutor.execute(new ThreadRenamingRunnable(
                    String.format("%s-incremental-persist", schema.getDataSource())) {
                @Override
                public void doRun() {
                    for (Pair<FireHydrant, Interval> pair : indexesToPersist) {
                        metrics.incrementRowOutputCount(persistHydrant(pair.lhs, schema, pair.rhs));
                    }
                    commitRunnable.run();
                }
            });
        }

        @Override
        public void finishJob() {
            throw new UnsupportedOperationException();
        }
    };
}
From source file:com.metamx.druid.utils.ExposeS3DataSource.java
License:Open Source License
public static void main(String[] args) throws ServiceException, IOException, NoSuchAlgorithmException {
    CLI cli = new CLI();
    cli.addOption(new RequiredOption(null, "s3Bucket", true, "s3 bucket to pull data from"));
    cli.addOption(new RequiredOption(null, "s3Path", true,
            "base input path in s3 bucket. Everything until the date strings."));
    cli.addOption(new RequiredOption(null, "timeInterval", true, "ISO8601 interval of dates to index"));
    cli.addOption(new RequiredOption(null, "granularity", true, String.format(
            "granularity of index, supported granularities: [%s]", Arrays.asList(Granularity.values()))));
    cli.addOption(new RequiredOption(null, "zkCluster", true, "Cluster string to connect to ZK with."));
    cli.addOption(new RequiredOption(null, "zkBasePath", true, "The base path to register index changes to."));

    CommandLine commandLine = cli.parse(args);

    if (commandLine == null) {
        return;
    }

    String s3Bucket = commandLine.getOptionValue("s3Bucket");
    String s3Path = commandLine.getOptionValue("s3Path");
    String timeIntervalString = commandLine.getOptionValue("timeInterval");
    String granularity = commandLine.getOptionValue("granularity");
    String zkCluster = commandLine.getOptionValue("zkCluster");
    String zkBasePath = commandLine.getOptionValue("zkBasePath");

    Interval timeInterval = new Interval(timeIntervalString);
    Granularity gran = Granularity.valueOf(granularity.toUpperCase());
    final RestS3Service s3Client = new RestS3Service(new AWSCredentials(
            System.getProperty("com.metamx.aws.accessKey"), System.getProperty("com.metamx.aws.secretKey")));
    ZkClient zkClient = new ZkClient(new ZkConnection(zkCluster), Integer.MAX_VALUE, new StringZkSerializer());
    zkClient.waitUntilConnected();

    for (Interval interval : gran.getIterable(timeInterval)) {
        log.info("Processing interval[%s]", interval);
        String s3DatePath = JOINER.join(s3Path, gran.toPath(interval.getStart()));
        if (!s3DatePath.endsWith("/")) {
            s3DatePath += "/";
        }

        StorageObjectsChunk chunk = s3Client.listObjectsChunked(s3Bucket, s3DatePath, "/", 2000, null, true);
        TreeSet<String> commonPrefixes = Sets.newTreeSet();
        commonPrefixes.addAll(Arrays.asList(chunk.getCommonPrefixes()));

        if (commonPrefixes.isEmpty()) {
            log.info("Nothing at s3://%s/%s", s3Bucket, s3DatePath);
            continue;
        }

        String latestPrefix = commonPrefixes.last();
        log.info("Latest segments at [s3://%s/%s]", s3Bucket, latestPrefix);

        chunk = s3Client.listObjectsChunked(s3Bucket, latestPrefix, "/", 2000, null, true);
        Integer partitionNumber;
        if (chunk.getCommonPrefixes().length == 0) {
            partitionNumber = null;
        } else {
            partitionNumber = -1;
            for (String partitionPrefix : chunk.getCommonPrefixes()) {
                String[] splits = partitionPrefix.split("/");
                partitionNumber = Math.max(partitionNumber, Integer.parseInt(splits[splits.length - 1]));
            }
        }

        log.info("Highest segment partition[%,d]", partitionNumber);

        if (partitionNumber == null) {
            final S3Object s3Obj = new S3Object(new S3Bucket(s3Bucket),
                    String.format("%sdescriptor.json", latestPrefix));
            updateWithS3Object(zkBasePath, s3Client, zkClient, s3Obj);
        } else {
            for (int i = partitionNumber; i >= 0; --i) {
                final S3Object partitionObject = new S3Object(new S3Bucket(s3Bucket),
                        String.format("%s%s/descriptor.json", latestPrefix, i));
                updateWithS3Object(zkBasePath, s3Client, zkClient, partitionObject);
            }
        }
    }
}
From source file:com.yahoo.bard.webservice.web.responseprocessors.DruidPartialDataResponseProcessor.java
License:Apache License
/**
 * Returns the overlap between uncoveredIntervals from Druid and missing intervals that Fili expects.
 *
 * @param json The JSON node that contains the uncoveredIntervals from Druid, for example
 * <pre>
 * {@code
 * X-Druid-Response-Context: {
 *     "uncoveredIntervals": [
 *         "2016-11-22T00:00:00.000Z/2016-12-18T00:00:00.000Z",
 *         "2016-12-25T00:00:00.000Z/2017-01-03T00:00:00.000Z",
 *         "2017-01-31T00:00:00.000Z/2017-02-01T00:00:00.000Z",
 *         "2017-02-08T00:00:00.000Z/2017-02-09T00:00:00.000Z",
 *         "2017-02-10T00:00:00.000Z/2017-02-13T00:00:00.000Z",
 *         "2017-02-16T00:00:00.000Z/2017-02-20T00:00:00.000Z",
 *         "2017-02-22T00:00:00.000Z/2017-02-25T00:00:00.000Z",
 *         "2017-02-26T00:00:00.000Z/2017-03-01T00:00:00.000Z",
 *         "2017-03-04T00:00:00.000Z/2017-03-05T00:00:00.000Z",
 *         "2017-03-08T00:00:00.000Z/2017-03-09T00:00:00.000Z"
 *     ],
 *     "uncoveredIntervalsOverflowed": true
 * }
 * }
 * </pre>
 * @param query The Druid query that contains the missing intervals that Fili expects
 *
 * @return the overlap between uncoveredIntervals from Druid and missing intervals that Fili expects.
 */
private SimplifiedIntervalList getOverlap(JsonNode json, DruidAggregationQuery<?> query) {
    List<Interval> intervals = new ArrayList<>();

    for (JsonNode jsonNode : json.get(DruidJsonResponseContentKeys.DRUID_RESPONSE_CONTEXT.getName())
            .get(DruidJsonResponseContentKeys.UNCOVERED_INTERVALS.getName())) {
        intervals.add(new Interval(jsonNode.asText()));
    }

    SimplifiedIntervalList druidIntervals = new SimplifiedIntervalList(intervals);

    return druidIntervals.intersect(query.getDataSource().getPhysicalTable().getAvailableIntervals());
}
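The types above (SimplifiedIntervalList, DruidAggregationQuery) are specific to Fili. The same overlap check can be sketched with plain Joda-Time using Interval.overlap, which returns null when two intervals do not intersect. The interval values below are hypothetical, chosen only for illustration:

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.joda.time.Interval;

// Minimal sketch with plain Joda-Time only: intersect a list of "uncovered" intervals
// reported by a datastore with the intervals the caller already expected to be missing.
public class UncoveredOverlapSketch {
    static List<Interval> overlap(List<String> uncovered, List<Interval> expectedMissing) {
        List<Interval> result = new ArrayList<>();
        for (String iso : uncovered) {
            Interval reported = new Interval(iso);            // parse "start/end" ISO-8601 string
            for (Interval expected : expectedMissing) {
                Interval common = reported.overlap(expected); // null if the two do not intersect
                if (common != null) {
                    result.add(common);
                }
            }
        }
        return result;
    }

    public static void main(String[] args) {
        List<String> uncovered = Arrays.asList("2016-11-22T00:00:00Z/2016-12-18T00:00:00Z");
        List<Interval> expected = Arrays.asList(new Interval("2016-12-01T00:00:00Z/2017-01-01T00:00:00Z"));
        System.out.println(overlap(uncovered, expected));   // prints the overlapping portion: 2016-12-01 to 2016-12-18
    }
}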
From source file:com.yahoo.druid.hadoop.DruidInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();

    final String overlordUrl = conf.get(CONF_DRUID_OVERLORD_HOSTPORT);
    final String storageDir = conf.get(CONF_DRUID_STORAGE_STORAGE_DIR);
    String dataSource = conf.get(CONF_DRUID_DATASOURCE);
    String intervalStr = conf.get(CONF_DRUID_INTERVAL);

    logger.info("druid overlord url = " + overlordUrl);
    logger.info("druid storage dir = " + storageDir);
    logger.info("druid datasource = " + dataSource);
    logger.info("druid datasource interval = " + intervalStr);

    // TODO: currently we are creating 1 split per segment which is not really
    // necessary, we can use some configuration to combine multiple segments into
    // one input split
    List<InputSplit> splits = Lists.transform(
            druid.getSegmentPathsToLoad(dataSource, new Interval(intervalStr), storageDir, overlordUrl),
            new Function<String, InputSplit>() {
                @Override
                public InputSplit apply(String input) {
                    return new DruidInputSplit(input);
                }
            });

    logger.info("Number of splits = " + splits.size());
    return splits;
}
From source file:com.yahoo.druid.hadoop.DruidRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    ObjectMapper jsonMapper = DruidInitialization.getInstance().getObjectMapper();
    SegmentLoadSpec spec = readSegmentJobSpec(context.getConfiguration(), jsonMapper);

    final List<String> dimensions = spec.getDimensions();
    final List<String> metrics = spec.getMetrics();
    final DimFilter filter = spec.getFilter();
    final Interval interval = new Interval(context.getConfiguration().get(DruidInputFormat.CONF_DRUID_INTERVAL));

    String hdfsPath = ((DruidInputSplit) split).getPath();
    logger.info("Reading segment from " + hdfsPath);

    segmentDir = Files.createTempDir();
    logger.info("segment dir: " + segmentDir);

    FileSystem fs = FileSystem.get(context.getConfiguration());
    getSegmentFiles(hdfsPath, segmentDir, fs);
    logger.info("finished getting segment files");

    QueryableIndex index = IndexIO.loadIndex(segmentDir);
    StorageAdapter adapter = new QueryableIndexStorageAdapter(index);
    List<StorageAdapter> adapters = Lists.newArrayList(adapter);
    rowYielder = new IngestSegmentFirehose(adapters, dimensions, metrics, filter, interval,
            QueryGranularity.NONE);
}
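Both Hadoop examples above rely on the interval surviving a round trip through the job Configuration as a plain string. A small sketch of that round trip; the configuration key and interval value here are hypothetical:

import org.apache.hadoop.conf.Configuration;
import org.joda.time.Interval;

// Minimal sketch: Interval.toString() produces the ISO-8601 "start/end" form that
// new Interval(String) parses back, so an interval can be carried in a Hadoop Configuration.
public class IntervalConfRoundTrip {
    private static final String CONF_INTERVAL_KEY = "example.druid.interval"; // hypothetical key

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        Interval original = new Interval("2014-10-22T00:00:00Z/2014-10-23T00:00:00Z");

        conf.set(CONF_INTERVAL_KEY, original.toString());        // serialize as "start/end"
        Interval restored = new Interval(conf.get(CONF_INTERVAL_KEY));

        System.out.println(original.getStartMillis() == restored.getStartMillis()
                && original.getEndMillis() == restored.getEndMillis());   // true
    }
}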
From source file:gsonjodatime.IntervalConverter.java
License:Open Source License
/**
 * Gson invokes this call-back method during deserialization when it encounters a field of the
 * specified type.
 * <p>
 * In the implementation of this call-back method, you should consider invoking
 * {@link JsonDeserializationContext#deserialize(JsonElement, Type)} method to create objects
 * for any non-trivial field of the returned object. However, you should never invoke it on the
 * same type passing {@code json} since that will cause an infinite loop (Gson will call your
 * call-back method again).
 *
 * @param json The Json data being deserialized
 * @param typeOfT The type of the Object to deserialize to
 * @return a deserialized object of the specified type typeOfT which is a subclass of {@code T}
 * @throws JsonParseException if json is not in the expected format of {@code typeOfT}
 */
@Override
public Interval deserialize(JsonElement json, Type typeOfT, JsonDeserializationContext context)
        throws JsonParseException {
    // Do not try to deserialize null or empty values
    if (json.getAsString() == null || json.getAsString().isEmpty()) {
        return null;
    }

    return new Interval(json.getAsString());
}
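For this converter to take effect it has to be registered with Gson. Assuming IntervalConverter also implements JsonSerializer<Interval>, as it does in the gson-jodatime-serialisers project, registration might look like the sketch below; the JSON value is hypothetical:

import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import gsonjodatime.IntervalConverter;
import org.joda.time.Interval;

// Sketch, assuming IntervalConverter implements both JsonSerializer<Interval> and
// JsonDeserializer<Interval> as in the gsonjodatime package shown above.
public class IntervalGsonExample {
    public static void main(String[] args) {
        Gson gson = new GsonBuilder()
                .registerTypeAdapter(Interval.class, new IntervalConverter())
                .create();

        // The converter stores an Interval as its ISO-8601 "start/end" string form.
        Interval interval = gson.fromJson(
                "\"2017-01-01T00:00:00.000Z/2017-02-01T00:00:00.000Z\"", Interval.class);
        System.out.println(interval);
    }
}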
From source file:io.druid.db.DatabaseSegmentManager.java
License:Open Source License
public boolean enableDatasource(final String ds) {
    try {
        VersionedIntervalTimeline<String, DataSegment> segmentTimeline = dbi
                .withHandle(new HandleCallback<VersionedIntervalTimeline<String, DataSegment>>() {
                    @Override
                    public VersionedIntervalTimeline<String, DataSegment> withHandle(Handle handle)
                            throws Exception {
                        return handle
                                .createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource",
                                        getSegmentsTable()))
                                .bind("dataSource", ds)
                                .fold(new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()),
                                        new Folder3<VersionedIntervalTimeline<String, DataSegment>, Map<String, Object>>() {
                                            @Override
                                            public VersionedIntervalTimeline<String, DataSegment> fold(
                                                    VersionedIntervalTimeline<String, DataSegment> timeline,
                                                    Map<String, Object> stringObjectMap,
                                                    FoldController foldController,
                                                    StatementContext statementContext) throws SQLException {
                                                try {
                                                    DataSegment segment = jsonMapper.readValue(
                                                            (String) stringObjectMap.get("payload"),
                                                            DataSegment.class);
                                                    timeline.add(segment.getInterval(), segment.getVersion(),
                                                            segment.getShardSpec().createChunk(segment));
                                                    return timeline;
                                                } catch (Exception e) {
                                                    throw new SQLException(e.toString());
                                                }
                                            }
                                        });
                    }
                });

        final List<DataSegment> segments = Lists.newArrayList();
        for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline
                .lookup(new Interval("0000-01-01/3000-01-01"))) {
            for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
                segments.add(partitionChunk.getObject());
            }
        }

        if (segments.isEmpty()) {
            log.warn("No segments found in the database!");
            return false;
        }

        dbi.withHandle(new HandleCallback<Void>() {
            @Override
            public Void withHandle(Handle handle) throws Exception {
                Batch batch = handle.createBatch();

                for (DataSegment segment : segments) {
                    batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(),
                            segment.getIdentifier()));
                }
                batch.execute();

                return null;
            }
        });
    } catch (Exception e) {
        log.error(e, "Exception enabling datasource %s", ds);
        return false;
    }

    return true;
}
From source file:io.druid.indexing.common.task.AppendTask.java
License:Apache License
@Override
public File merge(final Map<DataSegment, File> segments, final File outDir) throws Exception {
    VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<String, DataSegment>(
            Ordering.<String>natural().nullsFirst());

    for (DataSegment segment : segments.keySet()) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }

    final Iterable<SegmentToMergeHolder> segmentsToMerge = Iterables
            .concat(Iterables.transform(timeline.lookup(new Interval("1000-01-01/3000-01-01")),
                    new Function<TimelineObjectHolder<String, DataSegment>, Iterable<SegmentToMergeHolder>>() {
                        @Override
                        public Iterable<SegmentToMergeHolder> apply(
                                final TimelineObjectHolder<String, DataSegment> input) {
                            return Iterables.transform(input.getObject(),
                                    new Function<PartitionChunk<DataSegment>, SegmentToMergeHolder>() {
                                        @Nullable
                                        @Override
                                        public SegmentToMergeHolder apply(PartitionChunk<DataSegment> chunkInput) {
                                            DataSegment segment = chunkInput.getObject();
                                            return new SegmentToMergeHolder(segment, input.getInterval(),
                                                    Preconditions.checkNotNull(segments.get(segment),
                                                            "File for segment %s", segment.getIdentifier()));
                                        }
                                    });
                        }
                    }));

    List<IndexableAdapter> adapters = Lists.newArrayList();
    for (final SegmentToMergeHolder holder : segmentsToMerge) {
        adapters.add(new RowboatFilteringIndexAdapter(
                new QueryableIndexIndexableAdapter(IndexIO.loadIndex(holder.getFile())),
                new Predicate<Rowboat>() {
                    @Override
                    public boolean apply(Rowboat input) {
                        return holder.getInterval().contains(input.getTimestamp());
                    }
                }));
    }

    return IndexMerger.append(adapters, outDir, indexSpec);
}
From source file:io.druid.indexing.overlord.http.OverlordResource.java
License:Apache License
@GET
@Path("/worker/history")
@Produces(MediaType.APPLICATION_JSON)
public Response getWorkerConfigHistory(@QueryParam("interval") final String interval) {
    Interval theInterval = interval == null ? null : new Interval(interval);
    return Response.ok(auditManager.fetchAuditHistory(WorkerBehaviorConfig.CONFIG_KEY,
            WorkerBehaviorConfig.CONFIG_KEY, theInterval)).build();
}
From source file:io.druid.metadata.SQLMetadataSegmentManager.java
License:Apache License
@Override
public boolean enableDatasource(final String ds) {
    try {
        VersionedIntervalTimeline<String, DataSegment> segmentTimeline = dbi
                .withHandle(new HandleCallback<VersionedIntervalTimeline<String, DataSegment>>() {
                    @Override
                    public VersionedIntervalTimeline<String, DataSegment> withHandle(Handle handle)
                            throws Exception {
                        return handle
                                .createQuery(String.format("SELECT payload FROM %s WHERE dataSource = :dataSource",
                                        getSegmentsTable()))
                                .bind("dataSource", ds)
                                .map(ByteArrayMapper.FIRST)
                                .fold(new VersionedIntervalTimeline<String, DataSegment>(Ordering.natural()),
                                        new Folder3<VersionedIntervalTimeline<String, DataSegment>, byte[]>() {
                                            @Override
                                            public VersionedIntervalTimeline<String, DataSegment> fold(
                                                    VersionedIntervalTimeline<String, DataSegment> timeline,
                                                    byte[] payload, FoldController foldController,
                                                    StatementContext statementContext) throws SQLException {
                                                try {
                                                    DataSegment segment = jsonMapper.readValue(payload,
                                                            DataSegment.class);
                                                    timeline.add(segment.getInterval(), segment.getVersion(),
                                                            segment.getShardSpec().createChunk(segment));
                                                    return timeline;
                                                } catch (Exception e) {
                                                    throw new SQLException(e.toString());
                                                }
                                            }
                                        });
                    }
                });

        final List<DataSegment> segments = Lists.newArrayList();
        for (TimelineObjectHolder<String, DataSegment> objectHolder : segmentTimeline
                .lookup(new Interval("0000-01-01/3000-01-01"))) {
            for (PartitionChunk<DataSegment> partitionChunk : objectHolder.getObject()) {
                segments.add(partitionChunk.getObject());
            }
        }

        if (segments.isEmpty()) {
            log.warn("No segments found in the database!");
            return false;
        }

        dbi.withHandle(new HandleCallback<Void>() {
            @Override
            public Void withHandle(Handle handle) throws Exception {
                Batch batch = handle.createBatch();

                for (DataSegment segment : segments) {
                    batch.add(String.format("UPDATE %s SET used=true WHERE id = '%s'", getSegmentsTable(),
                            segment.getIdentifier()));
                }
                batch.execute();

                return null;
            }
        });
    } catch (Exception e) {
        log.error(e, "Exception enabling datasource %s", ds);
        return false;
    }

    return true;
}