Example usage for org.joda.time DateTimeZone UTC

List of usage examples for org.joda.time DateTimeZone UTC

Introduction

On this page you can find usage examples for org.joda.time DateTimeZone UTC.

Prototype

public static final DateTimeZone UTC

To view the source code for org.joda.time DateTimeZone UTC, follow the source link of each example.

Document

The time zone for Coordinated Universal Time (UTC).
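
Before the project examples below, a minimal self-contained sketch of the constant in isolation (the class name and sample values are illustrative, not taken from any of the projects listed):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;

public class UtcExample {
    public static void main(String[] args) {
        // Current instant expressed in UTC rather than the JVM default zone.
        DateTime nowUtc = DateTime.now(DateTimeZone.UTC);
        // Reinterpret an epoch-millisecond timestamp in UTC.
        DateTime epochStart = new DateTime(0L, DateTimeZone.UTC); // 1970-01-01T00:00:00.000Z
        System.out.println(nowUtc);
        System.out.println(epochStart);
    }
}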

Usage

From source file:com.knewton.mapreduce.StudentEventAbstractMapper.java

License:Apache License

/**
 * Checks whether the event falls within the desired time range.
 *
 * @param eventId the event id, interpreted as a millisecond timestamp
 * @param context the mapper context, used to increment the skipped-events counter
 * @return true if the event is within the configured time range (or no range is set), false otherwise
 */
private boolean isInTimeRange(long eventId, Context context) {
    DateTime eventTime = new DateTime(eventId).withZone(DateTimeZone.UTC);
    // Skip events outside of the desired time range.
    if (timeRange != null && !timeRange.contains(eventTime)) {
        context.getCounter(CounterConstants.STUDENT_EVENTS_JOB, CounterConstants.STUDENT_EVENTS_SKIPPED)
                .increment(1);
        return false;
    }
    return true;
}

From source file:com.knewton.mapreduce.StudentEventAbstractMapper.java

License:Apache License

/**
 * Sets up a DateTime interval for excluding student events. If the start time is not set, it
 * defaults to the beginning of time; if the end date is not specified, it defaults to the end
 * of time.
 *
 * @param conf the job configuration holding the optional start and end date parameters
 */
private void setupTimeRange(Configuration conf) {
    DateTimeFormatter dtf = DateTimeFormat.forPattern(DATE_TIME_STRING_FORMAT).withZoneUTC();
    String startDateStr = conf.get(START_DATE_PARAMETER_NAME);
    String endDateStr = conf.get(END_DATE_PARAMETER_NAME);
    // No need to instantiate timeRange.
    if (startDateStr == null && endDateStr == null) {
        return;
    }
    DateTime startDate;
    if (startDateStr != null) {
        startDate = dtf.parseDateTime(startDateStr);
    } else {
        startDate = new DateTime(Long.MIN_VALUE + ONE_DAY_IN_MILLIS).withZone(DateTimeZone.UTC);
    }
    DateTime endDate;
    if (endDateStr != null) {
        endDate = dtf.parseDateTime(endDateStr);
    } else {
        endDate = new DateTime(Long.MAX_VALUE - ONE_DAY_IN_MILLIS).withZone(DateTimeZone.UTC);
    }
    this.timeRange = new Interval(startDate, endDate);
}
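
The withZoneUTC() call above is what pins the parsed dates to UTC. A small hedged sketch of the same idea as a standalone fragment, with an illustrative pattern and dates rather than the job's actual configuration values:

DateTimeFormatter dtf = DateTimeFormat.forPattern("yyyy-MM-dd").withZoneUTC();
DateTime startDate = dtf.parseDateTime("2016-01-01");   // 2016-01-01T00:00:00.000Z
DateTime endDate = dtf.parseDateTime("2016-02-01");
Interval timeRange = new Interval(startDate, endDate);
boolean inRange = timeRange.contains(new DateTime(2016, 1, 15, 0, 0, DateTimeZone.UTC)); // true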

From source file:com.kubaspatny.nuntius.service.ShortMessageService.java

License:Apache License

@Override
public Long add(String messageBody) {

    DateTime timestamp = DateTime.now(DateTimeZone.UTC);
    ShortMessage message = new ShortMessage.ShortMessageBuilder().setMessageBody(messageBody)
            .setMessageTimestamp(timestamp).build();
    return genericDao.saveOrUpdate(message).getId();
}

From source file:com.linkedin.pinot.common.utils.LLCSegmentName.java

License:Apache License

public LLCSegmentName(String tableName, int partitionId, int sequenceNumber, long msSinceEpoch) {
    if (!isValidComponentName(tableName)) {
        throw new RuntimeException("Invalid table name " + tableName);
    }
    _tableName = tableName;
    _partitionId = partitionId;
    _sequenceNumber = sequenceNumber;
    // ISO8601 date: 20160120T1234Z
    DateTime dateTime = new DateTime(msSinceEpoch, DateTimeZone.UTC);
    _creationTime = dateTime.toString("yyyyMMdd'T'HHmm'Z'");
    _segmentName = tableName + SEPARATOR + partitionId + SEPARATOR + sequenceNumber + SEPARATOR + _creationTime;
}
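
To make the creation-time encoding concrete, here is a small hedged check with an illustrative epoch value; it reproduces the format mentioned in the comment above:

DateTime dateTime = new DateTime(1453293240000L, DateTimeZone.UTC); // 2016-01-20T12:34:00Z
String creationTime = dateTime.toString("yyyyMMdd'T'HHmm'Z'");      // "20160120T1234Z"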

From source file:com.linkedin.pinot.core.data.manager.realtime.HLRealtimeSegmentDataManager.java

License:Apache License

public HLRealtimeSegmentDataManager(final RealtimeSegmentZKMetadata realtimeSegmentZKMetadata,
        final TableConfig tableConfig, InstanceZKMetadata instanceMetadata,
        final RealtimeTableDataManager realtimeTableDataManager, final String resourceDataDir,
        final IndexLoadingConfig indexLoadingConfig, final Schema schema, final ServerMetrics serverMetrics)
        throws Exception {
    super();
    _segmentVersion = indexLoadingConfig.getSegmentVersion();
    this.schema = schema;
    this.extractor = FieldExtractorFactory.getPlainFieldExtractor(schema);
    this.serverMetrics = serverMetrics;
    this.segmentName = realtimeSegmentZKMetadata.getSegmentName();
    this.tableName = tableConfig.getTableName();

    List<String> sortedColumns = indexLoadingConfig.getSortedColumns();
    if (sortedColumns.isEmpty()) {
        LOGGER.info("RealtimeDataResourceZKMetadata contains no information about sorted column for segment {}",
                segmentName);
        this.sortedColumn = null;
    } else {
        String firstSortedColumn = sortedColumns.get(0);
        if (this.schema.hasColumn(firstSortedColumn)) {
            LOGGER.info("Setting sorted column name: {} from RealtimeDataResourceZKMetadata for segment {}",
                    firstSortedColumn, segmentName);
            this.sortedColumn = firstSortedColumn;
        } else {
            LOGGER.warn(
                    "Sorted column name: {} from RealtimeDataResourceZKMetadata is not existed in schema for segment {}.",
                    firstSortedColumn, segmentName);
            this.sortedColumn = null;
        }
    }

    // Inverted index columns
    Set<String> invertedIndexColumns = indexLoadingConfig.getInvertedIndexColumns();
    // We need to add sorted column into inverted index columns because when we convert realtime in memory segment into
    // offline segment, we use sorted column's inverted index to maintain the order of the records so that the records
    // are sorted on the sorted column.
    if (sortedColumn != null) {
        invertedIndexColumns.add(sortedColumn);
    }
    this.invertedIndexColumns = new ArrayList<>(invertedIndexColumns);

    this.segmentMetatdaZk = realtimeSegmentZKMetadata;

    // No-dictionary columns
    noDictionaryColumns = new ArrayList<>(indexLoadingConfig.getNoDictionaryColumns());

    // create and init stream provider config
    // TODO : ideally resourceMetatda should create and give back a streamProviderConfig
    this.kafkaStreamProviderConfig = new KafkaHighLevelStreamProviderConfig();
    this.kafkaStreamProviderConfig.init(tableConfig, instanceMetadata, schema);
    segmentLogger = LoggerFactory.getLogger(HLRealtimeSegmentDataManager.class.getName() + "_" + segmentName
            + "_" + kafkaStreamProviderConfig.getStreamName());
    segmentLogger.info("Created segment data manager with Sorted column:{}, invertedIndexColumns:{}",
            sortedColumn, this.invertedIndexColumns);

    segmentEndTimeThreshold = start + kafkaStreamProviderConfig.getTimeThresholdToFlushSegment();

    this.resourceDir = new File(resourceDataDir);
    this.resourceTmpDir = new File(resourceDataDir, "_tmp");
    if (!resourceTmpDir.exists()) {
        resourceTmpDir.mkdirs();
    }
    // create and init stream provider
    final String tableName = tableConfig.getTableName();
    this.kafkaStreamProvider = StreamProviderFactory.buildStreamProvider();
    this.kafkaStreamProvider.init(kafkaStreamProviderConfig, tableName, serverMetrics);
    this.kafkaStreamProvider.start();
    this.tableStreamName = tableName + "_" + kafkaStreamProviderConfig.getStreamName();

    IndexingConfig indexingConfig = tableConfig.getIndexingConfig();
    if (indexingConfig != null && indexingConfig.getAggregateMetrics()) {
        LOGGER.warn("Updating of metrics only supported for LLC consumer, ignoring.");
    }

    // lets create a new realtime segment
    segmentLogger.info("Started kafka stream provider");
    final int capacity = kafkaStreamProviderConfig.getSizeThresholdToFlushSegment();
    RealtimeSegmentConfig realtimeSegmentConfig = new RealtimeSegmentConfig.Builder()
            .setSegmentName(segmentName).setStreamName(kafkaStreamProviderConfig.getStreamName())
            .setSchema(schema).setCapacity(capacity)
            .setAvgNumMultiValues(indexLoadingConfig.getRealtimeAvgMultiValueCount())
            .setNoDictionaryColumns(indexLoadingConfig.getNoDictionaryColumns())
            .setInvertedIndexColumns(invertedIndexColumns)
            .setRealtimeSegmentZKMetadata(realtimeSegmentZKMetadata)
            .setOffHeap(indexLoadingConfig.isRealtimeOffheapAllocation())
            .setMemoryManager(getMemoryManager(realtimeTableDataManager.getConsumerDir(), segmentName,
                    indexLoadingConfig.isRealtimeOffheapAllocation(),
                    indexLoadingConfig.isDirectRealtimeOffheapAllocation(), serverMetrics))
            .setStatsHistory(realtimeTableDataManager.getStatsHistory()).build();
    realtimeSegment = new RealtimeSegmentImpl(realtimeSegmentConfig);

    notifier = realtimeTableDataManager;

    LOGGER.info("Starting consumption on realtime consuming segment {} maxRowCount {} maxEndTime {}",
            segmentName, capacity, new DateTime(segmentEndTimeThreshold, DateTimeZone.UTC).toString());
    segmentStatusTask = new TimerTask() {
        @Override
        public void run() {
            computeKeepIndexing();
        }
    };

    // start the indexing thread
    indexingThread = new Thread(new Runnable() {
        @Override
        public void run() {
            // continue indexing until criteria is met
            boolean notFull = true;
            long exceptionSleepMillis = 50L;
            segmentLogger.info("Starting to collect rows");

            do {
                GenericRow readRow = null;
                GenericRow transformedRow = null;
                GenericRow row = null;
                try {
                    readRow = GenericRow.createOrReuseRow(readRow);
                    readRow = kafkaStreamProvider.next(readRow);
                    row = readRow;

                    if (readRow != null) {
                        transformedRow = GenericRow.createOrReuseRow(transformedRow);
                        transformedRow = extractor.transform(readRow, transformedRow);
                        row = transformedRow;
                        notFull = realtimeSegment.index(transformedRow);
                        exceptionSleepMillis = 50L;
                    }
                } catch (Exception e) {
                    segmentLogger.warn(
                            "Caught exception while indexing row, sleeping for {} ms, row contents {}",
                            exceptionSleepMillis, row, e);

                    // Sleep for a short time so as to avoid filling the logs with exceptions too quickly
                    Uninterruptibles.sleepUninterruptibly(exceptionSleepMillis, TimeUnit.MILLISECONDS);
                    exceptionSleepMillis = Math.min(60000L, exceptionSleepMillis * 2);
                } catch (Error e) {
                    segmentLogger.error("Caught error in indexing thread", e);
                    throw e;
                }
            } while (notFull && keepIndexing && (!isShuttingDown));

            if (isShuttingDown) {
                segmentLogger.info("Shutting down indexing thread!");
                return;
            }
            try {
                int numErrors, numConversions, numNulls, numNullCols;
                if ((numErrors = extractor.getTotalErrors()) > 0) {
                    serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_WITH_ERRORS,
                            (long) numErrors);
                }
                Map<String, Integer> errorCount = extractor.getErrorCount();
                for (String column : errorCount.keySet()) {
                    if ((numErrors = errorCount.get(column)) > 0) {
                        segmentLogger.warn("Column {} had {} rows with errors", column, numErrors);
                    }
                }
                if ((numConversions = extractor.getTotalConversions()) > 0) {
                    serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_NEEDING_CONVERSIONS,
                            (long) numConversions);
                    segmentLogger.info("{} rows needed conversions ", numConversions);
                }
                if ((numNulls = extractor.getTotalNulls()) > 0) {
                    serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.ROWS_WITH_NULL_VALUES,
                            (long) numNulls);
                    segmentLogger.info("{} rows had null columns", numNulls);
                }
                if ((numNullCols = extractor.getTotalNullCols()) > 0) {
                    serverMetrics.addMeteredTableValue(tableStreamName, ServerMeter.COLUMNS_WITH_NULL_VALUES,
                            (long) numNullCols);
                    segmentLogger.info("{} columns had null values", numNullCols);
                }
                segmentLogger.info("Indexing threshold reached, proceeding with index conversion");
                // kill the timer first
                segmentStatusTask.cancel();
                updateCurrentDocumentCountMetrics();
                segmentLogger.info("Indexed {} raw events", realtimeSegment.getNumDocsIndexed());
                File tempSegmentFolder = new File(resourceTmpDir,
                        "tmp-" + String.valueOf(System.currentTimeMillis()));

                // lets convert the segment now
                RealtimeSegmentConverter converter = new RealtimeSegmentConverter(realtimeSegment,
                        tempSegmentFolder.getAbsolutePath(), schema, realtimeSegmentZKMetadata.getTableName(),
                        realtimeSegmentZKMetadata.getSegmentName(), sortedColumn,
                        HLRealtimeSegmentDataManager.this.invertedIndexColumns, noDictionaryColumns,
                        null/*StarTreeIndexSpec*/); // Star tree not supported for HLC.

                segmentLogger.info("Trying to build segment");
                final long buildStartTime = System.nanoTime();
                converter.build(_segmentVersion, serverMetrics);
                final long buildEndTime = System.nanoTime();
                segmentLogger.info("Built segment in {} ms",
                        TimeUnit.MILLISECONDS.convert((buildEndTime - buildStartTime), TimeUnit.NANOSECONDS));
                File destDir = new File(resourceDataDir, realtimeSegmentZKMetadata.getSegmentName());
                FileUtils.deleteQuietly(destDir);
                FileUtils.moveDirectory(tempSegmentFolder.listFiles()[0], destDir);

                FileUtils.deleteQuietly(tempSegmentFolder);
                long segStartTime = realtimeSegment.getMinTime();
                long segEndTime = realtimeSegment.getMaxTime();

                TimeUnit timeUnit = schema.getTimeFieldSpec().getOutgoingGranularitySpec().getTimeType();
                IndexSegment segment = ColumnarSegmentLoader
                        .load(new File(resourceDir, segmentMetatdaZk.getSegmentName()), indexLoadingConfig);

                segmentLogger.info("Committing Kafka offsets");
                boolean commitSuccessful = false;
                try {
                    kafkaStreamProvider.commit();
                    commitSuccessful = true;
                    kafkaStreamProvider.shutdown();
                    segmentLogger.info("Successfully committed Kafka offsets, consumer release requested.");
                } catch (Throwable e) {
                    // If we got here, it means that either the commit or the shutdown failed. Considering that the
                    // KafkaConsumerManager delays shutdown and only adds the consumer to be released in a deferred way, this
                    // likely means that writing the Kafka offsets failed.
                    //
                    // The old logic (mark segment as done, then commit offsets and shutdown the consumer immediately) would die
                    // in a terrible way, leaving the consumer open and causing us to only get half the records from that point
                    // on. In this case, because we keep the consumer open for a little while, we should be okay if the
                    // controller reassigns us a new segment before the consumer gets released. Hopefully by the next time that
                    // we get to committing the offsets, the transient ZK failure that caused the write to fail will not
                    // happen again and everything will be good.
                    //
                    // Several things can happen:
                    // - The controller reassigns us a new segment before we release the consumer (KafkaConsumerManager will
                    //   keep the consumer open for about a minute, which should be enough time for the controller to reassign
                    //   us a new segment) and the next time we close the segment the offsets commit successfully; we're good.
                    // - The controller reassigns us a new segment, but after we released the consumer (if the controller was
                    //   down or there was a ZK failure on writing the Kafka offsets but not the Helix state). We lose whatever
                    //   data was in this segment. Not good.
                    // - The server crashes after this comment and before we mark the current segment as done; if the Kafka
                    //   offsets didn't get written, then when the server restarts it'll start consuming the current segment
                    //   from the previously committed offsets; we're good.
                    // - The server crashes after this comment, the Kafka offsets were written but the segment wasn't marked as
                    //   done in Helix, but we got a failure (or not) on the commit; we lose whatever data was in this segment
                    //   if we restart the server (not good). If we manually mark the segment as done in Helix by editing the
                    //   state in ZK, everything is good, we'll consume a new segment that starts from the correct offsets.
                    //
                    // This is still better than the previous logic, which would have these failure modes:
                    // - Consumer was left open and the controller reassigned us a new segment; consume only half the events
                    //   (because there are two consumers and Kafka will try to rebalance partitions between those two)
                    // - We got a segment assigned to us before we got around to committing the offsets, reconsume the data that
                    //   we got in this segment again, as we're starting consumption from the previously committed offset (eg.
                    //   duplicate data).
                    //
                    // This is still not very satisfactory, which is why this part is due for a redesign.
                    //
                    // Assuming you got here because the realtime offset commit metric has fired, check the logs to determine
                    // which of the above scenarios happened. If you're in one of the good scenarios, then there's nothing to
                    // do. If you're not, then based on how critical it is to get those rows back, then your options are:
                    // - Wipe the realtime table and reconsume everything (mark the replica as disabled so that clients don't
                    //   see query results from partially consumed data, then re-enable it when this replica has caught up)
                    // - Accept that those rows are gone in this replica and move on (they'll be replaced by good offline data
                    //   soon anyway)
                    // - If there's a replica that has consumed properly, you could shut it down, copy its segments onto this
                    //   replica, assign a new consumer group id to this replica, rename the copied segments and edit their
                    //   metadata to reflect the new consumer group id, copy the Kafka offsets from the shutdown replica onto
                    //   the new consumer group id and then restart both replicas. This should get you the missing rows.

                    segmentLogger.error(
                            "FATAL: Exception committing or shutting down consumer commitSuccessful={}",
                            commitSuccessful, e);
                    serverMetrics.addMeteredTableValue(tableName, ServerMeter.REALTIME_OFFSET_COMMIT_EXCEPTIONS,
                            1L);
                    if (!commitSuccessful) {
                        kafkaStreamProvider.shutdown();
                    }
                }

                try {
                    segmentLogger.info("Marking current segment as completed in Helix");
                    RealtimeSegmentZKMetadata metadataToOverwrite = new RealtimeSegmentZKMetadata();
                    metadataToOverwrite.setTableName(realtimeSegmentZKMetadata.getTableName());
                    metadataToOverwrite.setSegmentName(realtimeSegmentZKMetadata.getSegmentName());
                    metadataToOverwrite.setSegmentType(SegmentType.OFFLINE);
                    metadataToOverwrite.setStatus(Status.DONE);
                    metadataToOverwrite.setStartTime(segStartTime);
                    metadataToOverwrite.setEndTime(segEndTime);
                    metadataToOverwrite.setTimeUnit(timeUnit);
                    metadataToOverwrite.setTotalRawDocs(realtimeSegment.getNumDocsIndexed());
                    notifier.notifySegmentCommitted(metadataToOverwrite, segment);
                    segmentLogger.info(
                            "Completed write of segment completion to Helix, waiting for controller to assign a new segment");
                } catch (Exception e) {
                    if (commitSuccessful) {
                        segmentLogger.error(
                                "Offsets were committed to Kafka but we were unable to mark this segment as completed in Helix. Manually mark the segment as completed in Helix; restarting this instance will result in data loss.",
                                e);
                    } else {
                        segmentLogger.warn(
                                "Caught exception while marking segment as completed in Helix. Offsets were not written, restarting the instance should be safe.",
                                e);
                    }
                }
            } catch (Exception e) {
                segmentLogger.error("Caught exception in the realtime indexing thread", e);
            }
        }
    });

    indexingThread.start();
    serverMetrics.addValueToTableGauge(tableName, ServerGauge.SEGMENT_COUNT, 1L);
    segmentLogger.debug("scheduling keepIndexing timer check");
    // start a schedule timer to keep track of the segment
    TimerService.timer.schedule(segmentStatusTask, ONE_MINUTE_IN_MILLSEC, ONE_MINUTE_IN_MILLSEC);
    segmentLogger.info("finished scheduling keepIndexing timer check");
}

From source file:com.linkedin.pinot.core.data.manager.realtime.LLRealtimeSegmentDataManager.java

License:Apache License

public LLRealtimeSegmentDataManager(RealtimeSegmentZKMetadata segmentZKMetadata, TableConfig tableConfig,
        InstanceZKMetadata instanceZKMetadata, RealtimeTableDataManager realtimeTableDataManager,
        String resourceDataDir, IndexLoadingConfig indexLoadingConfig, Schema schema,
        ServerMetrics serverMetrics) throws Exception {
    _segBuildSemaphore = realtimeTableDataManager.getSegmentBuildSemaphore();
    _segmentZKMetadata = (LLCRealtimeSegmentZKMetadata) segmentZKMetadata;
    _tableConfig = tableConfig;
    _realtimeTableDataManager = realtimeTableDataManager;
    _resourceDataDir = resourceDataDir;
    _indexLoadingConfig = indexLoadingConfig;
    _schema = schema;
    _serverMetrics = serverMetrics;
    _segmentVersion = indexLoadingConfig.getSegmentVersion();
    _instanceId = _realtimeTableDataManager.getServerInstance();
    _leaseExtender = SegmentBuildTimeLeaseExtender.getLeaseExtender(_instanceId);
    _protocolHandler = new ServerSegmentCompletionProtocolHandler(_instanceId);

    // TODO Validate configs
    IndexingConfig indexingConfig = _tableConfig.getIndexingConfig();
    _streamMetadata = new StreamMetadata(indexingConfig.getStreamConfigs());
    _pinotStreamConsumerFactory = PinotStreamConsumerFactory.create(_streamMetadata);
    KafkaLowLevelStreamProviderConfig kafkaStreamProviderConfig = createStreamProviderConfig();
    kafkaStreamProviderConfig.init(tableConfig, instanceZKMetadata, schema);
    _kafkaBootstrapNodes = indexingConfig.getStreamConfigs().get(CommonConstants.Helix.DataSource.STREAM_PREFIX
            + "." + CommonConstants.Helix.DataSource.Realtime.Kafka.KAFKA_BROKER_LIST);
    _kafkaTopic = kafkaStreamProviderConfig.getTopicName();
    _segmentNameStr = _segmentZKMetadata.getSegmentName();
    _segmentName = new LLCSegmentName(_segmentNameStr);
    _kafkaPartitionId = _segmentName.getPartitionId();
    _tableName = _tableConfig.getTableName();
    _metricKeyName = _tableName + "-" + _kafkaTopic + "-" + _kafkaPartitionId;
    segmentLogger = LoggerFactory
            .getLogger(LLRealtimeSegmentDataManager.class.getName() + "_" + _segmentNameStr);
    _tableStreamName = _tableName + "_" + kafkaStreamProviderConfig.getStreamName();
    _memoryManager = getMemoryManager(realtimeTableDataManager.getConsumerDir(), _segmentNameStr,
            indexLoadingConfig.isRealtimeOffheapAllocation(),
            indexLoadingConfig.isDirectRealtimeOffheapAllocation(),
            realtimeTableDataManager.getServerMetrics());

    List<String> sortedColumns = indexLoadingConfig.getSortedColumns();
    if (sortedColumns.isEmpty()) {
        segmentLogger.info(
                "RealtimeDataResourceZKMetadata contains no information about sorted column for segment {}",
                _segmentName);
        _sortedColumn = null;
    } else {
        String firstSortedColumn = sortedColumns.get(0);
        if (_schema.hasColumn(firstSortedColumn)) {
            segmentLogger.info(
                    "Setting sorted column name: {} from RealtimeDataResourceZKMetadata for segment {}",
                    firstSortedColumn, _segmentName);
            _sortedColumn = firstSortedColumn;
        } else {
            segmentLogger.warn(
                    "Sorted column name: {} from RealtimeDataResourceZKMetadata is not existed in schema for segment {}.",
                    firstSortedColumn, _segmentName);
            _sortedColumn = null;
        }
    }

    // Inverted index columns
    Set<String> invertedIndexColumns = indexLoadingConfig.getInvertedIndexColumns();
    // We need to add sorted column into inverted index columns because when we convert realtime in memory segment into
    // offline segment, we use sorted column's inverted index to maintain the order of the records so that the records
    // are sorted on the sorted column.
    if (_sortedColumn != null) {
        invertedIndexColumns.add(_sortedColumn);
    }
    _invertedIndexColumns = new ArrayList<>(invertedIndexColumns);

    // No-dictionary columns
    _noDictionaryColumns = new ArrayList<>(indexLoadingConfig.getNoDictionaryColumns());

    // Read the star tree config
    _starTreeIndexSpec = indexingConfig.getStarTreeIndexSpec();

    // Read the max number of rows
    int segmentMaxRowCount = kafkaStreamProviderConfig.getSizeThresholdToFlushSegment();

    if (0 < segmentZKMetadata.getSizeThresholdToFlushSegment()) {
        segmentMaxRowCount = segmentZKMetadata.getSizeThresholdToFlushSegment();
    }

    _segmentMaxRowCount = segmentMaxRowCount;

    // Start new realtime segment
    RealtimeSegmentConfig.Builder realtimeSegmentConfigBuilder = new RealtimeSegmentConfig.Builder()
            .setSegmentName(_segmentNameStr).setStreamName(_kafkaTopic).setSchema(schema)
            .setCapacity(_segmentMaxRowCount)
            .setAvgNumMultiValues(indexLoadingConfig.getRealtimeAvgMultiValueCount())
            .setNoDictionaryColumns(indexLoadingConfig.getNoDictionaryColumns())
            .setInvertedIndexColumns(invertedIndexColumns).setRealtimeSegmentZKMetadata(segmentZKMetadata)
            .setOffHeap(indexLoadingConfig.isRealtimeOffheapAllocation()).setMemoryManager(_memoryManager)
            .setStatsHistory(realtimeTableDataManager.getStatsHistory())
            .setAggregateMetrics(indexingConfig.getAggregateMetrics());

    // Create message decoder
    _messageDecoder = _pinotStreamConsumerFactory.getDecoder(kafkaStreamProviderConfig);
    _clientId = _kafkaPartitionId + "-" + NetUtil.getHostnameOrAddress();

    // Create field extractor
    _fieldExtractor = FieldExtractorFactory.getPlainFieldExtractor(schema);
    makeConsumerWrapper("Starting");

    SegmentPartitionConfig segmentPartitionConfig = indexingConfig.getSegmentPartitionConfig();
    if (segmentPartitionConfig != null) {
        try {
            int nPartitions = _consumerWrapper.getPartitionCount(_kafkaTopic, /*maxWaitTimeMs=*/5000L);
            segmentPartitionConfig.setNumPartitions(nPartitions);
            realtimeSegmentConfigBuilder.setSegmentPartitionConfig(segmentPartitionConfig);
        } catch (Exception e) {
            segmentLogger.warn("Couldn't get number of partitions in 5s, not using partition config {}",
                    e.getMessage());
            makeConsumerWrapper("Timeout getting number of partitions");
        }
    }

    _realtimeSegment = new RealtimeSegmentImpl(realtimeSegmentConfigBuilder.build());
    _startOffset = _segmentZKMetadata.getStartOffset();
    _currentOffset = _startOffset;
    _resourceTmpDir = new File(resourceDataDir, "_tmp");
    if (!_resourceTmpDir.exists()) {
        _resourceTmpDir.mkdirs();
    }
    _state = State.INITIAL_CONSUMING;
    long now = now();
    _consumeStartTime = now;
    _consumeEndTime = now + kafkaStreamProviderConfig.getTimeThresholdToFlushSegment();
    LOGGER.info("Starting consumption on realtime consuming segment {} maxRowCount {} maxEndTime {}",
            _segmentName, _segmentMaxRowCount, new DateTime(_consumeEndTime, DateTimeZone.UTC).toString());
    start();
}

From source file:com.lithium.yoda.DateString.java

License:Apache License

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    long timestamp = timeCop.get(arguments[0].get());
    DateTime dt = new DateTime(timestamp, DateTimeZone.UTC);
    return outFormatter.print(dt);
}
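
The outFormatter field is not part of this excerpt; a plausible minimal equivalent, assuming an ISO-style output pattern pinned to UTC, would look like this:

DateTimeFormatter outFormatter = DateTimeFormat.forPattern("yyyy-MM-dd").withZone(DateTimeZone.UTC);
DateTime dt = new DateTime(1453293240000L, DateTimeZone.UTC);
String out = outFormatter.print(dt); // "2016-01-20"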

From source file:com.lithium.yoda.IsoStartDateOfWeek.java

License:Apache License

@Override
public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    Preconditions.checkPositionIndex(0, arguments.length);
    if (!(arguments[0] instanceof LongObjectInspector)) {
        throw new IllegalArgumentException(
                "Input to  IsoStartDateOfWeek UDF must be a bigint. Given " + arguments[0].getTypeName());
    }
    timestampOi = (LongObjectInspector) arguments[0];

    YYYYMMDD = org.joda.time.format.DateTimeFormat.forPattern("YYYY-MM-dd");
    mdt = new MutableDateTime(0, DateTimeZone.UTC);

    return PrimitiveObjectInspectorFactory.javaIntObjectInspector;
}
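
The evaluate method of this UDF is not shown here; one plausible way the UTC MutableDateTime above could be used to find the ISO start date of a week (an assumption for illustration, not the project's actual code):

MutableDateTime mdt = new MutableDateTime(0, DateTimeZone.UTC);
mdt.setMillis(1453293240000L);              // 2016-01-20, a Wednesday
mdt.setDayOfWeek(DateTimeConstants.MONDAY); // snaps to Monday of the same ISO week
String weekStart = DateTimeFormat.forPattern("YYYY-MM-dd").print(mdt); // "2016-01-18"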

From source file:com.lithium.yoda.IsoWeekOfWeekYear.java

License:Apache License

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (arguments[0].get() == null) {
        return null;
    }
    long epoch = timestampOi.get(arguments[0].get());
    DateTime dt = new DateTime(epoch, DateTimeZone.UTC);
    return dt.getWeekOfWeekyear();
}

From source file:com.lithium.yoda.IsoWeekYear.java

License:Apache License

@Override
public Object evaluate(DeferredObject[] arguments) throws HiveException {
    if (arguments[0] == null || arguments[0].get() == null) {
        return null;
    }
    long epoch = timestampOi.get(arguments[0].get());
    DateTime dt = new DateTime(epoch, DateTimeZone.UTC);
    return dt.getWeekyear();
}