Example usage for com.google.common.collect TreeMultiset count

List of usage examples for com.google.common.collect TreeMultiset count

Introduction

This page collects usage examples for com.google.common.collect.TreeMultiset.count.

Prototype

@Override
public int count(@Nullable Object element)
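A minimal, self-contained sketch of what count returns (the sample values are invented for illustration): the number of occurrences of the element in the multiset, or 0 if it is absent.

import com.google.common.collect.TreeMultiset;

public class TreeMultisetCountDemo {
    public static void main(String[] args) {
        // TreeMultiset keeps its elements in sorted order and tracks duplicates.
        TreeMultiset<String> values = TreeMultiset.create();
        values.add("apple");
        values.add("banana");
        values.add("apple");

        System.out.println(values.count("apple"));  // prints 2
        System.out.println(values.count("cherry")); // prints 0 (absent element)
    }
}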

Usage

From source file:com.metamx.druid.merger.common.task.IndexDeterminePartitionsTask.java

@Override
public TaskStatus run(TaskContext context, TaskToolbox toolbox) throws Exception {
    log.info("Running with targetPartitionSize[%d]", targetPartitionSize);

    // This is similar to what DeterminePartitionsJob does in the hadoop indexer, but we don't require
    // a preconfigured partition dimension (we'll just pick the one with highest cardinality).

    // XXX - Space-efficiency (stores all unique dimension values, although at least not all combinations)
    // XXX - Time-efficiency (runs all this on one single node instead of through map/reduce)

    // Blacklist dimensions that have multiple values per row
    final Set<String> unusableDimensions = Sets.newHashSet();

    // Track values of all non-blacklisted dimensions
    final Map<String, TreeMultiset<String>> dimensionValueMultisets = Maps.newHashMap();

    // Load data
    final Firehose firehose = firehoseFactory.connect();

    try {
        while (firehose.hasMore()) {

            final InputRow inputRow = firehose.nextRow();

            if (getInterval().contains(inputRow.getTimestampFromEpoch())) {

                // Extract dimensions from event
                for (final String dim : inputRow.getDimensions()) {
                    final List<String> dimValues = inputRow.getDimension(dim);

                    if (!unusableDimensions.contains(dim)) {

                        if (dimValues.size() == 1) {

                            // Track this value
                            TreeMultiset<String> dimensionValueMultiset = dimensionValueMultisets.get(dim);

                            if (dimensionValueMultiset == null) {
                                dimensionValueMultiset = TreeMultiset.create();
                                dimensionValueMultisets.put(dim, dimensionValueMultiset);
                            }

                            dimensionValueMultiset.add(dimValues.get(0));

                        } else {

                            // Only single-valued dimensions can be used for partitions
                            unusableDimensions.add(dim);
                            dimensionValueMultisets.remove(dim);

                        }

                    }
                }

            }

        }
    } finally {
        firehose.close();
    }

    // ShardSpecs for index generator tasks
    final List<ShardSpec> shardSpecs = Lists.newArrayList();

    // Select highest-cardinality dimension
    Ordering<Map.Entry<String, TreeMultiset<String>>> byCardinalityOrdering = new Ordering<Map.Entry<String, TreeMultiset<String>>>() {
        @Override
        public int compare(Map.Entry<String, TreeMultiset<String>> left,
                Map.Entry<String, TreeMultiset<String>> right) {
            return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size());
        }
    };

    if (dimensionValueMultisets.isEmpty()) {
        // No suitable partition dimension. We'll make one big segment and hope for the best.
        log.info("No suitable partition dimension found");
        shardSpecs.add(new NoneShardSpec());
    } else {
        // Find best partition dimension (heuristic: highest cardinality).
        final Map.Entry<String, TreeMultiset<String>> partitionEntry = byCardinalityOrdering
                .max(dimensionValueMultisets.entrySet());

        final String partitionDim = partitionEntry.getKey();
        final TreeMultiset<String> partitionDimValues = partitionEntry.getValue();

        log.info("Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", partitionDim,
                partitionDimValues.elementSet().size(), partitionDimValues.size());

        // Iterate over unique partition dimension values in sorted order
        String currentPartitionStart = null;
        int currentPartitionSize = 0;
        for (final String partitionDimValue : partitionDimValues.elementSet()) {
            currentPartitionSize += partitionDimValues.count(partitionDimValue);
            if (currentPartitionSize >= targetPartitionSize) {
                final ShardSpec shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart,
                        partitionDimValue, shardSpecs.size());

                log.info("Adding shard: %s", shardSpec);
                shardSpecs.add(shardSpec);

                currentPartitionSize = partitionDimValues.count(partitionDimValue);
                currentPartitionStart = partitionDimValue;
            }
        }

        if (currentPartitionSize > 0) {
            // One last shard to go
            final ShardSpec shardSpec;

            if (shardSpecs.isEmpty()) {
                shardSpec = new NoneShardSpec();
            } else {
                shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart, null,
                        shardSpecs.size());
            }

            log.info("Adding shard: %s", shardSpec);
            shardSpecs.add(shardSpec);
        }
    }

    return TaskStatus.continued(getId(), Lists.transform(shardSpecs, new Function<ShardSpec, Task>() {
        @Override
        public Task apply(ShardSpec shardSpec) {
            return new IndexGeneratorTask(getGroupId(), getInterval(), firehoseFactory, new Schema(
                    schema.getDataSource(), schema.getAggregators(), schema.getIndexGranularity(), shardSpec));
        }
    }));
}
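The loop above is a greedy split over sorted values: elementSet() yields the unique dimension values in sorted order, count() gives the number of rows per value, and a shard is cut whenever the running total reaches targetPartitionSize. The next two examples reuse the identical loop. Here is a stripped-down sketch of just that pattern, with the Druid ShardSpec types replaced by a printout and the data invented for illustration:

import com.google.common.collect.TreeMultiset;

public class GreedyPartitionSketch {
    public static void main(String[] args) {
        final int targetPartitionSize = 3; // assumed target, for illustration

        TreeMultiset<String> rows = TreeMultiset.create();
        rows.add("a"); rows.add("a"); rows.add("b");
        rows.add("c"); rows.add("c"); rows.add("c");

        String partitionStart = null; // null means an open-ended start, as in the tasks above
        int partitionSize = 0;

        // elementSet() iterates unique values in sorted order;
        // count() reports how many rows carry each value.
        for (String value : rows.elementSet()) {
            partitionSize += rows.count(value);
            if (partitionSize >= targetPartitionSize) {
                // Mirrors the original: the tipping value's count is included in this
                // total, and the same value also seeds the next partition.
                System.out.printf("partition [%s, %s) size=%d%n",
                        partitionStart, value, partitionSize);
                partitionSize = rows.count(value);
                partitionStart = value;
            }
        }
        if (partitionSize > 0) {
            System.out.printf("partition [%s, null) size=%d%n", partitionStart, partitionSize);
        }
    }
}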

From source file:io.druid.indexing.common.task.IndexDeterminePartitionsTask.java

@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    log.info("Running with targetPartitionSize[%d]", targetPartitionSize);

    // The implementation of this determine partitions stuff is less than optimal.  Should be done better.

    // We know this exists
    final Interval interval = getImplicitLockInterval().get();

    // Blacklist dimensions that have multiple values per row
    final Set<String> unusableDimensions = Sets.newHashSet();

    // Track values of all non-blacklisted dimensions
    final Map<String, TreeMultiset<String>> dimensionValueMultisets = Maps.newHashMap();

    // Load data
    final Firehose firehose = firehoseFactory.connect();

    try {
        while (firehose.hasMore()) {

            final InputRow inputRow = firehose.nextRow();

            if (interval.contains(inputRow.getTimestampFromEpoch())) {

                // Extract dimensions from event
                for (final String dim : inputRow.getDimensions()) {
                    final List<String> dimValues = inputRow.getDimension(dim);

                    if (!unusableDimensions.contains(dim)) {

                        if (dimValues.size() == 1) {

                            // Track this value
                            TreeMultiset<String> dimensionValueMultiset = dimensionValueMultisets.get(dim);

                            if (dimensionValueMultiset == null) {
                                dimensionValueMultiset = TreeMultiset.create();
                                dimensionValueMultisets.put(dim, dimensionValueMultiset);
                            }

                            dimensionValueMultiset.add(dimValues.get(0));

                        } else {

                            // Only single-valued dimensions can be used for partitions
                            unusableDimensions.add(dim);
                            dimensionValueMultisets.remove(dim);

                        }

                    }
                }

            }

        }
    } finally {
        firehose.close();
    }

    // ShardSpecs for index generator tasks
    final List<ShardSpec> shardSpecs = Lists.newArrayList();

    // Select highest-cardinality dimension
    Ordering<Map.Entry<String, TreeMultiset<String>>> byCardinalityOrdering = new Ordering<Map.Entry<String, TreeMultiset<String>>>() {
        @Override
        public int compare(Map.Entry<String, TreeMultiset<String>> left,
                Map.Entry<String, TreeMultiset<String>> right) {
            return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size());
        }
    };

    if (dimensionValueMultisets.isEmpty()) {
        // No suitable partition dimension. We'll make one big segment and hope for the best.
        log.info("No suitable partition dimension found");
        shardSpecs.add(new NoneShardSpec());
    } else {
        // Find best partition dimension (heuristic: highest cardinality).
        final Map.Entry<String, TreeMultiset<String>> partitionEntry = byCardinalityOrdering
                .max(dimensionValueMultisets.entrySet());

        final String partitionDim = partitionEntry.getKey();
        final TreeMultiset<String> partitionDimValues = partitionEntry.getValue();

        log.info("Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", partitionDim,
                partitionDimValues.elementSet().size(), partitionDimValues.size());

        // Iterate over unique partition dimension values in sorted order
        String currentPartitionStart = null;
        int currentPartitionSize = 0;
        for (final String partitionDimValue : partitionDimValues.elementSet()) {
            currentPartitionSize += partitionDimValues.count(partitionDimValue);
            if (currentPartitionSize >= targetPartitionSize) {
                final ShardSpec shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart,
                        partitionDimValue, shardSpecs.size());

                log.info("Adding shard: %s", shardSpec);
                shardSpecs.add(shardSpec);

                currentPartitionSize = partitionDimValues.count(partitionDimValue);
                currentPartitionStart = partitionDimValue;
            }
        }

        if (currentPartitionSize > 0) {
            // One last shard to go
            final ShardSpec shardSpec;

            if (shardSpecs.isEmpty()) {
                shardSpec = new NoneShardSpec();
            } else {
                shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart, null,
                        shardSpecs.size());
            }

            log.info("Adding shard: %s", shardSpec);
            shardSpecs.add(shardSpec);
        }
    }

    List<Task> nextTasks = Lists.transform(shardSpecs, new Function<ShardSpec, Task>() {
        @Override
        public Task apply(ShardSpec shardSpec) {
            return new IndexGeneratorTask(null, getGroupId(), getImplicitLockInterval().get(), firehoseFactory,
                    new Schema(schema.getDataSource(), schema.getSpatialDimensions(), schema.getAggregators(),
                            schema.getIndexGranularity(), shardSpec),
                    rowFlushBoundary);
        }
    });

    toolbox.getTaskActionClient().submit(new SpawnTasksAction(nextTasks));

    return TaskStatus.success(getId());
}

From source file:com.metamx.druid.indexing.common.task.IndexDeterminePartitionsTask.java

@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    log.info("Running with targetPartitionSize[%d]", targetPartitionSize);

    // TODO: Replace/merge/whatever with hadoop determine-partitions code

    // We know this exists
    final Interval interval = getImplicitLockInterval().get();

    // Blacklist dimensions that have multiple values per row
    final Set<String> unusableDimensions = Sets.newHashSet();

    // Track values of all non-blacklisted dimensions
    final Map<String, TreeMultiset<String>> dimensionValueMultisets = Maps.newHashMap();

    // Load data
    final Firehose firehose = firehoseFactory.connect();

    try {
        while (firehose.hasMore()) {

            final InputRow inputRow = firehose.nextRow();

            if (interval.contains(inputRow.getTimestampFromEpoch())) {

                // Extract dimensions from event
                for (final String dim : inputRow.getDimensions()) {
                    final List<String> dimValues = inputRow.getDimension(dim);

                    if (!unusableDimensions.contains(dim)) {

                        if (dimValues.size() == 1) {

                            // Track this value
                            TreeMultiset<String> dimensionValueMultiset = dimensionValueMultisets.get(dim);

                            if (dimensionValueMultiset == null) {
                                dimensionValueMultiset = TreeMultiset.create();
                                dimensionValueMultisets.put(dim, dimensionValueMultiset);
                            }

                            dimensionValueMultiset.add(dimValues.get(0));

                        } else {

                            // Only single-valued dimensions can be used for partitions
                            unusableDimensions.add(dim);
                            dimensionValueMultisets.remove(dim);

                        }

                    }
                }

            }

        }
    } finally {
        firehose.close();
    }

    // ShardSpecs for index generator tasks
    final List<ShardSpec> shardSpecs = Lists.newArrayList();

    // Select highest-cardinality dimension
    Ordering<Map.Entry<String, TreeMultiset<String>>> byCardinalityOrdering = new Ordering<Map.Entry<String, TreeMultiset<String>>>() {
        @Override
        public int compare(Map.Entry<String, TreeMultiset<String>> left,
                Map.Entry<String, TreeMultiset<String>> right) {
            return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size());
        }
    };

    if (dimensionValueMultisets.isEmpty()) {
        // No suitable partition dimension. We'll make one big segment and hope for the best.
        log.info("No suitable partition dimension found");
        shardSpecs.add(new NoneShardSpec());
    } else {
        // Find best partition dimension (heuristic: highest cardinality).
        final Map.Entry<String, TreeMultiset<String>> partitionEntry = byCardinalityOrdering
                .max(dimensionValueMultisets.entrySet());

        final String partitionDim = partitionEntry.getKey();
        final TreeMultiset<String> partitionDimValues = partitionEntry.getValue();

        log.info("Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", partitionDim,
                partitionDimValues.elementSet().size(), partitionDimValues.size());

        // Iterate over unique partition dimension values in sorted order
        String currentPartitionStart = null;
        int currentPartitionSize = 0;
        for (final String partitionDimValue : partitionDimValues.elementSet()) {
            currentPartitionSize += partitionDimValues.count(partitionDimValue);
            if (currentPartitionSize >= targetPartitionSize) {
                final ShardSpec shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart,
                        partitionDimValue, shardSpecs.size());

                log.info("Adding shard: %s", shardSpec);
                shardSpecs.add(shardSpec);

                currentPartitionSize = partitionDimValues.count(partitionDimValue);
                currentPartitionStart = partitionDimValue;
            }
        }

        if (currentPartitionSize > 0) {
            // One last shard to go
            final ShardSpec shardSpec;

            if (shardSpecs.isEmpty()) {
                shardSpec = new NoneShardSpec();
            } else {
                shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart, null,
                        shardSpecs.size());
            }

            log.info("Adding shard: %s", shardSpec);
            shardSpecs.add(shardSpec);
        }
    }

    List<Task> nextTasks = Lists.transform(shardSpecs, new Function<ShardSpec, Task>() {
        @Override
        public Task apply(ShardSpec shardSpec) {
            return new IndexGeneratorTask(null, getGroupId(), getImplicitLockInterval().get(), firehoseFactory,
                    new Schema(schema.getDataSource(), schema.getSpatialDimensions(), schema.getAggregators(),
                            schema.getIndexGranularity(), shardSpec),
                    rowFlushBoundary);
        }
    });

    toolbox.getTaskActionClient().submit(new SpawnTasksAction(nextTasks));

    return TaskStatus.success(getId());
}

From source file:org.apache.tez.analyzer.plugins.TaskConcurrencyAnalyzer.java

@Override
public void analyze(DagInfo dagInfo) throws TezException {

    //For each vertex find the concurrent tasks running at any point
    for (VertexInfo vertexInfo : dagInfo.getVertices()) {
        List<TaskAttemptInfo> taskAttempts = Lists.newLinkedList(vertexInfo.getTaskAttempts(true, null));

        String vertexName = vertexInfo.getVertexName();

        /**
         * - Build a sorted multiset of timestamps (S1, S2, ... E1, E2, ...). Multiple
         *   tasks may start or end at the same time.
         * - Walk through the set
         * - Increment concurrent tasks when a start event is encountered
         * - Decrement concurrent tasks when a finish event is encountered
         */
        TreeMultiset<TimeInfo> timeInfoSet = TreeMultiset.create(new Comparator<TimeInfo>() {
            @Override
            public int compare(TimeInfo o1, TimeInfo o2) {
                if (o1.timestamp < o2.timestamp) {
                    return -1;
                }

                if (o1.timestamp > o2.timestamp) {
                    return 1;
                }

                if (o1.timestamp == o2.timestamp) {
                    //check event type
                    if (o1.eventType.equals(o2.eventType)) {
                        return 0;
                    }

                    if (o1.eventType.equals(EventType.START) && o2.eventType.equals(EventType.FINISH)) {
                        return -1;
                    } else {
                        return 1;
                    }
                }
                return 0;
            }
        });

        for (TaskAttemptInfo attemptInfo : taskAttempts) {
            TimeInfo startTimeInfo = new TimeInfo(EventType.START, attemptInfo.getStartTime());
            TimeInfo stopTimeInfo = new TimeInfo(EventType.FINISH, attemptInfo.getFinishTime());

            timeInfoSet.add(startTimeInfo);
            timeInfoSet.add(stopTimeInfo);
        }

        //Compute concurrent tasks in the list now.
        int concurrentTasks = 0;
        for (TimeInfo timeInfo : timeInfoSet.elementSet()) {
            switch (timeInfo.eventType) {
            case START:
                concurrentTasks += timeInfoSet.count(timeInfo);
                break;
            case FINISH:
                concurrentTasks -= timeInfoSet.count(timeInfo);
                break;
            default:
                break;
            }
            timeInfo.concurrentTasks = concurrentTasks;
            addToResult(vertexName, timeInfo.timestamp, timeInfo.concurrentTasks);
        }
    }
}
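The analyzer uses the multiset as a sweep line: start and finish events are sorted by timestamp, with starts ordered before finishes on ties, and count() collapses simultaneous identical events into a single adjustment. A minimal standalone sketch of the same idea, with the Tez types replaced by a small illustrative class (the names and sample timestamps are invented, not from Tez):

import com.google.common.collect.TreeMultiset;
import java.util.Comparator;

public class SweepLineSketch {
    enum EventType { START, FINISH }

    static final class TimeInfo {
        final EventType eventType;
        final long timestamp;

        TimeInfo(EventType eventType, long timestamp) {
            this.eventType = eventType;
            this.timestamp = timestamp;
        }
    }

    public static void main(String[] args) {
        // Sort by timestamp; on ties, START sorts before FINISH (enum declaration order).
        Comparator<TimeInfo> order = Comparator.comparingLong((TimeInfo t) -> t.timestamp)
                .thenComparing(t -> t.eventType);
        TreeMultiset<TimeInfo> events = TreeMultiset.create(order);

        // Two overlapping "tasks": one over [1, 5), one over [2, 5).
        events.add(new TimeInfo(EventType.START, 1));
        events.add(new TimeInfo(EventType.FINISH, 5));
        events.add(new TimeInfo(EventType.START, 2));
        events.add(new TimeInfo(EventType.FINISH, 5));

        int concurrent = 0;
        for (TimeInfo t : events.elementSet()) {
            // count() folds events that compare equal (same timestamp and type)
            // into one step, exactly as in the analyzer above.
            int step = events.count(t);
            concurrent += (t.eventType == EventType.START) ? step : -step;
            System.out.printf("t=%d concurrent=%d%n", t.timestamp, concurrent);
        }
        // Output: t=1 concurrent=1, t=2 concurrent=2, t=5 concurrent=0
    }
}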