Example usage for com.google.common.collect TreeMultiset size

Introduction

On this page you can find example usages of the com.google.common.collect TreeMultiset method size().

Prototype

@Override
public int size()
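
size() returns the total number of occurrences in the multiset, counting duplicates; elementSet().size() returns the number of distinct elements. A minimal sketch of the difference (class and variable names are illustrative):

import com.google.common.collect.TreeMultiset;

public class SizeDemo {
    public static void main(String[] args) {
        TreeMultiset<String> multiset = TreeMultiset.create();
        multiset.add("apple");
        multiset.add("apple");
        multiset.add("banana");

        // Total occurrences, duplicates included.
        System.out.println(multiset.size());              // 3
        // Distinct elements only.
        System.out.println(multiset.elementSet().size()); // 2
    }
}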

Usage

From source file: org.apache.mahout.math.neighborhood.ProjectionSearch.java

/**
 * Adds a WeightedVector into the set of projections for later searching.
 * @param vector  The WeightedVector to add.
 */
@Override
public void add(Vector vector) {
    initialize(vector.size());
    Vector projection = basisMatrix.times(vector);
    // Add the new vector and its projected distance to each set separately.
    int i = 0;
    for (TreeMultiset<WeightedThing<Vector>> s : scalarProjections) {
        s.add(new WeightedThing<Vector>(vector, projection.get(i++)));
    }
    int numVectors = scalarProjections.get(0).size();
    for (TreeMultiset<WeightedThing<Vector>> s : scalarProjections) {
        Preconditions.checkArgument(s.size() == numVectors, "Number of vectors in projection sets differs");
        double firstWeight = s.firstEntry().getElement().getWeight();
        for (WeightedThing<Vector> w : s) {
            Preconditions.checkArgument(firstWeight <= w.getWeight(), "Weights not in non-decreasing order");
            firstWeight = w.getWeight();
        }
    }
}
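
This example uses size() to assert that every per-projection multiset tracks the same number of vectors. A minimal sketch of that consistency check with hypothetical data (the Double entries stand in for WeightedThing<Vector> values):

import com.google.common.base.Preconditions;
import com.google.common.collect.TreeMultiset;
import java.util.List;

public class ConsistencyCheck {
    public static void main(String[] args) {
        // One sorted multiset of scalar projections per basis vector.
        List<TreeMultiset<Double>> projections = List.of(
                TreeMultiset.create(List.of(1.0, 2.0, 2.0)),
                TreeMultiset.create(List.of(0.5, 1.5, 3.0)));

        int numVectors = projections.get(0).size();
        for (TreeMultiset<Double> s : projections) {
            // Each set must hold exactly one entry per indexed vector.
            Preconditions.checkArgument(s.size() == numVectors,
                    "Number of vectors in projection sets differs");
        }
    }
}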

From source file: com.metamx.druid.merger.common.task.IndexDeterminePartitionsTask.java

@Override
public TaskStatus run(TaskContext context, TaskToolbox toolbox) throws Exception {
    log.info("Running with targetPartitionSize[%d]", targetPartitionSize);

    // This is similar to what DeterminePartitionsJob does in the hadoop indexer, but we don't require
    // a preconfigured partition dimension (we'll just pick the one with highest cardinality).

    // XXX - Space-efficiency (stores all unique dimension values, although at least not all combinations)
    // XXX - Time-efficiency (runs all this on one single node instead of through map/reduce)

    // Blacklist dimensions that have multiple values per row
    final Set<String> unusableDimensions = Sets.newHashSet();

    // Track values of all non-blacklisted dimensions
    final Map<String, TreeMultiset<String>> dimensionValueMultisets = Maps.newHashMap();

    // Load data
    final Firehose firehose = firehoseFactory.connect();

    try {
        while (firehose.hasMore()) {

            final InputRow inputRow = firehose.nextRow();

            if (getInterval().contains(inputRow.getTimestampFromEpoch())) {

                // Extract dimensions from event
                for (final String dim : inputRow.getDimensions()) {
                    final List<String> dimValues = inputRow.getDimension(dim);

                    if (!unusableDimensions.contains(dim)) {

                        if (dimValues.size() == 1) {

                            // Track this value
                            TreeMultiset<String> dimensionValueMultiset = dimensionValueMultisets.get(dim);

                            if (dimensionValueMultiset == null) {
                                dimensionValueMultiset = TreeMultiset.create();
                                dimensionValueMultisets.put(dim, dimensionValueMultiset);
                            }

                            dimensionValueMultiset.add(dimValues.get(0));

                        } else {

                            // Only single-valued dimensions can be used for partitions
                            unusableDimensions.add(dim);
                            dimensionValueMultisets.remove(dim);

                        }

                    }
                }

            }

        }
    } finally {
        firehose.close();
    }

    // ShardSpecs for index generator tasks
    final List<ShardSpec> shardSpecs = Lists.newArrayList();

    // Select highest-cardinality dimension
    Ordering<Map.Entry<String, TreeMultiset<String>>> byCardinalityOrdering = new Ordering<Map.Entry<String, TreeMultiset<String>>>() {
        @Override
        public int compare(Map.Entry<String, TreeMultiset<String>> left,
                Map.Entry<String, TreeMultiset<String>> right) {
            return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size());
        }
    };

    if (dimensionValueMultisets.isEmpty()) {
        // No suitable partition dimension. We'll make one big segment and hope for the best.
        log.info("No suitable partition dimension found");
        shardSpecs.add(new NoneShardSpec());
    } else {
        // Find best partition dimension (heuristic: highest cardinality).
        final Map.Entry<String, TreeMultiset<String>> partitionEntry = byCardinalityOrdering
                .max(dimensionValueMultisets.entrySet());

        final String partitionDim = partitionEntry.getKey();
        final TreeMultiset<String> partitionDimValues = partitionEntry.getValue();

        log.info("Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", partitionDim,
                partitionDimValues.elementSet().size(), partitionDimValues.size());

        // Iterate over unique partition dimension values in sorted order
        String currentPartitionStart = null;
        int currentPartitionSize = 0;
        for (final String partitionDimValue : partitionDimValues.elementSet()) {
            currentPartitionSize += partitionDimValues.count(partitionDimValue);
            if (currentPartitionSize >= targetPartitionSize) {
                final ShardSpec shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart,
                        partitionDimValue, shardSpecs.size());

                log.info("Adding shard: %s", shardSpec);
                shardSpecs.add(shardSpec);

                currentPartitionSize = partitionDimValues.count(partitionDimValue);
                currentPartitionStart = partitionDimValue;
            }
        }

        if (currentPartitionSize > 0) {
            // One last shard to go
            final ShardSpec shardSpec;

            if (shardSpecs.isEmpty()) {
                shardSpec = new NoneShardSpec();
            } else {
                shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart, null,
                        shardSpecs.size());
            }

            log.info("Adding shard: %s", shardSpec);
            shardSpecs.add(shardSpec);
        }
    }

    return TaskStatus.continued(getId(), Lists.transform(shardSpecs, new Function<ShardSpec, Task>() {
        @Override
        public Task apply(ShardSpec shardSpec) {
            return new IndexGeneratorTask(getGroupId(), getInterval(), firehoseFactory, new Schema(
                    schema.getDataSource(), schema.getAggregators(), schema.getIndexGranularity(), shardSpec));
        }
    }));
}
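
The partition-sizing loop above leans on three TreeMultiset calls: elementSet() iterates distinct values in sorted order, count() gives the per-value row count, and size() gives the total row count. A minimal, self-contained sketch of that pattern (the data and targetPartitionSize are made up, and shard creation is replaced with print statements):

import com.google.common.collect.TreeMultiset;

public class PartitionSketch {
    public static void main(String[] args) {
        TreeMultiset<String> values = TreeMultiset.create();
        values.add("a"); values.add("a");
        values.add("b");
        values.add("c"); values.add("c"); values.add("c");

        System.out.printf("cardinality=%d rows=%d%n",
                values.elementSet().size(), values.size());

        int targetPartitionSize = 3;
        int currentSize = 0;
        String currentStart = null;
        for (String v : values.elementSet()) {       // distinct values, sorted
            currentSize += values.count(v);          // rows for this value
            if (currentSize >= targetPartitionSize) {
                System.out.printf("shard [%s, %s)%n", currentStart, v);
                currentSize = values.count(v);       // v opens the next shard
                currentStart = v;
            }
        }
        if (currentSize > 0) {
            System.out.printf("shard [%s, null)%n", currentStart);
        }
    }
}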

From source file: io.druid.indexing.common.task.IndexDeterminePartitionsTask.java

@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    log.info("Running with targetPartitionSize[%d]", targetPartitionSize);

    // The implementation of this determine partitions stuff is less than optimal.  Should be done better.

    // We know this exists
    final Interval interval = getImplicitLockInterval().get();

    // Blacklist dimensions that have multiple values per row
    final Set<String> unusableDimensions = Sets.newHashSet();

    // Track values of all non-blacklisted dimensions
    final Map<String, TreeMultiset<String>> dimensionValueMultisets = Maps.newHashMap();

    // Load data
    final Firehose firehose = firehoseFactory.connect();

    try {
        while (firehose.hasMore()) {

            final InputRow inputRow = firehose.nextRow();

            if (interval.contains(inputRow.getTimestampFromEpoch())) {

                // Extract dimensions from event
                for (final String dim : inputRow.getDimensions()) {
                    final List<String> dimValues = inputRow.getDimension(dim);

                    if (!unusableDimensions.contains(dim)) {

                        if (dimValues.size() == 1) {

                            // Track this value
                            TreeMultiset<String> dimensionValueMultiset = dimensionValueMultisets.get(dim);

                            if (dimensionValueMultiset == null) {
                                dimensionValueMultiset = TreeMultiset.create();
                                dimensionValueMultisets.put(dim, dimensionValueMultiset);
                            }

                            dimensionValueMultiset.add(dimValues.get(0));

                        } else {

                            // Only single-valued dimensions can be used for partitions
                            unusableDimensions.add(dim);
                            dimensionValueMultisets.remove(dim);

                        }

                    }
                }

            }

        }
    } finally {
        firehose.close();
    }

    // ShardSpecs for index generator tasks
    final List<ShardSpec> shardSpecs = Lists.newArrayList();

    // Select highest-cardinality dimension
    Ordering<Map.Entry<String, TreeMultiset<String>>> byCardinalityOrdering = new Ordering<Map.Entry<String, TreeMultiset<String>>>() {
        @Override
        public int compare(Map.Entry<String, TreeMultiset<String>> left,
                Map.Entry<String, TreeMultiset<String>> right) {
            return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size());
        }
    };

    if (dimensionValueMultisets.isEmpty()) {
        // No suitable partition dimension. We'll make one big segment and hope for the best.
        log.info("No suitable partition dimension found");
        shardSpecs.add(new NoneShardSpec());
    } else {
        // Find best partition dimension (heuristic: highest cardinality).
        final Map.Entry<String, TreeMultiset<String>> partitionEntry = byCardinalityOrdering
                .max(dimensionValueMultisets.entrySet());

        final String partitionDim = partitionEntry.getKey();
        final TreeMultiset<String> partitionDimValues = partitionEntry.getValue();

        log.info("Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", partitionDim,
                partitionDimValues.elementSet().size(), partitionDimValues.size());

        // Iterate over unique partition dimension values in sorted order
        String currentPartitionStart = null;
        int currentPartitionSize = 0;
        for (final String partitionDimValue : partitionDimValues.elementSet()) {
            currentPartitionSize += partitionDimValues.count(partitionDimValue);
            if (currentPartitionSize >= targetPartitionSize) {
                final ShardSpec shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart,
                        partitionDimValue, shardSpecs.size());

                log.info("Adding shard: %s", shardSpec);
                shardSpecs.add(shardSpec);

                currentPartitionSize = partitionDimValues.count(partitionDimValue);
                currentPartitionStart = partitionDimValue;
            }
        }

        if (currentPartitionSize > 0) {
            // One last shard to go
            final ShardSpec shardSpec;

            if (shardSpecs.isEmpty()) {
                shardSpec = new NoneShardSpec();
            } else {
                shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart, null,
                        shardSpecs.size());
            }

            log.info("Adding shard: %s", shardSpec);
            shardSpecs.add(shardSpec);
        }
    }

    List<Task> nextTasks = Lists.transform(shardSpecs, new Function<ShardSpec, Task>() {
        @Override
        public Task apply(ShardSpec shardSpec) {
            return new IndexGeneratorTask(null, getGroupId(), getImplicitLockInterval().get(), firehoseFactory,
                    new Schema(schema.getDataSource(), schema.getSpatialDimensions(), schema.getAggregators(),
                            schema.getIndexGranularity(), shardSpec),
                    rowFlushBoundary);
        }
    });

    toolbox.getTaskActionClient().submit(new SpawnTasksAction(nextTasks));

    return TaskStatus.success(getId());
}
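
The cardinality heuristic above boils down to picking the map entry whose multiset has the largest elementSet().size(). A minimal sketch of the same selection using java.util streams instead of Guava's Ordering (the dimension data is hypothetical):

import com.google.common.collect.TreeMultiset;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

public class CardinalityPick {
    public static void main(String[] args) {
        // Hypothetical dimension -> value multiset map, mirroring
        // dimensionValueMultisets in the task above.
        Map<String, TreeMultiset<String>> dims = Map.of(
                "country", TreeMultiset.create(List.of("us", "us", "de")),
                "page", TreeMultiset.create(List.of("a", "b", "c")));

        // Pick the entry with the most distinct values; size() would
        // count rows (duplicates included) instead.
        Map.Entry<String, TreeMultiset<String>> best = dims.entrySet().stream()
                .max(Comparator.comparingInt(e -> e.getValue().elementSet().size()))
                .orElseThrow();

        System.out.printf("partition on %s (cardinality=%d, rows=%d)%n",
                best.getKey(), best.getValue().elementSet().size(), best.getValue().size());
    }
}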

From source file: com.metamx.druid.indexing.common.task.IndexDeterminePartitionsTask.java

@Override
public TaskStatus run(TaskToolbox toolbox) throws Exception {
    log.info("Running with targetPartitionSize[%d]", targetPartitionSize);

    // TODO: Replace/merge/whatever with hadoop determine-partitions code

    // We know this exists
    final Interval interval = getImplicitLockInterval().get();

    // Blacklist dimensions that have multiple values per row
    final Set<String> unusableDimensions = Sets.newHashSet();

    // Track values of all non-blacklisted dimensions
    final Map<String, TreeMultiset<String>> dimensionValueMultisets = Maps.newHashMap();

    // Load data
    final Firehose firehose = firehoseFactory.connect();

    try {
        while (firehose.hasMore()) {

            final InputRow inputRow = firehose.nextRow();

            if (interval.contains(inputRow.getTimestampFromEpoch())) {

                // Extract dimensions from event
                for (final String dim : inputRow.getDimensions()) {
                    final List<String> dimValues = inputRow.getDimension(dim);

                    if (!unusableDimensions.contains(dim)) {

                        if (dimValues.size() == 1) {

                            // Track this value
                            TreeMultiset<String> dimensionValueMultiset = dimensionValueMultisets.get(dim);

                            if (dimensionValueMultiset == null) {
                                dimensionValueMultiset = TreeMultiset.create();
                                dimensionValueMultisets.put(dim, dimensionValueMultiset);
                            }

                            dimensionValueMultiset.add(dimValues.get(0));

                        } else {

                            // Only single-valued dimensions can be used for partitions
                            unusableDimensions.add(dim);
                            dimensionValueMultisets.remove(dim);

                        }

                    }
                }

            }

        }
    } finally {
        firehose.close();
    }

    // ShardSpecs for index generator tasks
    final List<ShardSpec> shardSpecs = Lists.newArrayList();

    // Select highest-cardinality dimension
    Ordering<Map.Entry<String, TreeMultiset<String>>> byCardinalityOrdering = new Ordering<Map.Entry<String, TreeMultiset<String>>>() {
        @Override
        public int compare(Map.Entry<String, TreeMultiset<String>> left,
                Map.Entry<String, TreeMultiset<String>> right) {
            return Ints.compare(left.getValue().elementSet().size(), right.getValue().elementSet().size());
        }
    };

    if (dimensionValueMultisets.isEmpty()) {
        // No suitable partition dimension. We'll make one big segment and hope for the best.
        log.info("No suitable partition dimension found");
        shardSpecs.add(new NoneShardSpec());
    } else {
        // Find best partition dimension (heuristic: highest cardinality).
        final Map.Entry<String, TreeMultiset<String>> partitionEntry = byCardinalityOrdering
                .max(dimensionValueMultisets.entrySet());

        final String partitionDim = partitionEntry.getKey();
        final TreeMultiset<String> partitionDimValues = partitionEntry.getValue();

        log.info("Partitioning on dimension[%s] with cardinality[%d] over rows[%d]", partitionDim,
                partitionDimValues.elementSet().size(), partitionDimValues.size());

        // Iterate over unique partition dimension values in sorted order
        String currentPartitionStart = null;
        int currentPartitionSize = 0;
        for (final String partitionDimValue : partitionDimValues.elementSet()) {
            currentPartitionSize += partitionDimValues.count(partitionDimValue);
            if (currentPartitionSize >= targetPartitionSize) {
                final ShardSpec shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart,
                        partitionDimValue, shardSpecs.size());

                log.info("Adding shard: %s", shardSpec);
                shardSpecs.add(shardSpec);

                currentPartitionSize = partitionDimValues.count(partitionDimValue);
                currentPartitionStart = partitionDimValue;
            }
        }

        if (currentPartitionSize > 0) {
            // One last shard to go
            final ShardSpec shardSpec;

            if (shardSpecs.isEmpty()) {
                shardSpec = new NoneShardSpec();
            } else {
                shardSpec = new SingleDimensionShardSpec(partitionDim, currentPartitionStart, null,
                        shardSpecs.size());
            }

            log.info("Adding shard: %s", shardSpec);
            shardSpecs.add(shardSpec);
        }
    }

    List<Task> nextTasks = Lists.transform(shardSpecs, new Function<ShardSpec, Task>() {
        @Override
        public Task apply(ShardSpec shardSpec) {
            return new IndexGeneratorTask(null, getGroupId(), getImplicitLockInterval().get(), firehoseFactory,
                    new Schema(schema.getDataSource(), schema.getSpatialDimensions(), schema.getAggregators(),
                            schema.getIndexGranularity(), shardSpec),
                    rowFlushBoundary);
        }
    });

    toolbox.getTaskActionClient().submit(new SpawnTasksAction(nextTasks));

    return TaskStatus.success(getId());
}