Example usage for org.apache.cassandra.dht Range intersectionWith

Introduction

This page collects example usages of the intersectionWith method of org.apache.cassandra.dht.Range.

Prototype

public Set<Range<T>> intersectionWith(Range<T> that)

Source Link

Usage

From source file:andromache.hadoop.CassandraInputFormat.java

License:Apache License

/**
 * Builds the list of input splits for the job.
 *
 * <p>Every token range returned by {@code getRangeMap(conf)} is handed to a
 * {@code SplitCallable} running on a thread pool. When the job configuration carries a key
 * range with a start token, each token range is first clipped to its intersection(s) with
 * that job range; ranges that do not intersect are skipped.
 *
 * @param context job context whose configuration describes the keyspace, column families,
 *                partitioner and optional input key range
 * @return the collected splits, in random order
 * @throws IOException if any split task fails, or if the calling thread is interrupted while
 *                     waiting for the results (the interrupt flag is restored in that case)
 */
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = CassandraConfigHelper.getInputKeyspace(conf);

    cfNames = CassandraConfigHelper.getInputColumnFamilies(conf);

    // TODO: [IS] make sure this partitioner matches to what is set on each keyspace participating
    partitioner = CassandraConfigHelper.getInputPartitioner(conf);
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<CassandraSplit>>> splitfutures = new ArrayList<Future<List<CassandraSplit>>>();
        KeyRange jobKeyRange = CassandraConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner
                    .preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with a order preserving paritioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        // Give every task its own TokenRange. Re-using and mutating `range`
                        // would let a task that was already submitted observe the tokens of
                        // a later intersection when intersectionWith yields more than one range.
                        // NOTE(review): relies on the Thrift-generated copy constructor.
                        TokenRange clipped = new TokenRange(range);
                        clipped.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        clipped.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(clipped, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<CassandraSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (InterruptedException e) {
                // keep the interrupt visible to callers further up the stack
                Thread.currentThread().interrupt();
                throw new IOException("Could not get input splits", e);
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        // also cancels any tasks still running when we bail out early
        executor.shutdownNow();
    }

    // only enforced when the JVM runs with assertions enabled
    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}

From source file:com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License:Apache License

/**
 * Builds the list of input splits for the job.
 *
 * <p>Every token range returned by {@code getRangeMap(conf)} is handed to a
 * {@code SplitCallable} running on a thread pool. When the job configuration carries a key
 * range with a start token, each token range is first clipped to its intersection(s) with
 * that job range; ranges that do not intersect are skipped.
 *
 * @param context job context whose configuration describes the keyspace, column family,
 *                partitioner and optional input key range
 * @return the collected splits, in random order
 * @throws IOException if any split task fails, or if the calling thread is interrupted while
 *                     waiting for the results (the interrupt flag is restored in that case)
 */
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner
                    .preservesOrder() : "ConfigHelper.setInputKeyRange(..) can only be used with a order preserving paritioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        // Give every task its own TokenRange. Re-using and mutating `range`
                        // would let a task that was already submitted observe the tokens of
                        // a later intersection when intersectionWith yields more than one range.
                        // NOTE(review): relies on the Thrift-generated copy constructor.
                        TokenRange clipped = new TokenRange(range);
                        clipped.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        clipped.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(clipped, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (InterruptedException e) {
                // keep the interrupt visible to callers further up the stack
                Thread.currentThread().interrupt();
                throw new IOException("Could not get input splits", e);
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        // also cancels any tasks still running when we bail out early
        executor.shutdownNow();
    }

    // only enforced when the JVM runs with assertions enabled
    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}

From source file:com.tuplejump.calliope.hadoop.AbstractColumnFamilyInputFormat.java

License:Apache License

/**
 * Builds the list of input splits for the job.
 *
 * <p>Every token range returned by {@code getRangeMap(conf)} is handed to a
 * {@code SplitCallable} running on a bounded thread pool. When the job configuration carries
 * a key range (expressed either as start/end keys or as start/end tokens), each token range
 * is first clipped to its intersection(s) with that job range; ranges that do not intersect
 * are skipped. A key range with neither a start key nor a start token is ignored with a
 * warning.
 *
 * @param context job context whose configuration describes the keyspace, column family,
 *                partitioner and optional input key range
 * @return the collected splits, in random order
 * @throws IOException if any split task fails, or if the calling thread is interrupted while
 *                     waiting for the results (the interrupt flag is restored in that case)
 * @throws UnsupportedOperationException if a key-based range is used with a partitioner that
 *                                       does not preserve order
 * @throws IllegalArgumentException if a key-based range also specifies tokens
 */
public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>());
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<ColumnFamilySplit>>> splitfutures = new ArrayList<Future<List<ColumnFamilySplit>>>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder())
                    throw new UnsupportedOperationException(
                            "KeyRange based on keys can only be used with a order preserving paritioner");
                if (jobKeyRange.start_token != null)
                    throw new IllegalArgumentException("only start_key supported");
                // the original message here was a copy of the start_token one
                if (jobKeyRange.end_token != null)
                    throw new IllegalArgumentException("only end_key supported");
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key),
                        partitioner.getToken(jobKeyRange.end_key), partitioner);
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                        partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
            } else {
                logger.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        // Give every task its own TokenRange. Re-using and mutating `range`
                        // would let a task that was already submitted observe the tokens of
                        // a later intersection when intersectionWith yields more than one range.
                        // NOTE(review): relies on the Thrift-generated copy constructor.
                        TokenRange clipped = new TokenRange(range);
                        clipped.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        clipped.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(clipped, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<ColumnFamilySplit>> futureInputSplits : splitfutures) {
            try {
                List<ColumnFamilySplit> allSplits = futureInputSplits.get();
                splits.addAll(allSplits);
            } catch (InterruptedException e) {
                // keep the interrupt visible to callers further up the stack
                Thread.currentThread().interrupt();
                logger.warn("Error reading format", e);
                throw new IOException("Could not get input splits", e);
            } catch (Exception e) {
                logger.warn("Error reading format", e);
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        // also cancels any tasks still running when we bail out early
        executor.shutdownNow();
    }

    // only enforced when the JVM runs with assertions enabled
    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}