Example usage for org.apache.cassandra.dht Range intersectionWith

Introduction

On this page you can find example usage for org.apache.cassandra.dht Range intersectionWith.

Prototype

public Set<Range<T>> intersectionWith(Range<T> that) 
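
intersectionWith computes the overlap between two token ranges. Cassandra ranges are (left, right] intervals on a ring and may wrap around the minimum token, so the overlap of two ranges can consist of up to two disjoint pieces; that is why the method returns a Set rather than a single Range. Below is a minimal sketch, assuming a Cassandra version in which the Range constructor still takes a partitioner argument (as in the examples on this page); the class name and token values are illustrative:

import java.util.Set;

import org.apache.cassandra.dht.Murmur3Partitioner;
import org.apache.cassandra.dht.Range;
import org.apache.cassandra.dht.Token;

public class RangeIntersectionSketch {
    public static void main(String[] args) {
        Murmur3Partitioner partitioner = new Murmur3Partitioner();
        Token.TokenFactory factory = partitioner.getTokenFactory();

        // A plain range (0, 200] and a wrapping range (100, 10] on the token ring.
        Range<Token> plain = new Range<Token>(
                factory.fromString("0"), factory.fromString("200"), partitioner);
        Range<Token> wrapping = new Range<Token>(
                factory.fromString("100"), factory.fromString("10"), partitioner);

        // The wrapping range overlaps the plain range in two disjoint pieces.
        Set<Range<Token>> pieces = wrapping.intersectionWith(plain);
        for (Range<Token> piece : pieces)
            System.out.println(piece); // expected: (0,10] and (100,200]
    }
}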

Usage

From source file: andromache.hadoop.CassandraInputFormat.java

License: Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = CassandraConfigHelper.getInputKeyspace(context.getConfiguration());

    cfNames = CassandraConfigHelper.getInputColumnFamilies(context.getConfiguration());

    // TODO: [IS] make sure this partitioner matches what is set on each participating keyspace
    partitioner = CassandraConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel

    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<CassandraSplit>>> splitfutures = new ArrayList<Future<List<CassandraSplit>>>();
        KeyRange jobKeyRange = CassandraConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner.preservesOrder() :
                    "ConfigHelper.setInputKeyRange(..) can only be used with an order-preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits

                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<CassandraSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
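
All three examples on this page follow the same pattern: convert the Thrift TokenRange endpoints into a dht Range, check intersects first (cheap), then let intersectionWith clip the cluster's range to the job's key range. A condensed sketch of that loop, with a hypothetical submitSplitComputation helper standing in for executor.submit(new SplitCallable(range, conf)):

Token.TokenFactory factory = partitioner.getTokenFactory();
Range<Token> dhtRange = new Range<Token>(
        factory.fromString(range.start_token),
        factory.fromString(range.end_token), partitioner);

if (dhtRange.intersects(jobRange)) {
    // intersectionWith may yield two disjoint pieces when one range wraps,
    // so each piece is narrowed and scheduled separately.
    for (Range<Token> piece : dhtRange.intersectionWith(jobRange)) {
        range.start_token = factory.toString(piece.left);
        range.end_token = factory.toString(piece.right);
        submitSplitComputation(range); // hypothetical stand-in for the executor call
    }
}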

From source file: com.clojurewerkz.cascading.cassandra.hadoop.ColumnFamilyInputFormat.java

License: Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = context.getConfiguration();

    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = ConfigHelper.getInputKeyspace(context.getConfiguration());
    cfName = ConfigHelper.getInputColumnFamily(context.getConfiguration());
    partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = Executors.newCachedThreadPool();
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<InputSplit>>> splitfutures = new ArrayList<Future<List<InputSplit>>>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null && jobKeyRange.start_token != null) {
            assert partitioner.preservesOrder() :
                    "ConfigHelper.setInputKeyRange(..) can only be used with an order-preserving partitioner";
            assert jobKeyRange.start_key == null : "only start_token supported";
            assert jobKeyRange.end_key == null : "only end_token supported";
            jobRange = new Range<Token>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                    partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<InputSplit>> futureInputSplits : splitfutures) {
            try {
                splits.addAll(futureInputSplits.get());
            } catch (Exception e) {
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}

From source file: com.tuplejump.calliope.hadoop.AbstractColumnFamilyInputFormat.java

License: Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    Configuration conf = HadoopCompat.getConfiguration(context);

    validateConfiguration(conf);

    // canonical ranges and nodes holding replicas
    List<TokenRange> masterRangeNodes = getRangeMap(conf);

    keyspace = ConfigHelper.getInputKeyspace(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    logger.debug("partitioner is " + partitioner);

    // canonical ranges, split into pieces, fetching the splits in parallel
    ExecutorService executor = new ThreadPoolExecutor(0, 128, 60L, TimeUnit.SECONDS,
            new LinkedBlockingQueue<Runnable>());
    List<InputSplit> splits = new ArrayList<InputSplit>();

    try {
        List<Future<List<ColumnFamilySplit>>> splitfutures = new ArrayList<Future<List<ColumnFamilySplit>>>();
        KeyRange jobKeyRange = ConfigHelper.getInputKeyRange(conf);
        Range<Token> jobRange = null;
        if (jobKeyRange != null) {
            if (jobKeyRange.start_key != null) {
                if (!partitioner.preservesOrder())
                    throw new UnsupportedOperationException(
                            "KeyRange based on keys can only be used with an order-preserving partitioner");
                if (jobKeyRange.start_token != null)
                    throw new IllegalArgumentException("only start_key supported");
                if (jobKeyRange.end_token != null)
                    throw new IllegalArgumentException("only end_key supported");
                jobRange = new Range<>(partitioner.getToken(jobKeyRange.start_key),
                        partitioner.getToken(jobKeyRange.end_key), partitioner);
            } else if (jobKeyRange.start_token != null) {
                jobRange = new Range<>(partitioner.getTokenFactory().fromString(jobKeyRange.start_token),
                        partitioner.getTokenFactory().fromString(jobKeyRange.end_token), partitioner);
            } else {
                logger.warn("ignoring jobKeyRange specified without start_key or start_token");
            }
        }

        for (TokenRange range : masterRangeNodes) {
            if (jobRange == null) {
                // for each range, pick a live owner and ask it to compute bite-sized splits
                splitfutures.add(executor.submit(new SplitCallable(range, conf)));
            } else {
                Range<Token> dhtRange = new Range<Token>(
                        partitioner.getTokenFactory().fromString(range.start_token),
                        partitioner.getTokenFactory().fromString(range.end_token), partitioner);

                if (dhtRange.intersects(jobRange)) {
                    for (Range<Token> intersection : dhtRange.intersectionWith(jobRange)) {
                        range.start_token = partitioner.getTokenFactory().toString(intersection.left);
                        range.end_token = partitioner.getTokenFactory().toString(intersection.right);
                        // for each range, pick a live owner and ask it to compute bite-sized splits
                        splitfutures.add(executor.submit(new SplitCallable(range, conf)));
                    }
                }
            }
        }

        // wait until we have all the results back
        for (Future<List<ColumnFamilySplit>> futureInputSplits : splitfutures) {
            try {
                List<ColumnFamilySplit> allSplits = futureInputSplits.get();
                splits.addAll(allSplits);
            } catch (Exception e) {
                logger.warn("Error reading format", e);
                throw new IOException("Could not get input splits", e);
            }
        }
    } finally {
        executor.shutdownNow();
    }

    assert splits.size() > 0;
    Collections.shuffle(splits, new Random(System.nanoTime()));
    return splits;
}
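
Unlike the first two examples, this variant also accepts key-based job ranges: when start_key is set it converts start_key/end_key to tokens via partitioner.getToken (rejecting mixed key/token ranges and requiring an order-preserving partitioner), and it swaps the unbounded cached thread pool for a ThreadPoolExecutor capped at 128 threads.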