List of usage examples for org.apache.hadoop.mapreduce.InputSplit#getLocations()
public abstract String[] getLocations() throws IOException, InterruptedException;
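Before the project examples below, a minimal, self-contained sketch (hypothetical file path and host names, not taken from any of the sources that follow) of what getLocations() returns: the split's host hints for locality-aware scheduling, as a plain String[].

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class GetLocationsDemo {
    public static void main(String[] args) throws IOException, InterruptedException {
        // A FileSplit covering the first 128 MB of a hypothetical HDFS file,
        // with the hostnames that hold replicas of that block.
        InputSplit split = new FileSplit(
                new Path("hdfs:///data/input/part-00000"),
                0L,                      // byte offset of the split within the file
                128L * 1024 * 1024,      // split length in bytes
                new String[] { "datanode1", "datanode2", "datanode3" });

        // getLocations() returns a String[] of hostnames; use Arrays.toString()
        // to print the array contents rather than its identity hash.
        System.out.println("locations: " + Arrays.toString(split.getLocations()));
    }
}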
From source file:com.couchbase.sqoop.mapreduce.db.CouchbaseInputFormat.java
License:Apache License
@Override
/** {@inheritDoc} */
public RecordReader<Text, T> createRecordReader(final InputSplit split, final TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Arrays.toString() is needed here: calling toString() directly on the
    // String[] from getLocations() would print an array reference, not the hosts.
    System.out.printf("Creating Record Reader %s%n", Arrays.toString(split.getLocations()));
    return createRecordReader(split, context.getConfiguration());
}
From source file:com.google.cloud.hadoop.util.HadoopToStringUtil.java
License:Open Source License
public static String toString(InputSplit input) throws IOException, InterruptedException {
    if (input == null) {
        return "null";
    }
    String result = "InputSplit::";
    result += " length:" + input.getLength();
    result += " locations: " + Arrays.toString(input.getLocations());
    result += " toString(): " + input.toString();
    return result;
}
From source file:com.marklogic.contentpump.CombineDocumentSplit.java
License:Apache License
public CombineDocumentSplit(List<FileSplit> splits) throws IOException, InterruptedException {
    this.splits = splits;
    locations = new HashSet<String>();
    for (InputSplit split : splits) {
        length += split.getLength();
        for (String loc : split.getLocations()) {
            if (!locations.contains(loc)) {
                locations.add(loc);
            }
        }
    }
}
From source file:com.tuplejump.calliope.hadoop.cql3.CqlPagingRecordReader.java
License:Apache License
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    consistencyLevel = ConsistencyLevel.valueOf(ConfigHelper.getReadConsistencyLevel(conf));
    keyspace = ConfigHelper.getInputKeyspace(conf);
    columns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    Optional<Integer> pageRowSizeOptional = CqlConfigHelper.getInputPageRowSize(conf);
    try {
        pageRowSize = pageRowSizeOptional.isPresent() ? pageRowSizeOptional.get() : DEFAULT_CQL_PAGE_LIMIT;
    } catch (NumberFormatException e) {
        pageRowSize = DEFAULT_CQL_PAGE_LIMIT;
    }

    partitioner = ConfigHelper.getInputPartitioner(HadoopCompat.getConfiguration(context));

    try {
        if (client != null)
            return;

        // create connection using thrift
        String[] locations = split.getLocations();
        Exception lastException = null;
        for (String location : locations) {
            int port = ConfigHelper.getInputRpcPort(conf);
            try {
                client = CqlPagingInputFormat.createAuthenticatedClient(location, port, conf);
                break;
            } catch (Exception e) {
                lastException = e;
                logger.warn("Failed to create authenticated client to {}:{}", location, port);
            }
        }
        if (client == null && lastException != null)
            throw lastException;

        // retrieve partition keys and cluster keys from system.schema_columnfamilies table
        retrieveKeys();

        client.set_keyspace(keyspace);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    rowIterator = new RowIterator();
    logger.debug("created {}", rowIterator);
}
From source file:com.tuplejump.calliope.hadoop.cql3.CqlRecordReader.java
License:Apache License
private void initializeWithColumnFamilySplit(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = split;
    ColumnFamilySplit cfSplit = (ColumnFamilySplit) split;
    Configuration conf = context.getConfiguration();
    totalRowCount = (cfSplit.getLength() < Long.MAX_VALUE)
            ? (int) cfSplit.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = quote(ConfigHelper.getInputColumnFamily(conf));
    keyspace = quote(ConfigHelper.getInputKeyspace(conf));
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());

    try {
        if (cluster != null)
            return;

        // create connection using thrift
        String[] locations = split.getLocations();
        Exception lastException = null;
        for (String location : locations) {
            try {
                cluster = CqlConfigHelper.getInputCluster(location, conf);
                break;
            } catch (Exception e) {
                lastException = e;
                logger.warn("Failed to create authenticated client to {}", location);
            }
        }
        if (cluster == null && lastException != null)
            throw lastException;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    if (cluster != null) {
        try {
            session = cluster.connect(keyspace);
        } catch (NoHostAvailableException nha) {
            Map<InetSocketAddress, Throwable> errors = nha.getErrors();
            logger.error(errors.toString());
            for (InetSocketAddress isa : errors.keySet()) {
                logger.error("ERROR ON HOST [" + isa.getAddress() + "/" + isa.getPort() + "] ");
                logger.error(errors.get(isa).getMessage());
                logger.error("Connection Timeout: "
                        + cluster.getConfiguration().getSocketOptions().getConnectTimeoutMillis());
                logger.error("Local connection limit: " + cluster.getConfiguration().getPoolingOptions()
                        .getCoreConnectionsPerHost(HostDistance.LOCAL));
                logger.error("Remote connection limit: " + cluster.getConfiguration().getPoolingOptions()
                        .getCoreConnectionsPerHost(HostDistance.REMOTE));
            }
            throw nha;
        }
    }
    rowIterator = new SingleRangeRowIterator();
    logger.debug("created {}", rowIterator);
}
From source file:com.tuplejump.calliope.hadoop.cql3.CqlRecordReader.java
License:Apache License
private void initializeWithMultiRangeSplit(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = split;
    MultiRangeSplit cfSplit = (MultiRangeSplit) split;
    Configuration conf = context.getConfiguration();
    totalRowCount = (cfSplit.getLength() < Long.MAX_VALUE)
            ? (int) cfSplit.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = quote(ConfigHelper.getInputColumnFamily(conf));
    keyspace = quote(ConfigHelper.getInputKeyspace(conf));
    cqlQuery = CqlConfigHelper.getInputCql(conf);
    partitioner = ConfigHelper.getInputPartitioner(context.getConfiguration());

    try {
        if (cluster != null)
            return;

        // create connection using thrift
        String[] locations = split.getLocations();
        Exception lastException = null;
        for (String location : locations) {
            try {
                cluster = CqlConfigHelper.getInputCluster(location, conf);
                break;
            } catch (Exception e) {
                lastException = e;
                logger.warn("Failed to create authenticated client to {}", location);
            }
        }
        if (cluster == null && lastException != null)
            throw lastException;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    if (cluster != null) {
        try {
            session = cluster.connect(keyspace);
        } catch (NoHostAvailableException nha) {
            Map<InetSocketAddress, Throwable> errors = nha.getErrors();
            logger.error(errors.toString());
            for (InetSocketAddress isa : errors.keySet()) {
                logger.error("ERROR ON HOST [" + isa.getAddress() + "/" + isa.getPort() + "] ");
                logger.error(errors.get(isa).getMessage());
                logger.error("Connection Timeout: "
                        + cluster.getConfiguration().getSocketOptions().getConnectTimeoutMillis());
                logger.error("Local connection limit: " + cluster.getConfiguration().getPoolingOptions()
                        .getCoreConnectionsPerHost(HostDistance.LOCAL));
                logger.error("Remote connection limit: " + cluster.getConfiguration().getPoolingOptions()
                        .getCoreConnectionsPerHost(HostDistance.REMOTE));
            }
            throw nha;
        }
    }
    rowIterator = new MultiRangeRowIterator();
    logger.debug("created {}", rowIterator);
}
From source file:edu.american.student.redis.hadoop.RedisBigTableRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // Design note: this reader repurposes the split's location strings as a data
    // channel rather than as host hints: locations[0] carries a serialized
    // RedisBigTableKey and locations[1] the table identifier.
    String[] locations = split.getLocations();
    RedisBigTableKey key = RedisBigTableKey.inflate(locations[0].getBytes());
    byte[] row = key.getRow();
    byte[] cf = key.getColumnFamily();
    byte[] cq = key.getColumnQualifier();
    RedisForeman foreman = new RedisForeman();
    foreman.connect();
    Map<RedisBigTableKey, byte[]> keyValues;
    try {
        keyValues = foreman.getByQualifier(locations[1].getBytes(), row, cf, cq);
        totalKVs = keyValues.size();
        keyValueMapIter = keyValues.entrySet().iterator();
    } catch (RedisForemanException e) {
        throw new IOException(MessageFactory.objective("Read Split").toString(), e);
    }
}
From source file:grakn.core.server.session.reader.GraknCqlBridgeRecordReader.java
License:Open Source License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
    this.split = (ColumnFamilySplit) split;
    Configuration conf = HadoopCompat.getConfiguration(context);
    totalRowCount = (this.split.getLength() < Long.MAX_VALUE)
            ? (int) this.split.getLength()
            : ConfigHelper.getInputSplitSize(conf);
    cfName = ConfigHelper.getInputColumnFamily(conf);
    keyspace = ConfigHelper.getInputKeyspace(conf);
    partitioner = ConfigHelper.getInputPartitioner(conf);
    inputColumns = CqlConfigHelper.getInputcolumns(conf);
    userDefinedWhereClauses = CqlConfigHelper.getInputWhereClauses(conf);

    try {
        if (cluster != null) {
            return;
        }
        // create a Cluster instance
        String[] locations = split.getLocations();
        // A previous implementation instantiated the Cluster directly, with the
        // comment "disregard the conf as it brings some unforeseen issues":
        //     Cluster.builder().addContactPoints(locations).build();
        // That ignores the config, so non-default ports were not propagated.
        // The Cluster is therefore now created from conf. If this keeps breaking
        // we might need to investigate further.
        cluster = CqlConfigHelper.getInputCluster(ConfigHelper.getInputInitialAddress(conf).split(","), conf);
    } catch (Exception e) {
        throw new RuntimeException(
                "Unable to create cluster for table: " + cfName + ", in keyspace: " + keyspace, e);
    }

    // cluster should now refer to a valid cluster
    session = cluster.connect(quote(keyspace));
    Preconditions.checkState(session != null, "Can't create connection session");

    // get the negotiated serialization protocol
    nativeProtocolVersion = cluster.getConfiguration().getProtocolOptions().getProtocolVersion().toInt();

    // If the user provides a CQL query, use it without validation; otherwise
    // fall back to building a query from inputColumns and whereClauses.
    cqlQuery = CqlConfigHelper.getInputCql(conf);

    // validate that the user hasn't supplied a custom query together with
    // input columns and/or where clauses
    if (StringUtils.isNotEmpty(cqlQuery)
            && (StringUtils.isNotEmpty(inputColumns) || StringUtils.isNotEmpty(userDefinedWhereClauses))) {
        throw new AssertionError("Cannot define a custom query with input columns and / or where clauses");
    }
    if (StringUtils.isEmpty(cqlQuery)) {
        cqlQuery = buildQuery();
    }
    log.trace("cqlQuery {}", cqlQuery);

    distinctKeyIterator = new DistinctKeyIterator();
    log.trace("created {}", distinctKeyIterator);
}
From source file:ml.shifu.guagua.mapreduce.GuaguaInputFormat.java
License:Apache License
public static List<List<InputSplit>> getCombineGuaguaSplits(List<InputSplit> oneInputSplits,
        long maxCombinedSplitSize) throws IOException, InterruptedException {
    List<Node> nodes = new ArrayList<Node>();
    Map<String, Node> nodeMap = new HashMap<String, Node>();
    List<List<InputSplit>> result = new ArrayList<List<InputSplit>>();
    List<Long> resultLengths = new ArrayList<Long>();
    long comparableSplitId = 0;

    int size = 0, nSplits = oneInputSplits.size();
    InputSplit lastSplit = null;
    int emptyCnt = 0;
    for (InputSplit split : oneInputSplits) {
        if (split.getLength() == 0) {
            emptyCnt++;
            continue;
        }
        if (split.getLength() >= maxCombinedSplitSize) {
            comparableSplitId++;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            combinedSplits.add(split);
            result.add(combinedSplits);
            resultLengths.add(split.getLength());
        } else {
            ComparableSplit csplit = new ComparableSplit(split, comparableSplitId++);
            String[] locations = split.getLocations();
            // sort the locations to stabilize the number of maps: PIG-1757
            Arrays.sort(locations);
            HashSet<String> locationSeen = new HashSet<String>();
            for (String location : locations) {
                if (!locationSeen.contains(location)) {
                    Node node = nodeMap.get(location);
                    if (node == null) {
                        node = new Node();
                        nodes.add(node);
                        nodeMap.put(location, node);
                    }
                    node.add(csplit);
                    csplit.add(node);
                    locationSeen.add(location);
                }
            }
            lastSplit = split;
            size++;
        }
    }

    if (nSplits > 0 && emptyCnt == nSplits) {
        // if all splits are empty, add a single empty split, as an empty
        // directory is currently not properly handled somewhere
        List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(oneInputSplits.get(0));
        result.add(combinedSplits);
    } else if (size == 1) {
        List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
        combinedSplits.add(lastSplit);
        result.add(combinedSplits);
    } else if (size > 1) {
        // combine small splits
        Collections.sort(nodes, nodeComparator);
        DummySplit dummy = new DummySplit();
        // dummy is used to search for the next split of suitable size to be combined
        ComparableSplit dummyComparableSplit = new ComparableSplit(dummy, -1);
        for (Node node : nodes) {
            // sort the splits on this node in descending order
            node.sort();
            long totalSize = 0;
            List<ComparableSplit> splits = node.getSplits();
            int idx;
            int lenSplits;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            List<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();
            while (!splits.isEmpty()) {
                combinedSplits.add(splits.get(0).getSplit());
                combinedComparableSplits.add(splits.get(0));
                int startIdx = 1;
                lenSplits = splits.size();
                totalSize += splits.get(0).getSplit().getLength();
                long spaceLeft = maxCombinedSplitSize - totalSize;
                dummy.setLength(spaceLeft);
                idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                        dummyComparableSplit);
                idx = -idx - 1 + startIdx;
                while (idx < lenSplits) {
                    long thisLen = splits.get(idx).getSplit().getLength();
                    combinedSplits.add(splits.get(idx).getSplit());
                    combinedComparableSplits.add(splits.get(idx));
                    totalSize += thisLen;
                    spaceLeft -= thisLen;
                    if (spaceLeft <= 0)
                        break;
                    // find next combinable chunk
                    startIdx = idx + 1;
                    if (startIdx >= lenSplits)
                        break;
                    dummy.setLength(spaceLeft);
                    idx = Collections.binarySearch(node.getSplits().subList(startIdx, lenSplits),
                            dummyComparableSplit);
                    idx = -idx - 1 + startIdx;
                }
                if (totalSize > maxCombinedSplitSize / 2) {
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    removeSplits(combinedComparableSplits);
                    totalSize = 0;
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    splits = node.getSplits();
                } else {
                    if (combinedSplits.size() != lenSplits)
                        throw new AssertionError("Combined split logic error!");
                    break;
                }
            }
        }

        // handle leftovers
        List<ComparableSplit> leftoverSplits = new ArrayList<ComparableSplit>();
        Set<InputSplit> seen = new HashSet<InputSplit>();
        for (Node node : nodes) {
            for (ComparableSplit split : node.getSplits()) {
                if (!seen.contains(split.getSplit())) {
                    // Remove duplicates. The set has to be keyed on the raw input split,
                    // not the comparable input split: the latter overrides compareTo,
                    // which changes its equality semantics to something other than
                    // what we want here.
                    seen.add(split.getSplit());
                    leftoverSplits.add(split);
                }
            }
        }

        if (!leftoverSplits.isEmpty()) {
            long totalSize = 0;
            List<InputSplit> combinedSplits = new ArrayList<InputSplit>();
            List<ComparableSplit> combinedComparableSplits = new ArrayList<ComparableSplit>();
            int splitLen = leftoverSplits.size();
            for (int i = 0; i < splitLen; i++) {
                ComparableSplit split = leftoverSplits.get(i);
                long thisLen = split.getSplit().getLength();
                if (totalSize + thisLen >= maxCombinedSplitSize) {
                    removeSplits(combinedComparableSplits);
                    result.add(combinedSplits);
                    resultLengths.add(totalSize);
                    combinedSplits = new ArrayList<InputSplit>();
                    combinedComparableSplits.clear();
                    totalSize = 0;
                }
                combinedSplits.add(split.getSplit());
                combinedComparableSplits.add(split);
                totalSize += split.getSplit().getLength();
                if (i == splitLen - 1) {
                    // last piece: it could be very small; try to squeeze it into any existing split
                    for (int j = 0; j < result.size(); j++) {
                        if (resultLengths.get(j) + totalSize <= maxCombinedSplitSize) {
                            List<InputSplit> isList = result.get(j);
                            for (InputSplit csplit : combinedSplits) {
                                isList.add(csplit);
                            }
                            removeSplits(combinedComparableSplits);
                            combinedSplits.clear();
                            break;
                        }
                    }
                    if (!combinedSplits.isEmpty()) {
                        // the last piece cannot be squeezed in; create a new combined split for it
                        removeSplits(combinedComparableSplits);
                        result.add(combinedSplits);
                    }
                }
            }
        }
    }
    LOG.info("Total input paths (combined) to process : {}", result.size());
    return result;
}
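A brief usage sketch for the combiner above, with hypothetical inputs: two small splits that both report "host1" in getLocations(), so they can be grouped into one node-local bundle under a 128 MB cap. This assumes ml.shifu.guagua.mapreduce.GuaguaInputFormat (the class this method comes from) is on the classpath; paths and host names are made up.

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import ml.shifu.guagua.mapreduce.GuaguaInputFormat;

public class CombineSplitsDemo {
    public static void main(String[] args) throws Exception {
        List<InputSplit> raw = new ArrayList<InputSplit>();
        raw.add(new FileSplit(new Path("hdfs:///in/part-0"), 0, 16L << 20,
                new String[] { "host1" }));
        raw.add(new FileSplit(new Path("hdfs:///in/part-1"), 0, 24L << 20,
                new String[] { "host1", "host2" }));

        // Each inner list is one combined split whose total length stays under the cap.
        List<List<InputSplit>> combined =
                GuaguaInputFormat.getCombineGuaguaSplits(raw, 128L << 20);
        System.out.println("combined groups: " + combined.size());
    }
}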
From source file:ml.shifu.guagua.yarn.GuaguaSplitWriter.java
License:Apache License
private static SplitMetaInfo[] writeOldSplits(org.apache.hadoop.mapred.InputSplit[] splits,
        FSDataOutputStream out, Configuration conf) throws IOException {
    SplitMetaInfo[] info = new SplitMetaInfo[splits.length];
    if (splits.length != 0) {
        int i = 0;
        long offset = out.getPos();
        for (org.apache.hadoop.mapred.InputSplit split : splits) {
            long prevLen = out.getPos();
            Text.writeString(out, split.getClass().getName());
            split.write(out);
            long currLen = out.getPos();
            String[] locations = split.getLocations();
            final int max_loc = conf.getInt(MAX_SPLIT_LOCATIONS, 10);
            if (locations.length > max_loc) {
                LOG.warn("Max block location exceeded for split: " + split + " splitsize: "
                        + locations.length + " maxsize: " + max_loc);
                locations = Arrays.copyOf(locations, max_loc);
            }
            info[i++] = new JobSplit.SplitMetaInfo(locations, offset, split.getLength());
            offset += currLen - prevLen;
        }
    }
    return info;
}
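A minimal, self-contained sketch of the truncation rule applied in writeOldSplits above: cap the number of location hints per split so oversized location lists do not bloat the split meta file. Host names and the limit of 2 are hypothetical; in the snippet above the limit comes from the MAX_SPLIT_LOCATIONS config key with a default of 10.

import java.util.Arrays;

public class SplitLocationTruncation {
    // Keep at most maxLoc location hints; Arrays.copyOf retains the first
    // maxLoc entries and drops the rest, exactly as writeOldSplits does.
    static String[] truncateLocations(String[] locations, int maxLoc) {
        if (locations.length > maxLoc) {
            return Arrays.copyOf(locations, maxLoc);
        }
        return locations;
    }

    public static void main(String[] args) {
        String[] hosts = { "h1", "h2", "h3", "h4" };
        // With a limit of 2, only the first two hosts survive: prints [h1, h2]
        System.out.println(Arrays.toString(truncateLocations(hosts, 2)));
    }
}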