Example usage for org.apache.hadoop.mapreduce Job getJobID

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.Job.getJobID().

Prototype

public JobID getJobID()

Source Link

Document

Get the unique ID for the job.

Usage

From source file: org.wonderbee.elasticsearch.hive.ElasticSearchHiveInputFormat.java

License:Apache License

/**
 * Builds one input split per primary shard of the configured Elasticsearch index.
 *
 * <p>Reads the index name, request size and host/port from {@code conf}, copies the
 * {@code ES_CONFIG} and {@code ES_PLUGINS} settings into system properties, then starts the
 * embedded client to read the cluster routing table. For every assigned primary shard of the
 * index, the host:port and node id currently holding it are recorded. The client is always
 * closed again, even when reading the cluster state fails.
 *
 * @param conf          job configuration supplying the Elasticsearch settings and input paths
 * @param numSplitsHint ignored; the number of splits equals the number of primary shards found
 * @return one {@code HiveInputSplit} wrapping an {@code ElasticSearchSplit} per primary shard,
 *         in the order the shards were encountered in the routing table
 * @throws IOException if the job or its input paths cannot be resolved, or if shards were found
 *                     but no input path is configured to attach to the splits
 */
public InputSplit[] getSplits(JobConf conf, int numSplitsHint) throws IOException {
    this.conf = conf;
    this.indexName = conf.get(ES_INDEX_NAME);
    this.requestSize = Integer.parseInt(conf.get(ES_REQUEST_SIZE, "1000"));

    this.hostPort = conf.get(ElasticSearchStorageHandler.ES_HOSTPORT);

    // NOTE(review): System.setProperty rejects null values, so both settings must be present
    // in conf or this throws NullPointerException - confirm callers always set them.
    System.setProperty(ES_CONFIG, conf.get(ES_CONFIG));
    System.setProperty(ES_PLUGINS, conf.get(ES_PLUGINS));

    // Shard id -> { "host:port", node id } for each primary shard; insertion-ordered.
    Map<Integer, String[]> shardToHost = new LinkedHashMap<Integer, String[]>();

    start_embedded_client();
    try {
        LOG.info("Admin client started");
        //Get the mapping of shards to node/hosts with primary
        ClusterState clusterState = client.admin().cluster().prepareState().execute().actionGet().state();
        for (IndexRoutingTable indexRoutingTable : clusterState.routingTable()) {
            for (IndexShardRoutingTable indexShardRoutingTable : indexRoutingTable) {
                for (ShardRouting shardRouting : indexShardRoutingTable.getAssignedShards()) {
                    if (shardRouting.shardId().index().getName().equals(this.indexName) && shardRouting.primary()) {
                        InetSocketTransportAddress address = (InetSocketTransportAddress) clusterState.nodes()
                                .get(shardRouting.currentNodeId()).getAddress();
                        int shardId = shardRouting.shardId().getId();
                        // Named differently from the hostPort field to avoid shadowing it.
                        String shardAddress = address.address().getHostName() + ":" + address.address().getPort();
                        String nodeName = shardRouting.currentNodeId();
                        shardToHost.put(shardId, new String[] { shardAddress, nodeName });
                    }
                }
            }
        }
    } finally {
        // Close the client on every path so a failed cluster-state lookup does not leak it.
        this.client.close();
        LOG.info("Admin client closed");
    }

    List<InputSplit> splits = new ArrayList<InputSplit>(shardToHost.size());

    Job job = new Job(conf);
    JobContext jobContext = new JobContext(job.getConfiguration(), job.getJobID());
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    // Each split carries the first table path; fail with a clear message instead of an
    // ArrayIndexOutOfBoundsException when there is none to attach.
    if (!shardToHost.isEmpty() && tablePaths.length == 0) {
        throw new IOException("No input path is configured for index " + this.indexName
                + "; cannot build ElasticSearchSplit");
    }

    //Consultation with kimchy revealed it should be more efficient to just have as many splits
    //as shards.
    for (Map.Entry<Integer, String[]> pair : shardToHost.entrySet()) {
        int shard = pair.getKey();
        String shardHostPort = pair.getValue()[0];
        String nodeName = pair.getValue()[1];
        LOG.debug("Created split: shard:" + shard + ", host:" + shardHostPort + ", node:" + nodeName);

        splits.add(new HiveInputFormat.HiveInputSplit(new ElasticSearchSplit(0, this.requestSize, shardHostPort,
                nodeName, shard, tablePaths[0].toString()), "ElasticSearchSplit"));
    }
    return splits.toArray(new InputSplit[splits.size()]);
}