Example usage for org.apache.hadoop.mapred JobConf getBoolean

List of usage examples for org.apache.hadoop.mapred JobConf getBoolean

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.getBoolean.

Prototype

public boolean getBoolean(String name, boolean defaultValue) 

Document

Get the value of the name property as a boolean. If no such property is specified, or if the specified value is not a valid boolean, then defaultValue is returned.
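
For orientation, a minimal self-contained sketch of the call (the property name example.feature.enabled is purely illustrative):

import org.apache.hadoop.mapred.JobConf;

public class GetBooleanExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // The property is not set yet, so the supplied default (false) is returned.
        boolean before = conf.getBoolean("example.feature.enabled", false);

        // After setting the property, getBoolean returns the configured value.
        conf.setBoolean("example.feature.enabled", true);
        boolean after = conf.getBoolean("example.feature.enabled", false);

        System.out.println("before=" + before + ", after=" + after);
    }
}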

Usage

From source file:com.cloudera.recordservice.avro.AvroJob.java

License:Apache License

public static void setInputFormat(org.apache.hadoop.mapred.JobConf job,
        Class<? extends org.apache.hadoop.mapred.InputFormat> c) {
    if (job.getBoolean(USE_RECORD_SERVICE_INPUT_FORMAT_CONF_KEY, false)) {
        if (c.getName().equals(org.apache.avro.mapred.AvroInputFormat.class.getName())) {
            c = com.cloudera.recordservice.avro.mapred.AvroInputFormat.class;
        } else {
            throw new RuntimeException("Class '" + c.getName() + "' is not supported "
                    + "by the RecordService. Use AvroInputFormat or disable RecordService.");
        }
    }
    LOG.debug("Using input format: " + c.getName());
    job.setInputFormat(c);
}

From source file:com.cloudera.science.avro.streaming.AvroAsJSONOutputFormat.java

License:Open Source License

@Override
public RecordWriter<Text, Text> getRecordWriter(FileSystem ignored, JobConf job, String name,
        Progressable progress) throws IOException {
    if (schema == null) {
        SchemaLoader loader = new SchemaLoader(job);
        this.schema = loader.load(job.get(SCHEMA_LITERAL), job.get(SCHEMA_URL), job.get(SCHEMA_TYPE_NAME));
        this.converter = new JsonConverter(schema);
        this.readKey = job.getBoolean(READ_KEY, true);
    }

    DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));
    if (getCompressOutput(job)) {
        int level = job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, AvroOutputFormat.DEFAULT_DEFLATE_LEVEL);
        String codecName = job.get(AvroJob.CONF_OUTPUT_CODEC,
                org.apache.avro.file.DataFileConstants.DEFLATE_CODEC);
        CodecFactory codec = codecName.equals(DataFileConstants.DEFLATE_CODEC)
                ? CodecFactory.deflateCodec(level)
                : CodecFactory.fromString(codecName);
        writer.setCodec(codec);
    }
    writer.setSyncInterval(
            job.getInt(AvroOutputFormat.SYNC_INTERVAL_KEY, DataFileConstants.DEFAULT_SYNC_INTERVAL));

    Path path = FileOutputFormat.getTaskOutputPath(job, name + AvroOutputFormat.EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroAsJSONRecordWriter(writer, converter, readKey);
}
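
The compression settings above come from standard Avro/Hadoop output properties. A hedged driver-side sketch of enabling deflate output for this format (level 6 is an arbitrary choice):

import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.AvroOutputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

public class AvroCompressionConfSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Makes getCompressOutput(job) return true in getRecordWriter above.
        FileOutputFormat.setCompressOutput(job, true);
        // Deflate level, read via job.getInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, ...).
        job.setInt(AvroOutputFormat.DEFLATE_LEVEL_KEY, 6);
        // Codec name, read via job.get(AvroJob.CONF_OUTPUT_CODEC, ...).
        job.set(AvroJob.CONF_OUTPUT_CODEC, "deflate");
    }
}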

From source file:com.datascience.hadoop.CsvInputFormat.java

License:Apache License

@Override
public RecordReader<LongWritable, ListWritable<Text>> getRecordReader(InputSplit inputSplit, JobConf conf,
        Reporter reporter) throws IOException {
    String charsetName = conf.get(CHARSET);
    Charset charset = charsetName != null ? Charset.forName(charsetName) : StandardCharsets.UTF_8;

    FileSplit split = (FileSplit) inputSplit;
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);
    InputStream is = fs.open(path);

    // If the input is compressed, load the compression codec.
    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(path);
    if (codec != null) {
        Decompressor decompressor = CodecPool.getDecompressor(codec);
        is = codec.createInputStream(is, decompressor);
    }
    return new CsvRecordReader(new InputStreamReader(is, charset), createFormat(conf), split.getLength(),
            conf.getBoolean(STRICT_MODE, true));
}

From source file:com.digitalpebble.behemoth.ClassifierJob.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);
    filter = DocumentFilter.getFilters(job);
    lowerCase = job.getBoolean("classification.tokenize", false);
    docFeaturename = job.get("classification.doc.feature.name", "label");

    String modelPath = job.get(ClassifierJob.modelNameParam);

    // optimisation for jvm reuse
    // do not reload the model
    if (classifier != null) {
        LOG.info("Reusing existing classifier [" + classifier.toString() + "]");
        return;
    }

    long start = System.currentTimeMillis();
    File modelFile = null;
    try {
        String modelCacheName = new Path(modelPath).getName();
        Path[] cacheFiles = DistributedCache.getLocalCacheArchives(job);
        if (null != cacheFiles && cacheFiles.length > 0) {
            for (Path cachePath : cacheFiles) {
                LOG.info("LocalCache : " + cachePath.toUri());
                LOG.info("modelCacheName : " + modelCacheName);
                if (cachePath.toUri().toString().endsWith(modelCacheName)) {
                    String parent = new File(cachePath.toUri().getPath()).toString();
                    modelFile = new File(parent, modelCacheName.replaceAll(".zip", ""));
                    LOG.info("Unzipped ? " + modelFile.getAbsolutePath());
                    boolean doesExist = modelFile.exists();
                    LOG.info("modelFile exists " + doesExist);
                    // if it does not exist it must have been unpacked at
                    // the parent level
                    if (!doesExist) {
                        modelFile = new File(parent);
                    }
                    break;
                }
            }
        }
    } catch (IOException ioe) {
        throw new RuntimeException("Impossible to retrieve model from distributed cache", ioe);
    }

    try {
        classifier = classifier.getClassifier(modelFile);
    } catch (Exception e) {
        throw new RuntimeException("Impossible to load model from " + modelFile, e);
    }
    long end = System.currentTimeMillis();
    LOG.info("Model loaded in " + (end - start) + " msec");
}
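
ClassifierJob reads its switches directly from the JobConf; a minimal driver-side sketch using the plainly named properties above (the model path parameter is referenced through ClassifierJob.modelNameParam and is left out here):

import org.apache.hadoop.mapred.JobConf;

public class ClassifierConfSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Read via job.getBoolean("classification.tokenize", false) in configure().
        job.setBoolean("classification.tokenize", true);
        // Read via job.get("classification.doc.feature.name", "label") in configure().
        job.set("classification.doc.feature.name", "label");
    }
}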

From source file:com.digitalpebble.behemoth.solr.LucidWorksWriter.java

License:Apache License

public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new StreamingUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    includeMetadata = job.getBoolean("lw.metadata", false);
    includeAnnotations = job.getBoolean("lw.annotations", false);

    // get the Behemoth annotations types and features
    // to store as SOLR fields
    // solr.f.name = BehemothType.featureName
    // e.g. solr.f.person = Person.string
    Iterator<Entry<String, String>> iterator = job.iterator();
    while (iterator.hasNext()) {
        Entry<String, String> entry = iterator.next();
        if (entry.getKey().startsWith("solr.f.") == false)
            continue;
        String fieldName = entry.getKey().substring("solr.f.".length());
        String val = entry.getValue();
        // see if a feature has been specified
        // if not we'll use '*' to indicate that we want
        // the text covered by the annotation
        HashMap<String, String> featureValMap = new HashMap<String, String>();
        int separator = val.indexOf(".");
        String featureName = "*";
        if (separator != -1)
            featureName = val.substring(separator + 1);
        featureValMap.put(featureName, fieldName);
        fieldMapping.put(entry.getValue(), featureValMap);
        LOG.debug("Adding to mapping " + entry.getValue() + " " + featureName + " " + fieldName);
    }
}

From source file:com.digitalpebble.behemoth.solr.SOLRWriter.java

License:Apache License

public void open(JobConf job, String name) throws IOException {
    String zkHost = job.get("solr.zkhost");
    if (zkHost != null && zkHost.equals("") == false) {
        String collection = job.get("solr.zk.collection", "collection1");
        LOG.info("Indexing to collection: " + collection + " w/ ZK host: " + zkHost);
        solr = new CloudSolrServer(zkHost);
        ((CloudSolrServer) solr).setDefaultCollection(collection);
    } else {
        String solrURL = job.get("solr.server.url");
        int queueSize = job.getInt("solr.client.queue.size", 100);
        int threadCount = job.getInt("solr.client.threads", 1);
        solr = new ConcurrentUpdateSolrServer(solrURL, queueSize, threadCount);
    }
    String paramsString = job.get("solr.params");
    if (paramsString != null) {
        params = new ModifiableSolrParams();
        String[] pars = paramsString.trim().split("\\&");
        for (String kvs : pars) {
            String[] kv = kvs.split("=");
            if (kv.length < 2) {
                LOG.warn("Invalid Solr param " + kvs + ", skipping...");
                continue;
            }
            params.add(kv[0], kv[1]);
        }
        LOG.info("Using Solr params: " + params.toString());
    }

    includeMetadata = job.getBoolean("solr.metadata", false);
    includeAnnotations = job.getBoolean("solr.annotations", false);
    useMetadataPrefix = job.getBoolean("solr.metadata.use.prefix", false);
    metadataPrefix = job.get("solr.metadata.prefix", "attr_");
    annotationPrefix = job.get("solr.annotation.prefix", "annotate_");
    useAnnotationPrefix = job.getBoolean("solr.annotation.use.prefix", false);
    populateSolrFieldMappingsFromBehemothAnnotationsTypesAndFeatures(job);
}
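
The boolean switches read in open() are ordinary JobConf properties, so a driver would typically set them before submitting the job. A minimal sketch using the property names from this example (the Solr URL is a placeholder):

import org.apache.hadoop.mapred.JobConf;

public class SolrWriterConfSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Read via job.get("solr.server.url") in open(); the value is a placeholder.
        job.set("solr.server.url", "http://localhost:8983/solr");
        // Read via job.getBoolean(...) in open(), each defaulting to false.
        job.setBoolean("solr.metadata", true);
        job.setBoolean("solr.annotations", true);
        job.setBoolean("solr.metadata.use.prefix", true);
        job.set("solr.metadata.prefix", "attr_");
    }
}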

From source file:com.ebay.erl.mobius.core.datajoin.EvenlyPartitioner.java

License:Apache License

/**
 * Read in the partition file and build indexing data structures.
 * If the keytype is {@link org.apache.hadoop.io.BinaryComparable} and
 * <tt>total.order.partitioner.natural.order</tt> is not false, a trie
 * of the first <tt>total.order.partitioner.max.trie.depth</tt>(2) + 1 bytes
 * will be built. Otherwise, keys will be located using a binary search of
 * the partition keyset using the {@link org.apache.hadoop.io.RawComparator}
 * defined for this job. The input file must be sorted with the same
 * comparator and contain {@link org.apache.hadoop.mapred.JobConf#getNumReduceTasks} - 1 keys.
 */
@SuppressWarnings("unchecked") // keytype from conf not static
public void configure(JobConf job) {
    try {
        String parts = getPartitionFile(job);
        final Path partFile = new Path(parts);
        final FileSystem fs = (DEFAULT_PATH.equals(parts)) ? FileSystem.getLocal(job) // assume in DistributedCache
                : partFile.getFileSystem(job);

        //Class<K> keyClass = (Class<K>)job.getMapOutputKeyClass();
        K[] splitPoints = readPartitions(fs, partFile, (Class<K>) Tuple.class, job);
        if (splitPoints.length != job.getNumReduceTasks() - 1) {
            throw new IOException("Wrong number of partitions in keyset");
        }
        RawComparator<K> comparator = (RawComparator<K>) job.getOutputKeyComparator();
        for (int i = 0; i < splitPoints.length - 1; ++i) {
            if (comparator.compare(splitPoints[i], splitPoints[i + 1]) >= 0) {
                throw new IOException("Split points are out of order");
            }
        }
        boolean natOrder = job.getBoolean("total.order.partitioner.natural.order", true);
        if (natOrder && BinaryComparable.class.isAssignableFrom(Tuple.class)) {
            partitions = buildTrie((BinaryComparable[]) splitPoints, 0, splitPoints.length, new byte[0],
                    job.getInt("total.order.partitioner.max.trie.depth", 2));
        } else {
            partitions = new BinarySearchNode(splitPoints, comparator);
        }
    } catch (IOException e) {
        throw new IllegalArgumentException("Can't read partitions file", e);
    }
}
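
The two total-order partitioner knobs read above are plain configuration properties; a minimal sketch of tuning them on the same JobConf before the job runs (the values shown are arbitrary):

import org.apache.hadoop.mapred.JobConf;

public class PartitionerTuningSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Read via job.getBoolean(..., true); false forces the binary-search path.
        job.setBoolean("total.order.partitioner.natural.order", false);
        // Only consulted when the trie path is taken (the default depth is 2).
        job.setInt("total.order.partitioner.max.trie.depth", 3);
    }
}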

From source file:com.ebay.erl.mobius.core.mapred.ConfigurableJob.java

License:Apache License

private static void writePartitionFile(JobConf job, Sampler sampler) {
    try {
        ////////////////////////////////////////////////
        // first, getting samples from the data sources
        ////////////////////////////////////////////////
        LOGGER.info("Running local sampling for job [" + job.getJobName() + "]");
        InputFormat inf = job.getInputFormat();
        Object[] samples = sampler.getSample(inf, job);
        LOGGER.info("Samples retrieved, sorting...");

        ////////////////////////////////////////////////
        // sort the samples
        ////////////////////////////////////////////////
        RawComparator comparator = job.getOutputKeyComparator();
        Arrays.sort(samples, comparator);

        if (job.getBoolean("mobius.print.sample", false)) {
            PrintWriter pw = new PrintWriter(
                    new OutputStreamWriter(new GZIPOutputStream(new BufferedOutputStream(new FileOutputStream(
                            new File(job.get("mobius.sample.file", "./samples.txt.gz")))))));
            for (Object obj : samples) {
                pw.println(obj);
            }
            pw.flush();
            pw.close();
        }

        ////////////////////////////////////////////////
        // start to write partition files
        ////////////////////////////////////////////////

        FileSystem fs = FileSystem.get(job);
        Path partitionFile = fs.makeQualified(new Path(TotalOrderPartitioner.getPartitionFile(job)));
        while (fs.exists(partitionFile)) {
            partitionFile = new Path(partitionFile.toString() + "." + System.currentTimeMillis());
        }
        fs.deleteOnExit(partitionFile);
        TotalOrderPartitioner.setPartitionFile(job, partitionFile);
        LOGGER.info("write partition file to:" + partitionFile.toString());

        int reducersNbr = job.getNumReduceTasks();
        Set<Object> wroteSamples = new HashSet<Object>();

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, partitionFile, Tuple.class,
                NullWritable.class);

        float avgReduceSize = samples.length / reducersNbr;

        int lastBegin = 0;
        for (int i = 0; i < samples.length;) {
            // trying to distribute the load for every reducer evenly,
            // dividing the <code>samples</code> into a set of blocks
            // separated by boundaries selected from the
            // <code>samples</code> array; each block should have
            // about the same size.

            // find the last index of element that equals to samples[i], as
            // such element might appear multiple times in the samples.
            int upperBound = Util.findUpperBound(samples, samples[i], comparator);

            int lowerBound = i;//Util.findLowerBound(samples, samples[i], comparator);

            // the repeat time of samples[i], if the key itself is too big
            // select it as boundary
            int currentElemSize = upperBound - lowerBound + 1;

            if (currentElemSize > avgReduceSize * 2) // greater than two times of average reducer size
            {
                // the current element is too big, greater than
                // two times of the <code>avgReduceSize</code>, 
                // put itself as boundary
                writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                //pw.println(samples[i]);

                // immediately put the next element on the boundary;
                // the next element starts at <code>upperBound + 1</code>,
                // to prevent the current one from consuming even more.
                if (upperBound + 1 < samples.length) {
                    writer.append(((DataJoinKey) samples[upperBound + 1]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[upperBound + 1]).getKey());
                    //pw.println(samples[upperBound+1]);

                    // move on to the next element after <code>samples[upperBound + 1]</code>
                    lastBegin = Util.findUpperBound(samples, samples[upperBound + 1], comparator) + 1;
                    i = lastBegin;
                } else {
                    break;
                }
            } else {
                // the current element is small enough to be considered
                // together with the previous group
                int size = upperBound - lastBegin;
                if (size > avgReduceSize) {
                    // by including the current elements, we have
                    // found a block that's big enough, select it
                    // as boundary
                    writer.append(((DataJoinKey) samples[i]).getKey(), NullWritable.get());
                    wroteSamples.add(((DataJoinKey) samples[i]).getKey());
                    //pw.println(samples[i]);

                    i = upperBound + 1;
                    lastBegin = i;
                } else {
                    i = upperBound + 1;
                }
            }
        }

        writer.close();

        // if the number of written samples doesn't equal the number of
        // reducers minus one, the key space is too small and
        // TotalOrderPartitioner won't work; it only works when
        // the partition boundaries are distinct.
        //
        // we need to change the number of reducers
        if (wroteSamples.size() + 1 != reducersNbr) {
            LOGGER.info("Write complete, but key space is too small, sample size=" + wroteSamples.size()
                    + ", reducer size:" + (reducersNbr));
            LOGGER.info("Set the reducer size to:" + (wroteSamples.size() + 1));

            // add 1 because the written samples define boundaries, e.g., if
            // the sample size is two with elements [300, 1000], then
            // there should be 3 reducers: one handling i<300, one
            // for 300<=i<1000, and another for 1000<=i
            job.setNumReduceTasks((wroteSamples.size() + 1));
        }

        samples = null;
    } catch (IOException e) {
        LOGGER.error(e.getMessage(), e);
        throw new RuntimeException(e);
    }
}
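
The sample dump in writePartitionFile is gated by the mobius.print.sample flag read with getBoolean; a hedged sketch of enabling it (the output path is illustrative):

import org.apache.hadoop.mapred.JobConf;

public class SampleDumpConfSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        // Checked via job.getBoolean("mobius.print.sample", false) above.
        job.setBoolean("mobius.print.sample", true);
        // Destination of the gzipped sample dump; defaults to ./samples.txt.gz.
        job.set("mobius.sample.file", "/tmp/samples.txt.gz");
    }
}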

From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseInputFormatUtil.java

License:Apache License

/**
 * Parse {@code jobConf} to create a {@link Scan} instance.
 */
public static Scan getScan(JobConf jobConf) throws IOException {
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);
    List<Integer> readColIDs = ColumnProjectionUtils.getReadColumnIDs(jobConf);
    ColumnMappings columnMappings;

    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
    } catch (SerDeException e) {
        throw new IOException(e);
    }

    if (columnMappings.size() < readColIDs.size()) {
        throw new IOException("Cannot read more columns than the given table contains.");
    }

    boolean readAllColumns = ColumnProjectionUtils.isReadAllColumns(jobConf);
    Scan scan = new Scan();
    boolean empty = true;

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    if (!readAllColumns) {
        ColumnMapping[] columnsMapping = columnMappings.getColumnsMapping();
        for (int i : readColIDs) {
            ColumnMapping colMap = columnsMapping[i];
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }

            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
                addedFamilies.add(colMap.familyName);
            } else {
                if (!addedFamilies.contains(colMap.familyName)) {
                    // add only if the corresponding family has not already been added
                    scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
                }
            }

            empty = false;
        }
    }

    // The HBase table's row key maps to a Hive table column. In the corner case when only the
    // row key column is selected in Hive, the HBase Scan will be empty i.e. no column family/
    // column qualifier will have been added to the scan. We arbitrarily add at least one column
    // to the HBase scan so that we can retrieve all of the row keys and return them as the Hive
    // tables column projection.
    if (empty) {
        for (ColumnMapping colMap : columnMappings) {
            if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
                continue;
            }

            if (colMap.qualifierName == null) {
                scan.addFamily(colMap.familyNameBytes);
            } else {
                scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
            }

            if (!readAllColumns) {
                break;
            }
        }
    }

    String scanCache = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHE);
    if (scanCache != null) {
        scan.setCaching(Integer.valueOf(scanCache));
    }
    String scanCacheBlocks = jobConf.get(HBaseSerDe.HBASE_SCAN_CACHEBLOCKS);
    if (scanCacheBlocks != null) {
        scan.setCacheBlocks(Boolean.valueOf(scanCacheBlocks));
    }
    String scanBatch = jobConf.get(HBaseSerDe.HBASE_SCAN_BATCH);
    if (scanBatch != null) {
        scan.setBatch(Integer.valueOf(scanBatch));
    }
    return scan;
}
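
getScan reads a boolean toggle for regex matching of the column mapping, defaulting to true. A minimal sketch of setting it programmatically, assuming the HBaseSerDe constants (and the import path used below) are accessible from your code; in a Hive deployment these are normally supplied as table or SerDe properties, and the mapping value shown is illustrative:

import org.apache.hadoop.mapred.JobConf;
import com.github.dryangkun.hbase.tidx.hive.HBaseSerDe; // assumed location of HBaseSerDe in this fork

public class HBaseScanConfSketch {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();
        // Read in getScan via jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true).
        jobConf.setBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, false);
        // Read via jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING); illustrative mapping.
        jobConf.set(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cf:col");
    }
}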

From source file:com.github.dryangkun.hbase.tidx.hive.HiveHBaseTableInputFormat.java

License:Apache License

private InputSplit[] getSplitsInternal(JobConf jobConf, int numSplits) throws IOException {

    //obtain delegation tokens for the job
    if (UserGroupInformation.getCurrentUser().hasKerberosCredentials()) {
        TableMapReduceUtil.initCredentials(jobConf);
    }

    String hbaseTableName = jobConf.get(HBaseSerDe.HBASE_TABLE_NAME);
    String hbaseColumnsMapping = jobConf.get(HBaseSerDe.HBASE_COLUMNS_MAPPING);
    boolean doColumnRegexMatching = jobConf.getBoolean(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, true);

    if (hbaseColumnsMapping == null) {
        throw new IOException(HBaseSerDe.HBASE_COLUMNS_MAPPING + " required for HBase Table.");
    }

    ColumnMappings columnMappings = null;
    int iTimeColumn = -1;
    try {
        columnMappings = HBaseSerDe.parseColumnsMapping(hbaseColumnsMapping, doColumnRegexMatching);
        iTimeColumn = HBaseSerDe.getTxTimeColumnIndex(columnMappings, jobConf);
    } catch (SerDeException e) {
        throw new IOException(e);
    }

    int iKey = columnMappings.getKeyIndex();
    int iTimestamp = columnMappings.getTimestampIndex();
    ColumnMapping keyMapping = columnMappings.getKeyMapping();

    if (iTimeColumn != -1) {
        List<org.apache.hadoop.mapreduce.InputSplit> splits = TxHiveTableInputFormatUtil.getSplits(jobConf,
                numSplits, columnMappings, iTimeColumn, hbaseTableName);
        if (splits != null) {
            Job job = new Job(jobConf);
            JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
            Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

            InputSplit[] results = new InputSplit[splits.size()];
            for (int i = 0; i < splits.size(); i++) {
                results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0], true);
            }
            LOG.info("getSplits: TxHiveIndexScan");
            return results;
        }
    }
    LOG.info("getSplits: no TxHiveIndexScan");

    setHTable(new HTable(HBaseConfiguration.create(jobConf), Bytes.toBytes(hbaseTableName)));
    // Take filter pushdown into account while calculating splits; this
    // allows us to prune off regions immediately.  Note that although
    // the Javadoc for the superclass getSplits says that it returns one
    // split per region, the implementation actually takes the scan
    // definition into account and excludes regions which don't satisfy
    // the start/stop row conditions (HBASE-1829).
    Scan scan = createFilterScan(jobConf, iKey, iTimestamp, HiveHBaseInputFormatUtil.getStorageFormatOfKey(
            keyMapping.mappingSpec, jobConf.get(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE, "string")));

    // The list of families that have been added to the scan
    List<String> addedFamilies = new ArrayList<String>();

    // REVIEW:  are we supposed to be applying the getReadColumnIDs
    // same as in getRecordReader?
    for (ColumnMapping colMap : columnMappings) {
        if (colMap.hbaseRowKey || colMap.hbaseTimestamp) {
            continue;
        }

        if (colMap.qualifierName == null) {
            scan.addFamily(colMap.familyNameBytes);
            addedFamilies.add(colMap.familyName);
        } else {
            if (!addedFamilies.contains(colMap.familyName)) {
                // add the column only if the family has not already been added
                scan.addColumn(colMap.familyNameBytes, colMap.qualifierNameBytes);
            }
        }
    }
    setScan(scan);

    Job job = new Job(jobConf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    List<org.apache.hadoop.mapreduce.InputSplit> splits = super.getSplits(jobContext);
    InputSplit[] results = new InputSplit[splits.size()];

    for (int i = 0; i < splits.size(); i++) {
        results[i] = new HBaseSplit((TableSplit) splits.get(i), tablePaths[0]);
    }

    return results;
}