Example usage for org.apache.hadoop.mapred JobConf getLong

Introduction

On this page you can find example usage for org.apache.hadoop.mapred.JobConf.getLong.

Prototype

public long getLong(String name, long defaultValue) 

Document

Get the value of the name property as a long; if the property is not set, defaultValue is returned.
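
A minimal sketch of the call; the property name and values below are illustrative. The default is returned only when the property is absent:

JobConf conf = new JobConf();
conf.setLong("example.sample.size", 50000L); // hypothetical property name
long sampleSize = conf.getLong("example.sample.size", 1000L); // returns 50000
long missing = conf.getLong("example.not.set", -1L); // property absent: returns -1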

Usage

From source file:org.apache.sysml.runtime.matrix.sort.SamplingSortMRInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf the job to sample
 * @param partFile where to write the output file to
 * @return index of the first partition key that is &gt;= 0, or partitions-1 if all sampled keys are negative
 * @throws IOException if something goes wrong
 * @throws InstantiationException if InstantiationException occurs
 * @throws IllegalAccessException if IllegalAccessException occurs
 */
@SuppressWarnings({ "unchecked", "unused", "deprecation" })
public static int writePartitionFile(JobConf conf, Path partFile)
        throws IOException, InstantiationException, IllegalAccessException {
    SamplingSortMRInputFormat inFormat = new SamplingSortMRInputFormat();
    Sampler sampler = new Sampler();

    Class<? extends WritableComparable> targetKeyClass;
    targetKeyClass = (Class<? extends WritableComparable>) conf.getClass(TARGET_KEY_CLASS,
            WritableComparable.class);
    //get input converter information
    int brlen = MRJobConfiguration.getNumRowsPerBlock(conf, (byte) 0);
    int bclen = MRJobConfiguration.getNumColumnsPerBlock(conf, (byte) 0);

    //the number of partitions equals the number of reduce tasks
    int partitions = conf.getNumReduceTasks();

    long sampleSize = conf.getLong(SAMPLE_SIZE, 1000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    // take N samples from different parts of the input

    int totalcount = 0;
    for (int i = 0; i < samples; i++) {
        SequenceFileRecordReader reader = (SequenceFileRecordReader) inFormat
                .getRecordReader(splits[sampleStep * i], conf, null);
        int count = 0;
        WritableComparable key = (WritableComparable) reader.createKey();
        Writable value = (Writable) reader.createValue();
        while (reader.next(key, value) && count < recordsPerSample) {
            Converter inputConverter = MRJobConfiguration.getInputConverter(conf, (byte) 0);
            inputConverter.setBlockSize(brlen, bclen);
            inputConverter.convert(key, value);
            while (inputConverter.hasNext()) {
                Pair pair = inputConverter.next();
                if (pair.getKey() instanceof DoubleWritable) {
                    sampler.addValue(new DoubleWritable(((DoubleWritable) pair.getKey()).get()));
                } else if (pair.getValue() instanceof MatrixCell) {
                    sampler.addValue(new DoubleWritable(((MatrixCell) pair.getValue()).getValue()));
                } else
                    throw new IOException("SamplingSortMRInputFormat unsupported key/value class: "
                            + pair.getKey().getClass() + ":" + pair.getValue().getClass());

                count++;
            }
            key = (WritableComparable) reader.createKey();
            value = (Writable) reader.createValue();
        }
        totalcount += count;
    }

    if (totalcount == 0) //empty input files
        sampler.addValue(new DoubleWritable(0));

    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }

    //note: key value always double/null as expected by partitioner
    SequenceFile.Writer writer = null;
    int index0 = -1;
    try {
        writer = SequenceFile.createWriter(outFs, conf, partFile, DoubleWritable.class, NullWritable.class);
        NullWritable nullValue = NullWritable.get();
        int i = 0;
        boolean lessthan0 = true;
        for (WritableComparable splitValue : sampler.createPartitions(partitions)) {
            writer.append(splitValue, nullValue);
            if (lessthan0 && ((DoubleWritable) splitValue).get() >= 0) {
                index0 = i;
                lessthan0 = false;
            }
            i++;
        }
        if (lessthan0)
            index0 = partitions - 1;
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }

    return index0;
}
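
A hedged driver-side sketch for the method above; the partition path and sample size are illustrative, and it assumes the SAMPLE_SIZE constant is visible to the caller:

JobConf conf = new JobConf();
conf.setNumReduceTasks(4); // N reducers -> N-1 sampled partition keys
conf.setLong(SamplingSortMRInputFormat.SAMPLE_SIZE, 10000L); // read back by getLong(SAMPLE_SIZE, 1000)
int index0 = SamplingSortMRInputFormat.writePartitionFile(conf, new Path("/tmp/sort.partitions"));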

From source file:org.apache.tez.mapreduce.hadoop.TestDeprecatedKeys.java

License:Apache License

@Test(timeout = 5000)
public void verifyReduceKeyTranslation() {
    JobConf jobConf = new JobConf();

    jobConf.setFloat(MRJobConfig.SHUFFLE_INPUT_BUFFER_PERCENT, 0.4f);
    jobConf.setLong(MRJobConfig.REDUCE_MEMORY_TOTAL_BYTES, 20000L);
    jobConf.setInt(MRJobConfig.IO_SORT_FACTOR, 2000);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MEMORY_LIMIT_PERCENT, 0.55f);
    jobConf.setFloat(MRJobConfig.REDUCE_MEMTOMEM_THRESHOLD, 0.60f);
    jobConf.setFloat(MRJobConfig.SHUFFLE_MERGE_PERCENT, 0.22f);
    jobConf.setBoolean(MRJobConfig.REDUCE_MEMTOMEM_ENABLED, true);
    jobConf.setFloat(MRJobConfig.REDUCE_INPUT_BUFFER_PERCENT, 0.33f);

    MRHelpers.translateMRConfToTez(jobConf);

    assertEquals(0.4f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_FETCH_BUFFER_PERCENT, 0f),
            0.01f);
    assertEquals(20000L, jobConf.getLong(Constants.TEZ_RUNTIME_TASK_MEMORY, 0));
    assertEquals(2000, jobConf.getInt(TezRuntimeConfiguration.TEZ_RUNTIME_IO_SORT_FACTOR, 0));
    assertEquals(0.55f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMORY_LIMIT_PERCENT, 0),
            0.01f);
    assertEquals(0.60f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MEMTOMEM_SEGMENTS, 0),
            0.01f);
    assertEquals(0.22f, jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_MERGE_PERCENT, 0), 0.01f);
    assertEquals(true, jobConf.getBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_SHUFFLE_ENABLE_MEMTOMEM, false));
    assertEquals(0.33f,
            jobConf.getFloat(TezRuntimeConfiguration.TEZ_RUNTIME_INPUT_POST_MERGE_BUFFER_PERCENT, 0), 0.01f);
}

From source file:org.archive.mapred.ARCMapRunner.java

License:LGPL

public void configure(JobConf job) {
    this.mapper = (ARCRecordMapper) ReflectionUtils.newInstance(job.getMapperClass(), job);
    // Value is in minutes; converted to milliseconds here.
    this.maxtime = job.getLong("wax.index.timeout", 60) * 60 * 1000;
}
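
Driver-side counterpart; the 30-minute value is illustrative, and configure() above converts it to milliseconds:

JobConf job = new JobConf();
job.setLong("wax.index.timeout", 30L); // configure() yields maxtime = 30 * 60 * 1000 ms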

From source file:org.commoncrawl.mapred.pipelineV3.RegExFilter.java

License:Open Source License

@Override
public void configure(JobConf job) {

    super.configure(job);

    String regEx = job.get(REGEX_KEY);
    pattern = Pattern.compile(regEx);
    inputFile = job.get("map.input.file");
    inputFilePos = job.getLong("map.input.start", -1L);
}
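
Pattern.compile throws if REGEX_KEY was never set, so the driver must supply it first; a sketch with an illustrative pattern, assuming the REGEX_KEY constant is visible to the caller:

JobConf job = new JobConf();
job.set(RegExFilter.REGEX_KEY, "^https?://[^/]*\\.example\\.org/.*"); // hypothetical filter pattern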

From source file:org.hxx.hadoop.URLCountPartitioner.java

License:Apache License

public void configure(JobConf job) {
    seed = job.getInt("partition.url.seed", 0);
    normalizers = new URLNormalizers(job, URLNormalizers.SCOPE_PARTITION);

    topn = job.getLong(Generator.GENERATOR_TOP_N, 100000);
    hostn = job.getInt(Generator.GENERATOR_MAX_COUNT, -1);
    // cntStr = job.get(GeneratorHbase.GENERATL_CNT);// ?
    // int reduceNum = job.getInt(GeneratorHbase.GENERATL_REDUCENUM, 1);
    // initPart(reduceNum);
}
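
Driver-side counterpart for the two generator limits read above; the values are illustrative:

JobConf job = new JobConf();
job.setLong(Generator.GENERATOR_TOP_N, 50000L); // read back by getLong(..., 100000)
job.setInt(Generator.GENERATOR_MAX_COUNT, 100); // read back by getInt(..., -1)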

From source file:org.smartfrog.services.hadoop.mapreduce.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample keys. By default reads 100,000 keys from 10
 * locations in the input, sorts them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param conf     the job to sample
 * @param partFile where to write the output file to
 * @throws IOException if something goes wrong
 */
public static void writePartitionFile(JobConf conf, Path partFile) throws IOException {
    TeraInputFormat inFormat = new TeraInputFormat();
    TextSampler sampler = new TextSampler();
    Text key = new Text();
    Text value = new Text();
    int partitions = conf.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraConstants.SAMPLE_SIZE, 100000);
    InputSplit[] splits = inFormat.getSplits(conf, conf.getNumMapTasks());
    int samples = Math.min(10, splits.length);
    long recordsPerSample = sampleSize / samples;
    int sampleStep = splits.length / samples;
    long records = 0;
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        RecordReader<Text, Text> reader = inFormat.getRecordReader(splits[sampleStep * i], conf, null);
        while (reader.next(key, value)) {
            sampler.addKey(key);
            records += 1;
            if ((i + 1) * recordsPerSample <= records) {
                break;
            }
        }
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    if (outFs.exists(partFile)) {
        outFs.delete(partFile, false);
    }
    SequenceFile.Writer writer = SequenceFile.createWriter(outFs, conf, partFile, Text.class,
            NullWritable.class);
    NullWritable nullValue = NullWritable.get();
    for (Text split : sampler.createPartitions(partitions)) {
        writer.append(split, nullValue);
    }
    writer.close();
}
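
As with the SysML variant above, the sample size is configurable; an illustrative override, assuming the TeraConstants.SAMPLE_SIZE constant is visible to the caller. With 10 sampled splits, each contributes sampleSize / 10 records:

JobConf conf = new JobConf();
conf.setLong(TeraConstants.SAMPLE_SIZE, 1000000L); // 10 sampled splits -> 100,000 records each
TeraInputFormat.writePartitionFile(conf, new Path("/tmp/tera.partitions"));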

From source file:org.wikimedia.wikihadoop.StreamWikiDumpInputFormat.java

License:Apache License

/** 
 * Generate the list of files and make them into FileSplits.
 * @param job the job context
 * @throws IOException
 */
@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    LOG.info("StreamWikiDumpInputFormat.getSplits job=" + job + " n=" + numSplits);
    InputSplit[] oldSplits = super.getSplits(job, numSplits);
    List<InputSplit> splits = new ArrayList<InputSplit>();
    FileStatus[] files = listStatus(job);
    // Save the number of input files for metrics/loadgen
    job.setLong(NUM_INPUT_FILES, files.length);
    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    long minSize = job.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1);
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    for (FileStatus file : files) {
        if (file.isDirectory()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        long blockSize = file.getBlockSize();
        long splitSize = computeSplitSize(goalSize, minSize, blockSize);
        LOG.info(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        //System.err.println(String.format("goalsize=%d splitsize=%d blocksize=%d", goalSize, splitSize, blockSize));
        for (InputSplit x : getSplits(job, file, pageBeginPattern, splitSize))
            splits.add(x);
    }
    System.err.println("splits=" + splits);
    return splits.toArray(new InputSplit[splits.size()]);
}
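
The minimum split size honored above can be raised from the driver; the 128 MB value is illustrative:

JobConf job = new JobConf();
job.setLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 128L * 1024 * 1024);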

From source file:rugal.hadoop.repartition.enhanced.impl.OptimizedDataJoinReducerBase.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);
    this.job = job;
    this.maxNumOfValuesPerGroup = job.getLong("datajoin.maxNumOfValuesPerGroup", 100);
}
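
Driver-side counterpart; the 500 value is illustrative:

JobConf job = new JobConf();
job.setLong("datajoin.maxNumOfValuesPerGroup", 500L); // read back by getLong(..., 100)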

From source file:skewtune.mapreduce.STJobTracker.java

License:Apache License

@SuppressWarnings("unchecked")
STJobTracker(final JobConf conf, String jobtrackerIndentifier) throws IOException, InterruptedException {
    // find the owner of the process
    // get the desired principal to load
    String keytabFilename = conf.get(JTConfig.JT_KEYTAB_FILE);
    UserGroupInformation.setConfiguration(conf);
    if (keytabFilename != null) {
        String desiredUser = conf.get(JTConfig.JT_USER_NAME, System.getProperty("user.name"));
        UserGroupInformation.loginUserFromKeytab(desiredUser, keytabFilename);
        mrOwner = UserGroupInformation.getLoginUser();
    } else {
        mrOwner = UserGroupInformation.getCurrentUser();
    }

    supergroup = conf.get(MR_SUPERGROUP, "supergroup");
    LOG.info("Starting jobtracker with owner as " + mrOwner.getShortUserName() + " and supergroup as "
            + supergroup);

    long secretKeyInterval = conf.getLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY,
            MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_DEFAULT);
    long tokenMaxLifetime = conf.getLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY,
            MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_DEFAULT);
    long tokenRenewInterval = conf.getLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY,
            MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_DEFAULT);
    secretManager = new DelegationTokenSecretManager(secretKeyInterval, tokenMaxLifetime, tokenRenewInterval,
            DELEGATION_TOKEN_GC_INTERVAL);
    secretManager.startThreads();

    //
    // Grab some static constants
    //

    NUM_HEARTBEATS_IN_SECOND = conf.getInt(JT_HEARTBEATS_IN_SECOND, DEFAULT_NUM_HEARTBEATS_IN_SECOND);
    if (NUM_HEARTBEATS_IN_SECOND < MIN_NUM_HEARTBEATS_IN_SECOND) {
        NUM_HEARTBEATS_IN_SECOND = DEFAULT_NUM_HEARTBEATS_IN_SECOND;
    }

    HEARTBEATS_SCALING_FACTOR = conf.getFloat(JT_HEARTBEATS_SCALING_FACTOR, DEFAULT_HEARTBEATS_SCALING_FACTOR);
    if (HEARTBEATS_SCALING_FACTOR < MIN_HEARTBEATS_SCALING_FACTOR) {
        HEARTBEATS_SCALING_FACTOR = DEFAULT_HEARTBEATS_SCALING_FACTOR;
    }

    // whether to dump or not every heartbeat message even when DEBUG is enabled
    dumpHeartbeat = conf.getBoolean(JT_HEARTBEATS_DUMP, false);

    // This is a directory of temporary submission files. We delete it
    // on startup, and can delete any files that we're done with
    this.conf = conf;
    JobConf jobConf = new JobConf(conf);

    // Set ports, start RPC servers, setup security policy etc.
    InetSocketAddress addr = getAddress(conf);
    this.localMachine = addr.getHostName();
    this.port = addr.getPort();

    int handlerCount = conf.getInt(JT_IPC_HANDLER_COUNT, 10);
    this.interTrackerServer = RPC.getServer(SkewTuneClientProtocol.class, this, addr.getHostName(),
            addr.getPort(), handlerCount, false, conf, secretManager);
    if (LOG.isDebugEnabled()) {
        Properties p = System.getProperties();
        for (Iterator it = p.keySet().iterator(); it.hasNext();) {
            String key = (String) it.next();
            String val = p.getProperty(key);
            LOG.debug("Property '" + key + "' is " + val);
        }
    }

    InetSocketAddress infoSocAddr = NetUtils
            .createSocketAddr(conf.get(JT_HTTP_ADDRESS, String.format("%s:0", this.localMachine)));
    String infoBindAddress = infoSocAddr.getHostName();
    int tmpInfoPort = infoSocAddr.getPort();
    this.startTime = System.currentTimeMillis();
    infoServer = new HttpServer("job", infoBindAddress, tmpInfoPort, tmpInfoPort == 0, conf);
    infoServer.setAttribute("job.tracker", this);
    infoServer.addServlet("jobcompletion", "/completion", JobCompletionServlet.class);
    infoServer.addServlet("taskspeculation", "/speculation", SpeculationEventServlet.class);
    infoServer.addServlet("skewreport", "/skew", SkewReportServlet.class);
    infoServer.addServlet("tasksplit", "/split/*", SplitTaskServlet.class);
    infoServer.addServlet("tasksplitV2", "/splitV2/*", SplitTaskV2Servlet.class);
    infoServer.start();

    this.trackerIdentifier = jobtrackerIndentifier;

    // The rpc/web-server ports can be ephemeral ports...
    // ... ensure we have the correct info
    this.port = interTrackerServer.getListenerAddress().getPort();
    this.conf.set(JT_IPC_ADDRESS, (this.localMachine + ":" + this.port));
    LOG.info("JobTracker up at: " + this.port);
    this.infoPort = this.infoServer.getPort();
    this.conf.set(JT_HTTP_ADDRESS, infoBindAddress + ":" + this.infoPort);
    LOG.info("JobTracker webserver: " + this.infoServer.getPort());
    this.defaultNotificationUrl = String.format("http://%s:%d/completion?jobid=$jobId&status=$jobStatus",
            infoBindAddress, this.infoPort);
    LOG.info("JobTracker completion URI: " + defaultNotificationUrl);
    //        this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?taskid=$taskId&remainTime=$taskRemainTime",infoBindAddress,this.infoPort);
    this.defaultSpeculationEventUrl = String.format("http://%s:%d/speculation?jobid=$jobId", infoBindAddress,
            this.infoPort);
    LOG.info("JobTracker speculation event URI: " + defaultSpeculationEventUrl);
    this.defaultSkewReportUrl = String.format("http://%s:%d/skew", infoBindAddress, this.infoPort);
    LOG.info("JobTracker skew report event URI: " + defaultSkewReportUrl);
    this.trackerHttp = String.format("http://%s:%d", infoBindAddress, this.infoPort);

    while (!Thread.currentThread().isInterrupted()) {
        try {
            // if we haven't contacted the namenode go ahead and do it
            if (fs == null) {
                fs = mrOwner.doAs(new PrivilegedExceptionAction<FileSystem>() {
                    @Override
                    public FileSystem run() throws IOException {
                        return FileSystem.get(conf);
                    }
                });
            }

            // clean up the system dir, which will only work if hdfs is out
            // of safe mode
            if (systemDir == null) {
                systemDir = new Path(getSystemDir());
            }
            try {
                FileStatus systemDirStatus = fs.getFileStatus(systemDir);
                if (!systemDirStatus.getOwner().equals(mrOwner.getShortUserName())) {
                    throw new AccessControlException(
                            "The systemdir " + systemDir + " is not owned by " + mrOwner.getShortUserName());
                }
                if (!systemDirStatus.getPermission().equals(SYSTEM_DIR_PERMISSION)) {
                    LOG.warn("Incorrect permissions on " + systemDir + ". Setting it to "
                            + SYSTEM_DIR_PERMISSION);
                    fs.setPermission(systemDir, new FsPermission(SYSTEM_DIR_PERMISSION));
                } else {
                    break;
                }
            } catch (FileNotFoundException fnf) {
            } // ignore
        } catch (AccessControlException ace) {
            LOG.warn("Failed to operate on " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") because of permissions.");
            LOG.warn("Manually delete the " + JTConfig.JT_SYSTEM_DIR + "(" + systemDir
                    + ") and then start the JobTracker.");
            LOG.warn("Bailing out ... ");
            throw ace;
        } catch (IOException ie) {
            LOG.info("problem cleaning system directory: " + systemDir, ie);
        }
        Thread.sleep(FS_ACCESS_RETRY_PERIOD);
    }

    if (Thread.currentThread().isInterrupted()) {
        throw new InterruptedException();
    }

    // initialize cluster variable
    cluster = new Cluster(this.conf);

    // now create a job client proxy
    jtClient = (ClientProtocol) RPC.getProxy(ClientProtocol.class, ClientProtocol.versionID,
            JobTracker.getAddress(conf), mrOwner, this.conf,
            NetUtils.getSocketFactory(conf, ClientProtocol.class));

    new SpeculativeScheduler().start();

    // initialize task event fetcher
    new TaskCompletionEventFetcher().start();

    // Same with 'localDir' except it's always on the local disk.
    asyncDiskService = new MRAsyncDiskService(FileSystem.getLocal(conf), conf.getLocalDirs());
    asyncDiskService.moveAndDeleteFromEachVolume(SUBDIR);

    // keep at least one asynchronous worker per CPU core
    int numProcs = Runtime.getRuntime().availableProcessors();
    LOG.info("# of available processors = " + numProcs);
    int maxFactor = conf.getInt(JT_MAX_ASYNC_WORKER_FACTOR, 2);
    asyncWorkers = new ThreadPoolExecutor(numProcs, numProcs * maxFactor, 30, TimeUnit.SECONDS,
            new SynchronousQueue<Runnable>(true), new ThreadPoolExecutor.CallerRunsPolicy());

    speculativeSplit = conf.getBoolean(JT_SPECULATIVE_SPLIT, false);
}
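
The three delegation-token intervals read near the top of the constructor can be overridden before constructing the tracker; the millisecond values below are illustrative:

JobConf conf = new JobConf();
conf.setLong(MRConfig.DELEGATION_KEY_UPDATE_INTERVAL_KEY, 24L * 60 * 60 * 1000); // rotate the master key daily
conf.setLong(MRConfig.DELEGATION_TOKEN_RENEW_INTERVAL_KEY, 24L * 60 * 60 * 1000); // renew tokens daily
conf.setLong(MRConfig.DELEGATION_TOKEN_MAX_LIFETIME_KEY, 7L * 24 * 60 * 60 * 1000); // expire after one week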

From source file:StorageEngineClient.CombineColumnStorageFileInputFormat.java

License:Open Source License

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {

    long minSizeNode = 0;
    long minSizeRack = 0;
    long maxSize = 0;

    if (minSplitSizeNode != 0) {
        minSizeNode = minSplitSizeNode;
    } else {
        minSizeNode = job.getLong("mapred.min.split.size.per.node", 0);
    }
    if (minSplitSizeRack != 0) {
        minSizeRack = minSplitSizeRack;
    } else {
        minSizeRack = job.getLong("mapred.min.split.size.per.rack", 0);
    }
    if (maxSplitSize != 0) {
        maxSize = maxSplitSize;
    } else {
        maxSize = job.getLong("mapred.max.split.size", 0);
    }
    if (maxSize == 0) {
        maxSize = (long) (job.getLong("dfs.block.size", 512 * 1024 * 1024) * 0.8);
    }
    if (minSizeNode == 0) {
        minSizeNode = maxSize / 2;
    }
    if (minSizeRack == 0) {
        minSizeRack = maxSize / 2;
    }
    if (minSizeNode != 0 && maxSize != 0 && minSizeNode > maxSize) {
        throw new IOException("Minimum split size pernode " + minSizeNode
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && maxSize != 0 && minSizeRack > maxSize) {
        throw new IOException("Minimum split size per rack" + minSizeRack
                + " cannot be larger than maximum split size " + maxSize);
    }
    if (minSizeRack != 0 && minSizeNode > minSizeRack) {
        throw new IOException("Minimum split size per node" + minSizeNode
                + " cannot be smaller than minimum split size per rack " + minSizeRack);
    }

    Path[] paths = FileUtil.stat2Paths(listStatus(job));
    List<CombineFileSplit> splits = new ArrayList<CombineFileSplit>();
    if (paths.length == 0) {
        return splits.toArray(new CombineFileSplit[splits.size()]);
    }

    for (MultiPathFilter onepool : pools) {
        ArrayList<Path> myPaths = new ArrayList<Path>();

        for (int i = 0; i < paths.length; i++) {
            if (paths[i] == null) {
                continue;
            }
            Path p = new Path(paths[i].toUri().getPath());
            if (onepool.accept(p)) {
                myPaths.add(paths[i]);
                paths[i] = null;
            }
        }
        getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack,
                splits);
    }

    ArrayList<Path> myPaths = new ArrayList<Path>();
    for (int i = 0; i < paths.length; i++) {
        if (paths[i] == null) {
            continue;
        }
        myPaths.add(paths[i]);
    }
    LOG.info("myPaths size:\t" + myPaths.size());
    getMoreSplits(job, myPaths.toArray(new Path[myPaths.size()]), maxSize, minSizeNode, minSizeRack, splits);
    if (splits.size() == 0)
        return super.getSplits(job, numSplits);
    LOG.info("splits #:\t" + splits.size());
    return splits.toArray(new CombineFileSplit[splits.size()]);
}
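
When none of the three properties is set, the fallbacks above yield maxSize = 0.8 * dfs.block.size and both minimums default to maxSize / 2. Explicit driver-side settings, with illustrative values that respect the ordering checks above:

job.setLong("mapred.max.split.size", 256L * 1024 * 1024); // 256 MB cap per combined split
job.setLong("mapred.min.split.size.per.node", 64L * 1024 * 1024); // must not exceed the cap
job.setLong("mapred.min.split.size.per.rack", 128L * 1024 * 1024); // >= per-node minimum, <= cap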