Example usage for org.apache.hadoop.mapred JobConf getNumReduceTasks

Introduction

This page lists example usages of org.apache.hadoop.mapred.JobConf#getNumReduceTasks, collected from open-source projects.

Prototype

public int getNumReduceTasks() 

Document

Get the configured number of reduce tasks for this job. Defaults to 1.
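
Before the project examples below, here is a minimal, hypothetical driver sketch (the class and job names are placeholders) showing how the getter pairs with setNumReduceTasks:

import org.apache.hadoop.mapred.JobConf;

public class ReduceCountDemo {
    public static void main(String[] args) {
        // JobConf is the pre-0.20 ("mapred") job configuration class.
        JobConf conf = new JobConf(ReduceCountDemo.class);
        conf.setJobName("reduce-count-demo"); // placeholder job name

        // With nothing set, getNumReduceTasks() returns the configured
        // default for mapred.reduce.tasks (normally 1).
        System.out.println("Reducers before set: " + conf.getNumReduceTasks());

        conf.setNumReduceTasks(4);
        System.out.println("Reducers after set: " + conf.getNumReduceTasks()); // prints 4

        // Common idiom seen in the examples below: zero reducers means
        // the job is map-only.
        boolean mapOnly = conf.getNumReduceTasks() == 0;
        System.out.println("Map-only job: " + mapOnly);
    }
}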

Usage

From source file:com.scaleoutsoftware.soss.hserver.JobScheduler.java

License:Apache License

/**
 * Runs the map-reduce job on ScaleOut hServer.
 *
 * @param job          the job to run
 * @param jobId        the id of the job
 * @param sortEnabled  if key sorting is enabled
 * @param jobParameter user defined parameter object for the job
 * @param grid         the invocation grid to run the job
 * @throws IOException            if errors occurred during the job
 * @throws InterruptedException   if the processing thread is interrupted
 * @throws ClassNotFoundException if the invocation grid does not contain the dependency class
 */
@SuppressWarnings("unchecked")
public void runOldApiJob(JobConf job, org.apache.hadoop.mapred.JobID jobId, boolean sortEnabled,
        Object jobParameter, InvocationGrid grid)
        throws IOException, InterruptedException, ClassNotFoundException {
    //Initialize user credential in advance
    int jobAppId = 0xFFFFFFF & BitConverter.hashStringOneInt(jobId.toString());
    String hadoopVersion = VersionInfo.getVersion();
    long time = System.currentTimeMillis();
    CreateUserCredentials.run(grid);

    try {
        //Check output specs before running the job
        job.getOutputFormat().checkOutputSpecs(FileSystem.get(job), job);

        JobContext jContext = HadoopVersionSpecificCode.getInstance(hadoopVersion, job).createJobContext(job,
                jobId);

        org.apache.hadoop.mapred.OutputCommitter outputCommitter = job.getOutputCommitter();
        outputCommitter.setupJob(jContext);

        //clear all temporary objects
        DataAccessor.clearObjects(jobAppId);

        //Calculating the partition layout
        com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping hostNameToPartition = com.scaleoutsoftware.soss.client.util.HostToPartitionsMapping
                .getCurrent();
        List<InetAddress> hostAddresses = new ArrayList<InetAddress>(hostNameToPartition.getHosts());

        //Generating mapping of Hadoop partitions to SOSS partitions, so they are equally distributed across hosts
        int numHosts = hostAddresses.size();
        int numberOfSlotsPerNode = Math
                .max(grid != null ? grid.getMaxNumberOfCores() : Runtime.getRuntime().availableProcessors(), 1);

        //Set the number of splits to the number of cores
        if (NamedMapInputFormatMapred.class.isAssignableFrom(job.getInputFormat().getClass())) {
            int numberOfSplits = HServerParameters.getSetting(MAP_SPLITS_PER_CORE, job) * numHosts
                    * numberOfSlotsPerNode;
            job.setNumMapTasks(Math.min(numberOfSplits, HServerConstants.MAX_MAP_REDUCE_TASKS));
        }

        //Generating split to hostname map
        org.apache.hadoop.mapred.InputFormat inputFormat = job.getInputFormat();
        List<org.apache.hadoop.mapred.InputSplit> splitList = Arrays
                .asList(inputFormat.getSplits(job, job.getNumMapTasks()));
        Map<InetAddress, List<Integer>> splitToHostAddress = assignSplitsToHost(splitList, hostAddresses, null);

        //Choose the optimal number of reducers for GridOutputFormat
        if (job.getOutputFormat() instanceof NamedMapOutputFormatMapred) {
            job.setNumReduceTasks(numHosts * numberOfSlotsPerNode);
            sortEnabled = false;
        }

        int[] partitionMapping = hostNameToPartition.generateEvenItemDistribution(job.getNumReduceTasks());

        //Generating invocation parameters
        Class<? extends org.apache.hadoop.mapred.InputSplit> splitType = splitList.size() > 0
                ? splitList.get(0).getClass()
                : null;

        HadoopInvocationParameters hadoopParameters = new HadoopInvocationParameters(job, jobId, true);

        HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit> parameters = new HServerInvocationParameters<org.apache.hadoop.mapred.InputSplit>(
                hadoopParameters, jobAppId, partitionMapping, hostNameToPartition, numberOfSlotsPerNode,
                splitType, splitList, splitToHostAddress, false, sortEnabled, hadoopVersion, jobParameter,
                SerializationMode.DEFAULT);

        StringBuilder stringBuilder = new StringBuilder();
        stringBuilder.append("Splits created:\n");
        for (InetAddress address : splitToHostAddress.keySet()) {
            stringBuilder.append("Host ");
            stringBuilder.append(address);
            stringBuilder.append(" has ");
            stringBuilder.append(splitToHostAddress.get(address).size());
            stringBuilder.append(" splits.\n");
        }
        System.out.println(stringBuilder.toString());

        System.out.println("Job initialization completed in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        InvokeResult<MapperResult> mapInvokeResult = MessagingHelper.invoke(grid,
                RunMapper.MapperInvokable.class, parameters, TimeSpan.INFINITE_TIMEOUT.getSeconds());

        if (mapInvokeResult.getErrors() != null && mapInvokeResult.getErrors().size() > 0) {
            throw new IOException("Map invocation failed.", mapInvokeResult.getErrors().get(0));
        }

        System.out.println("Map invocation done in " + (System.currentTimeMillis() - time) + " ms.");
        time = System.currentTimeMillis();

        MapperResult resultObject = mapInvokeResult.getResult();

        if (resultObject == null || mapInvokeResult.getNumFailed() != 0) {
            throw new IOException("Mapper invocation failed. Num failed = " + mapInvokeResult.getNumFailed());
        }

        if (resultObject.getNumberOfSplitsProcessed() != splitList.size()) {
            throw new IOException("Number of splits does not match the number of invocations. Nsplits = "
                    + splitList.size() + ", Ninvokes =" + resultObject.getNumberOfSplitsProcessed());
        }

        if (partitionMapping.length > 0) {
            //Running the reduce step
            InvokeResult<Integer> reduceInvokeResult = MessagingHelper.invoke(grid, ReduceInvokable.class,
                    jobAppId, TimeSpan.INFINITE_TIMEOUT.getSeconds());

            System.out.println("Reduce invocation done in " + (System.currentTimeMillis() - time) + " ms.");

            DataAccessor.clearObjects(jobAppId); //clear all temporary objects

            if (reduceInvokeResult.getErrors() != null && reduceInvokeResult.getErrors().size() > 0) {
                throw new IOException("Reduce invocation failed.", reduceInvokeResult.getErrors().get(0));
            }
            if (reduceInvokeResult.getNumFailed() != 0) {
                throw new IOException("Reduce invocation failed.");
            }
            if (reduceInvokeResult.getResult() != partitionMapping.length) {
                throw new IOException("Not all partitions were reduced. Expected = " + partitionMapping.length
                        + " Actual = " + reduceInvokeResult.getResult());
            }
        }
        outputCommitter.commitJob(jContext);
    } catch (StateServerException e) {
        throw new IOException("ScaleOut hServer access error.", e);
    }

}
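
Note how getNumReduceTasks() drives the partition layout here: the scheduler first overrides the reducer count for NamedMapOutputFormatMapred jobs, then asks for the (possibly updated) count to generate an even item distribution across SOSS partitions.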

From source file:com.tomslabs.grid.avro.TextTypedBytesToAvroOutputFormat.java

License:Apache License

public RecordWriter<TypedBytesWritable, TypedBytesWritable> getRecordWriter(FileSystem ignore, JobConf job,
        String name, Progressable prog) throws IOException {

    boolean isMapOnly = job.getNumReduceTasks() == 0;
    Schema schema = isMapOnly ? AvroJob.getMapOutputSchema(job) : AvroJob.getOutputSchema(job);

    final DataFileWriter<GenericRecord> writer = new DataFileWriter<GenericRecord>(
            new GenericDatumWriter<GenericRecord>(schema));

    if (FileOutputFormat.getCompressOutput(job)) {
        int level = job.getInt(DEFLATE_LEVEL_KEY, DEFAULT_DEFLATE_LEVEL);
        writer.setCodec(CodecFactory.deflateCodec(level));
    }

    // copy metadata from job
    for (Map.Entry<String, String> e : job) {
        if (e.getKey().startsWith(AvroJob.TEXT_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.TEXT_PREFIX.length()), e.getValue());
        if (e.getKey().startsWith(AvroJob.BINARY_PREFIX))
            writer.setMeta(e.getKey().substring(AvroJob.BINARY_PREFIX.length()),
                    URLDecoder.decode(e.getValue(), "ISO-8859-1").getBytes("ISO-8859-1"));
    }

    Path path = FileOutputFormat.getTaskOutputPath(job, name + EXT);
    writer.create(schema, path.getFileSystem(job).create(path));

    return new AvroRecordWriter(writer, schema);
}
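
In this Avro output format, getNumReduceTasks() == 0 identifies a map-only job, which selects the mapper's output schema (AvroJob.getMapOutputSchema) rather than the final output schema for the record writer.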

From source file:edu.brown.cs.mapreduce.BenchmarkBase.java

License:Open Source License

public void runJob(JobConf _conf) throws Exception {
    String ret = "BenchmarkBase(" + _conf.getJobName() + ")\n" + "\tInput Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(_conf);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0)
            ret += ", ";
        ret += inputs[ctr].toString();
    }
    ret += "}\n";

    ret += "\tOutput Path: " + FileOutputFormat.getOutputPath(_conf) + "\n" + "\tMap Jobs:    "
            + _conf.getNumMapTasks() + "\n" + "\tReduce Jobs: " + _conf.getNumReduceTasks() + "\n"
            + "\tProperties:  " + this.options;
    System.out.println(ret);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(_conf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
            "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
    this.last_job = _conf;
    return;
}
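
Here, as in the FuzzyJoinDriver example further down, getNumReduceTasks() is used purely for diagnostics: the driver echoes the configured map and reduce parallelism before handing the job to JobClient.runJob.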

From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java

License:Apache License

public JobInfo getInfo(Submission submission, RunningJob job, JobConf conf)
        throws NotFoundException, InternalException {

    JobInfo info = new JobInfo();

    info.setNativeID(submission.getHadoopID());
    info.setName(job.getJobName());
    info.setTest(false);

    if (conf == null) {
        // Can't proceed any further if configuration is unavailable
        return info;
    }

    info.setRequestedMapTasks(conf.getNumMapTasks());
    info.setRequestedReduceTasks(conf.getNumReduceTasks());
    info.setMapper(conf.get(CONF_MAPPER));
    info.setReducer(conf.get(CONF_REDUCER));
    info.setNumericSort(conf.getBoolean(CONF_NUMERIC, false));
    info.setInputPath(
            JobServiceHandler.relativizePath(_homeDir, FileInputFormat.getInputPaths(conf)[0]).toString());
    info.setOutputPath(
            JobServiceHandler.relativizePath(_homeDir, FileOutputFormat.getOutputPath(conf)).toString());

    return info;
}
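
The value stored via setRequestedReduceTasks is the count requested in the job configuration; live progress and counters would instead come from the RunningJob handle passed alongside it.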

From source file:edu.uci.ics.fuzzyjoin.hadoop.FuzzyJoinDriver.java

License:Apache License

public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    //
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + "  Input Path:  {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n                ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += "  Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + "  Map Jobs:    "
            + job.getNumMapTasks() + "\n" + "  Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + "  Properties:  {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n                ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
            "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
                // length-stats path not configured; proceed without it
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}
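
This mapper, and the two ppjoin mappers that follow, size the record-group count as Math.max(1, job.getNumReduceTasks() * recordGroupFactor), guaranteeing at least one group even when the job is configured with zero reducers.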

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridpairs.ppjoin.MapSelfJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
                // length-stats path not configured; proceed without it
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
}

From source file:edu.uci.ics.fuzzyjoin.hadoop.ridrecordpairs.ppjoin.MapJoin.java

License:Apache License

@Override
public void configure(JobConf job) {
    //
    // set Tokenizer and SimilarityFilters
    //
    tokenizer = TokenizerFactory.getTokenizer(
            job.get(FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE),
            FuzzyJoinConfig.WORD_SEPARATOR_REGEX, FuzzyJoinConfig.TOKEN_SEPARATOR);
    String similarityName = job.get(FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_NAME_VALUE);
    float similarityThreshold = job.getFloat(FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
            FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE);
    similarityFilters = SimilarityFiltersFactory.getSimilarityFilters(similarityName, similarityThreshold);
    //
    // set TokenRank and TokenGroup
    //
    Path tokensPath;
    Path lengthstatsPath = null;
    try {
        Path[] cache = DistributedCache.getLocalCacheFiles(job);
        if (cache == null) {
            tokensPath = new Path(job.get(FuzzyJoinConfig.DATA_TOKENS_PROPERTY));
            try {
                lengthstatsPath = new Path(job.get(FuzzyJoinDriver.DATA_LENGTHSTATS_PROPERTY));
            } catch (IllegalArgumentException e) {
                // length-stats path not configured; proceed without it
            }
        } else {
            tokensPath = cache[0];
            if (job.getBoolean(FuzzyJoinDriver.TOKENS_LENGTHSTATS_PROPERTY,
                    FuzzyJoinDriver.TOKENS_LENGTHSTATS_VALUE)) {
                lengthstatsPath = cache[1];
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    String recordGroupClass = job.get(FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_CLASS_VALUE);
    int recordGroupFactor = job.getInt(FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_PROPERTY,
            FuzzyJoinDriver.RIDPAIRS_GROUP_FACTOR_VALUE);
    recordGroup = RecordGroupFactory.getRecordGroup(recordGroupClass,
            Math.max(1, job.getNumReduceTasks() * recordGroupFactor), similarityFilters, "" + lengthstatsPath);
    TokenLoad tokenLoad = new TokenLoad(tokensPath.toString(), tokenRank);
    tokenLoad.loadTokenRank();
    //
    // set dataColumn
    //
    dataColumns[0] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA_PROPERTY, FuzzyJoinConfig.RECORD_DATA_VALUE));
    dataColumns[1] = FuzzyJoinUtil
            .getDataColumns(job.get(FuzzyJoinConfig.RECORD_DATA1_PROPERTY, FuzzyJoinConfig.RECORD_DATA1_VALUE));
    //
    // get suffix for second relation
    //
    suffixSecond = job.get(FuzzyJoinDriver.DATA_SUFFIX_INPUT_PROPERTY, "")
            .split(FuzzyJoinDriver.SEPSARATOR_REGEX)[1];
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopWriteOperatorDescriptor.java

License:Apache License

private static FileSplit[] getOutputSplits(JobConf conf, int noOfMappers) throws ClassNotFoundException {
    int numOutputters = conf.getNumReduceTasks() != 0 ? conf.getNumReduceTasks() : noOfMappers;
    Object outputFormat = null;
    if (conf.getUseNewMapper()) {
        outputFormat = ReflectionUtils
                .newInstance(new ContextFactory().createJobContext(conf).getOutputFormatClass(), conf);
    } else {
        outputFormat = conf.getOutputFormat();
    }
    if (outputFormat instanceof NullOutputFormat) {
        FileSplit[] outputFileSplits = new FileSplit[numOutputters];
        for (int i = 0; i < numOutputters; i++) {
            String outputPath = "/tmp/" + System.currentTimeMillis() + i;
            outputFileSplits[i] = new FileSplit("localhost", new FileReference(new File(outputPath)));
        }
        return outputFileSplits;
    } else {
        FileSplit[] outputFileSplits = new FileSplit[numOutputters];
        String absolutePath = FileOutputFormat.getOutputPath(conf).toString();
        for (int index = 0; index < numOutputters; index++) {
            String suffix = "part-00000";
            suffix = suffix.substring(0, suffix.length() - ("" + index).length()) + index;
            String outputPath = absolutePath + "/" + suffix;
            outputFileSplits[index] = new FileSplit("localhost", outputPath);
        }
        return outputFileSplits;
    }
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

private IOperatorDescriptor configureOutput(IOperatorDescriptor previousOperator, JobConf conf,
        JobSpecification spec) throws Exception {
    int instanceCountPreviousOperator = operatorInstanceCount.get(previousOperator.getOperatorId());
    int numOutputters = conf.getNumReduceTasks() != 0 ? conf.getNumReduceTasks()
            : instanceCountPreviousOperator;
    HadoopWriteOperatorDescriptor writer = null;
    writer = new HadoopWriteOperatorDescriptor(spec, conf, numOutputters);
    configurePartitionCountConstraint(spec, writer, numOutputters);
    spec.connect(new OneToOneConnectorDescriptor(spec), previousOperator, 0, writer, 0);
    return writer;
}
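
Both Hyracks adapters above rely on the same convention: when getNumReduceTasks() returns a non-zero count, the job's output side is sized by the reducers; otherwise (a map-only job) it falls back to one output writer per mapper.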