Example usage for org.apache.hadoop.mapreduce Job submit

Introduction

This page lists example usages of org.apache.hadoop.mapreduce.Job#submit() collected from open-source projects.

Prototype

public void submit() throws IOException, InterruptedException, ClassNotFoundException 

Document

Submit the job to the cluster and return immediately.
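
A minimal, self-contained sketch of the pattern this implies (not taken from any of the sources below; class and path names are placeholders): because submit() returns immediately, the caller either polls Job#isComplete() or calls waitForCompletion() to block until the job finishes.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SubmitExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "submit-example");
        job.setJarByClass(SubmitExample.class);
        job.setMapperClass(Mapper.class);      // identity mapper, map-only job
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        job.submit();                          // returns as soon as the job is handed off
        while (!job.isComplete()) {            // poll for completion
            Thread.sleep(1000);
        }
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}

Several of the examples below call job.waitForCompletion(true) as a blocking shortcut instead of polling; submit() followed by polling is useful when the caller wants to do other work, or submit several jobs, while they run.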

Usage

From source file:org.apache.mahout.math.hadoop.stochasticsvd.YtYJob.java

License:Apache License

public static void run(Configuration conf, Path[] inputPaths, Path outputPath, int k, int p, long seed)
        throws ClassNotFoundException, InterruptedException, IOException {

    Job job = new Job(conf);
    job.setJobName("YtY-job");
    job.setJarByClass(YtYJob.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    FileInputFormat.setInputPaths(job, inputPaths);
    FileOutputFormat.setOutputPath(job, outputPath);

    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(VectorWritable.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);

    job.setMapperClass(YtYMapper.class);

    job.getConfiguration().setLong(PROP_OMEGA_SEED, seed);
    job.getConfiguration().setInt(PROP_K, k);
    job.getConfiguration().setInt(PROP_P, p);

    /*
     * We must reduce to just one matrix, which means we need only one reducer.
     * That's fine, since each mapper outputs only one vector (a packed
     * UpperTriangular), so even if there are thousands of mappers, a single
     * reducer copes just fine.
     */
    job.setNumReduceTasks(1);

    // submit() returns immediately; the waitForCompletion(false) call below
    // only monitors the already-submitted job until it finishes.
    job.submit();
    job.waitForCompletion(false);

    if (!job.isSuccessful()) {
        throw new IOException("YtY job unsuccessful.");
    }

}

From source file:org.apache.mahout.utils.SplitInputJob.java

License:Apache License

/**
 * Run job to downsample, randomly permute and split data into test and
 * training sets. This job takes a SequenceFile as input and outputs two
 * SequenceFiles test-r-00000 and training-r-00000 which contain the test and
 * training sets respectively.
 *
 * @param initialConf
 * @param inputPath
 *          path to input data SequenceFile
 * @param outputPath
 *          path for output data SequenceFiles
 * @param keepPct
 *          percentage of key value pairs in input to keep. The rest are
 *          discarded
 * @param randomSelectionPercent
 *          percentage of key value pairs to allocate to test set. Remainder
 *          are allocated to training set
 */
@SuppressWarnings("rawtypes")
public static void run(Configuration initialConf, Path inputPath, Path outputPath, int keepPct,
        float randomSelectionPercent) throws IOException, ClassNotFoundException, InterruptedException {

    int downsamplingFactor = (int) (100.0 / keepPct);
    initialConf.setInt(DOWNSAMPLING_FACTOR, downsamplingFactor);
    initialConf.setFloat(RANDOM_SELECTION_PCT, randomSelectionPercent);

    // Determine class of keys and values
    FileSystem fs = FileSystem.get(initialConf);

    SequenceFileDirIterator<? extends WritableComparable, Writable> iterator = new SequenceFileDirIterator<WritableComparable, Writable>(
            inputPath, PathType.LIST, PathFilters.partFilter(), null, false, fs.getConf());
    Class<? extends WritableComparable> keyClass;
    Class<? extends Writable> valueClass;
    if (iterator.hasNext()) {
        Pair<? extends WritableComparable, Writable> pair = iterator.next();
        keyClass = pair.getFirst().getClass();
        valueClass = pair.getSecond().getClass();
    } else {
        throw new IllegalStateException("Couldn't determine class of the input values");
    }

    Job job = new Job(new Configuration(initialConf));

    MultipleOutputs.addNamedOutput(job, TRAINING_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    MultipleOutputs.addNamedOutput(job, TEST_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    job.setJarByClass(SplitInputJob.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(SplitInputMapper.class);
    job.setReducerClass(SplitInputReducer.class);
    job.setSortComparatorClass(SplitInputComparator.class);
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}

From source file:org.apache.parquet.avro.TestInputOutputFormat.java

License:Apache License

private void waitForJob(Job job) throws Exception {
    job.submit();
    while (!job.isComplete()) {
        LOG.debug("waiting for job " + job.getJobName());
        sleep(100);
    }
    LOG.info("status for job " + job.getJobName() + ": " + (job.isSuccessful() ? "SUCCESS" : "FAILURE"));
    if (!job.isSuccessful()) {
        throw new RuntimeException("job failed " + job.getJobName());
    }
}

From source file:org.apache.parquet.hadoop.TestInputFormatColumnProjection.java

License:Apache License

private void waitForJob(Job job) throws Exception {
    job.submit();
    while (!job.isComplete()) {
        sleep(100);
    }
    if (!job.isSuccessful()) {
        throw new RuntimeException("job failed " + job.getJobName());
    }
}

From source file:org.apache.parquet.hadoop.thrift.TestInputOutputFormat.java

License:Apache License

public static void waitForJob(Job job) throws Exception {
    job.submit();
    while (!job.isComplete()) {
        LOG.debug("waiting for job " + job.getJobName());
        sleep(100);
    }
    LOG.info("status for job " + job.getJobName() + ": " + (job.isSuccessful() ? "SUCCESS" : "FAILURE"));
    if (!job.isSuccessful()) {
        throw new RuntimeException("job failed " + job.getJobName());
    }
}

From source file:org.apache.parquet.proto.utils.WriteUsingMR.java

License:Apache License

static void waitForJob(Job job) throws Exception {
    job.submit();
    while (!job.isComplete()) {
        LOG.debug("waiting for job " + job.getJobName());
        sleep(50);
    }
    LOG.debug("status for job " + job.getJobName() + ": " + (job.isSuccessful() ? "SUCCESS" : "FAILURE"));
    if (!job.isSuccessful()) {
        throw new RuntimeException("job failed " + job.getJobName());
    }
}

From source file:org.apache.phoenix.mapreduce.index.IndexScrutinyTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Connection connection = null;
    try {
        /** start - parse command line configs **/
        CommandLine cmdLine = null;
        try {
            cmdLine = parseOptions(args);
        } catch (IllegalStateException e) {
            printHelpAndExit(e.getMessage(), getOptions());
        }
        final Configuration configuration = HBaseConfiguration.addHbaseResources(getConf());
        connection = ConnectionUtil.getInputConnection(configuration);
        final String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPTION.getOpt());
        final String dataTable = cmdLine.getOptionValue(DATA_TABLE_OPTION.getOpt());
        final String indexTable = cmdLine.getOptionValue(INDEX_TABLE_OPTION.getOpt());
        final String qDataTable = SchemaUtil.getQualifiedTableName(schemaName, dataTable);
        String basePath = cmdLine.getOptionValue(OUTPUT_PATH_OPTION.getOpt());
        boolean isForeground = cmdLine.hasOption(RUN_FOREGROUND_OPTION.getOpt());
        boolean useSnapshot = cmdLine.hasOption(SNAPSHOT_OPTION.getOpt());
        boolean outputInvalidRows = cmdLine.hasOption(OUTPUT_INVALID_ROWS_OPTION.getOpt());
        SourceTable sourceTable = cmdLine.hasOption(SOURCE_TABLE_OPTION.getOpt())
                ? SourceTable.valueOf(cmdLine.getOptionValue(SOURCE_TABLE_OPTION.getOpt()))
                : SourceTable.BOTH;

        long batchSize = cmdLine.hasOption(BATCH_SIZE_OPTION.getOpt())
                ? Long.parseLong(cmdLine.getOptionValue(BATCH_SIZE_OPTION.getOpt()))
                : PhoenixConfigurationUtil.DEFAULT_SCRUTINY_BATCH_SIZE;

        long ts = cmdLine.hasOption(TIMESTAMP.getOpt())
                ? Long.parseLong(cmdLine.getOptionValue(TIMESTAMP.getOpt()))
                : EnvironmentEdgeManager.currentTimeMillis() - 60000;

        if (indexTable != null) {
            if (!isValidIndexTable(connection, qDataTable, indexTable)) {
                throw new IllegalArgumentException(
                        String.format(" %s is not an index table for %s ", indexTable, qDataTable));
            }
        }

        String outputFormatOption = cmdLine.getOptionValue(OUTPUT_FORMAT_OPTION.getOpt());
        OutputFormat outputFormat = outputFormatOption != null
                ? OutputFormat.valueOf(outputFormatOption.toUpperCase())
                : OutputFormat.TABLE;
        long outputMaxRows = cmdLine.hasOption(OUTPUT_MAX.getOpt())
                ? Long.parseLong(cmdLine.getOptionValue(OUTPUT_MAX.getOpt()))
                : 1000000L;
        /** end - parse command line configs **/

        if (outputInvalidRows && OutputFormat.TABLE.equals(outputFormat)) {
            // create the output table if it doesn't exist
            try (Connection outputConn = ConnectionUtil.getOutputConnection(configuration)) {
                outputConn.createStatement().execute(IndexScrutinyTableOutput.OUTPUT_TABLE_DDL);
                outputConn.createStatement().execute(IndexScrutinyTableOutput.OUTPUT_METADATA_DDL);
            }
        }

        LOG.info(String.format(
                "Running scrutiny [schemaName=%s, dataTable=%s, indexTable=%s, useSnapshot=%s, timestamp=%s, batchSize=%s, outputBasePath=%s, outputFormat=%s, outputMaxRows=%s]",
                schemaName, dataTable, indexTable, useSnapshot, ts, batchSize, basePath, outputFormat,
                outputMaxRows));
        JobFactory jobFactory = new JobFactory(connection, configuration, batchSize, useSnapshot, ts,
                outputInvalidRows, outputFormat, basePath, outputMaxRows);
        // If we are running the scrutiny with both tables as the source, run two separate jobs,
        // one for each direction
        if (SourceTable.BOTH.equals(sourceTable)) {
            jobs.add(jobFactory.createSubmittableJob(schemaName, indexTable, dataTable,
                    SourceTable.DATA_TABLE_SOURCE));
            jobs.add(jobFactory.createSubmittableJob(schemaName, indexTable, dataTable,
                    SourceTable.INDEX_TABLE_SOURCE));
        } else {
            jobs.add(jobFactory.createSubmittableJob(schemaName, indexTable, dataTable, sourceTable));
        }

        if (!isForeground) {
            LOG.info("Running Index Scrutiny in Background - Submit async and exit");
            for (Job job : jobs) {
                job.submit();
            }
            return 0;
        }
        LOG.info(
                "Running Index Scrutiny in Foreground. Waits for the build to complete. This may take a long time!.");
        boolean result = true;
        for (Job job : jobs) {
            result = result && job.waitForCompletion(true);
        }

        // write the results to the output metadata table
        if (outputInvalidRows && OutputFormat.TABLE.equals(outputFormat)) {
            LOG.info("Writing results of jobs to output table "
                    + IndexScrutinyTableOutput.OUTPUT_METADATA_TABLE_NAME);
            IndexScrutinyTableOutput.writeJobResults(connection, args, jobs);
        }

        if (result) {
            return 0;
        } else {
            LOG.error("IndexScrutinyTool job failed! Check logs for errors..");
            return -1;
        }
    } catch (Exception ex) {
        LOG.error("An exception occurred while performing the indexing job: " + ExceptionUtils.getMessage(ex)
                + " at:\n" + ExceptionUtils.getStackTrace(ex));
        return -1;
    } finally {
        try {
            if (connection != null) {
                connection.close();
            }
        } catch (SQLException sqle) {
            LOG.error("Failed to close connection ", sqle.getMessage());
            throw new RuntimeException("Failed to close connection");
        }
    }
}

From source file:org.apache.phoenix.mapreduce.index.IndexTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Connection connection = null;
    HTable htable = null;
    try {
        CommandLine cmdLine = null;
        try {
            cmdLine = parseOptions(args);
        } catch (IllegalStateException e) {
            printHelpAndExit(e.getMessage(), getOptions());
        }
        final Configuration configuration = HBaseConfiguration.addHbaseResources(getConf());
        final String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPTION.getOpt());
        final String dataTable = cmdLine.getOptionValue(DATA_TABLE_OPTION.getOpt());
        final String indexTable = cmdLine.getOptionValue(INDEX_TABLE_OPTION.getOpt());
        final boolean isPartialBuild = cmdLine.hasOption(PARTIAL_REBUILD_OPTION.getOpt());
        final String qDataTable = SchemaUtil.getQualifiedTableName(schemaName, dataTable);
        boolean useDirectApi = cmdLine.hasOption(DIRECT_API_OPTION.getOpt());
        String basePath = cmdLine.getOptionValue(OUTPUT_PATH_OPTION.getOpt());
        boolean isForeground = cmdLine.hasOption(RUN_FOREGROUND_OPTION.getOpt());
        boolean useSnapshot = cmdLine.hasOption(SNAPSHOT_OPTION.getOpt());
        connection = ConnectionUtil.getInputConnection(configuration);
        byte[][] splitKeysBeforeJob = null;
        boolean isLocalIndexBuild = false;
        PTable pindexTable = null;
        if (indexTable != null) {
            if (!isValidIndexTable(connection, qDataTable, indexTable)) {
                throw new IllegalArgumentException(
                        String.format(" %s is not an index table for %s ", indexTable, qDataTable));
            }
            pindexTable = PhoenixRuntime.getTable(connection,
                    schemaName != null && !schemaName.isEmpty()
                            ? SchemaUtil.getQualifiedTableName(schemaName, indexTable)
                            : indexTable);
            htable = (HTable) connection.unwrap(PhoenixConnection.class).getQueryServices()
                    .getTable(pindexTable.getPhysicalName().getBytes());
            if (IndexType.LOCAL.equals(pindexTable.getIndexType())) {
                isLocalIndexBuild = true;
                splitKeysBeforeJob = htable.getRegionLocator().getStartKeys();
            }
        }

        PTable pdataTable = PhoenixRuntime.getTableNoCache(connection, qDataTable);
        Path outputPath = null;
        FileSystem fs = null;
        if (basePath != null) {
            outputPath = CsvBulkImportUtil.getOutputPath(new Path(basePath),
                    pindexTable == null ? pdataTable.getPhysicalName().getString()
                            : pindexTable.getPhysicalName().getString());
            fs = outputPath.getFileSystem(configuration);
            fs.delete(outputPath, true);
        }

        Job job = new JobFactory(connection, configuration, outputPath).getJob(schemaName, indexTable,
                dataTable, useDirectApi, isPartialBuild, useSnapshot);
        if (!isForeground && useDirectApi) {
            LOG.info("Running Index Build in Background - Submit async and exit");
            job.submit();
            return 0;
        }
        LOG.info(
                "Running Index Build in Foreground. Waits for the build to complete. This may take a long time!.");
        boolean result = job.waitForCompletion(true);

        if (result) {
            if (!useDirectApi && indexTable != null) {
                if (isLocalIndexBuild) {
                    validateSplitForLocalIndex(splitKeysBeforeJob, htable);
                }
                LOG.info("Loading HFiles from {}", outputPath);
                LoadIncrementalHFiles loader = new LoadIncrementalHFiles(configuration);
                loader.doBulkLoad(outputPath, htable);
                htable.close();
                // Without direct API, we need to update the index state to ACTIVE from client.
                IndexToolUtil.updateIndexState(connection, qDataTable, indexTable, PIndexState.ACTIVE);
                fs.delete(outputPath, true);
            }
            return 0;
        } else {
            LOG.error("IndexTool job failed! Check logs for errors..");
            return -1;
        }
    } catch (Exception ex) {
        LOG.error("An exception occurred while performing the indexing job: " + ExceptionUtils.getMessage(ex)
                + " at:\n" + ExceptionUtils.getStackTrace(ex));
        return -1;
    } finally {
        try {
            if (connection != null) {
                connection.close();
            }
            if (htable != null) {
                htable.close();
            }
        } catch (SQLException sqle) {
            LOG.error("Failed to close connection ", sqle.getMessage());
            throw new RuntimeException("Failed to close connection");
        }
    }
}

From source file:org.apache.solr.hadoop.ForkedMapReduceIndexerTool.java

License:Apache License

public static int runIndexingPipeline(Job job, JobProcessCallback callback, Configuration conf, Options options,
        long programStartTime, FileSystem fs, Path fullInputList, long numFiles, int realMappers, int reducers)
        throws IOException, KeeperException, InterruptedException, ClassNotFoundException,
        FileNotFoundException {
    long startTime;
    float secs;

    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");
    Path outputTreeMergeStep = new Path(options.outputDir, "mtree-merge-output");

    FileOutputFormat.setOutputPath(job, outputReduceDir);

    if (job.getConfiguration().get(JobContext.REDUCE_CLASS_ATTR) == null) { // enable customization
        job.setReducerClass(SolrReducer.class);
    }
    if (options.updateConflictResolver == null) {
        throw new IllegalArgumentException("updateConflictResolver must not be null");
    }
    job.getConfiguration().set(SolrReducer.UPDATE_CONFLICT_RESOLVER, options.updateConflictResolver);
    job.getConfiguration().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    if (options.zkHost != null) {
        assert options.collection != null;
        /*
         * MapReduce partitioner that partitions the Mapper output such that each
         * SolrInputDocument gets sent to the SolrCloud shard that it would have
         * been sent to if the document were ingested via the standard SolrCloud
         * Near Real Time (NRT) API.
         * 
         * In other words, this class implements the same partitioning semantics
         * as the standard SolrCloud NRT API. This enables to mix batch updates
         * from MapReduce ingestion with updates from standard NRT ingestion on
         * the same SolrCloud cluster, using identical unique document keys.
         */
        if (job.getConfiguration().get(JobContext.PARTITIONER_CLASS_ATTR) == null) { // enable customization
            job.setPartitionerClass(ForkedSolrCloudPartitioner.class);
        }
        job.getConfiguration().set(ForkedSolrCloudPartitioner.ZKHOST, options.zkHost);
        job.getConfiguration().set(ForkedSolrCloudPartitioner.COLLECTION, options.collection);
    }
    job.getConfiguration().setInt(ForkedSolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    if (options.solrHomeDir != null) {
        SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    } else {
        assert options.zkHost != null;
        // use the config that this collection uses for the SolrHomeCache.
        ForkedZooKeeperInspector zki = new ForkedZooKeeperInspector();
        SolrZkClient zkClient = zki.getZkClient(options.zkHost);
        try {
            String configName = zki.readConfigName(zkClient, options.collection);
            File tmpSolrHomeDir = zki.downloadConfigDir(zkClient, configName);
            SolrOutputFormat.setupSolrHomeCache(tmpSolrHomeDir, job);
            LOG.debug("Using " + tmpSolrHomeDir + " as solr home");
            options.solrHomeDir = tmpSolrHomeDir;
        } finally {
            zkClient.close();
        }
    }

    //    MorphlineMapRunner runner = setupMorphline(job, options);
    //    if (options.isDryRun && runner != null) {
    //      LOG.info("Indexing {} files in dryrun mode", numFiles);
    //      startTime = System.currentTimeMillis();
    //      dryRun(job, runner, fs, fullInputList);
    //      secs = (System.currentTimeMillis() - startTime) / 1000.0f;
    //      LOG.info("Done. Indexing {} files in dryrun mode took {} secs", numFiles, secs);
    //      goodbye(null, programStartTime);
    //      return 0;
    //    }
    //    job.getConfiguration().set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, options.morphlineFile.getName());

    job.setNumReduceTasks(reducers);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    LOG.info("Indexing data into {} reducers", new Object[] { reducers });
    startTime = System.currentTimeMillis();
    job.submit();
    callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
    if (!waitForCompletion(job, options.isVerbose)) {
        return -1; // job failed
    }

    secs = (System.currentTimeMillis() - startTime) / 1000.0f;
    LOG.info("Done. Indexing data into {} reducers took {} secs", new Object[] { reducers, secs });

    int mtreeMergeIterations = 0;
    if (reducers > options.shards) {
        mtreeMergeIterations = (int) Math.round(log(options.fanout, reducers / options.shards));
    }
    LOG.debug("MTree merge iterations to do: {}", mtreeMergeIterations);
    int mtreeMergeIteration = 1;
    while (reducers > options.shards) { // run a mtree merge iteration
        job = Job.getInstance(conf);
        job.setJarByClass(ForkedMapReduceIndexerTool.class);
        job.setJobName(ForkedMapReduceIndexerTool.class.getName() + "/"
                + Utils.getShortClassName(ForkedTreeMergeMapper.class));
        job.setMapperClass(ForkedTreeMergeMapper.class);
        job.setOutputFormatClass(ForkedTreeMergeOutputFormat.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        job.setInputFormatClass(NLineInputFormat.class);

        Path inputStepDir = new Path(options.outputDir, "mtree-merge-input-iteration" + mtreeMergeIteration);
        fullInputList = new Path(inputStepDir, FULL_INPUT_LIST);
        LOG.debug("MTree merge iteration {}/{}: Creating input list file for mappers {}",
                new Object[] { mtreeMergeIteration, mtreeMergeIterations, fullInputList });
        numFiles = createTreeMergeInputDirList(job, outputReduceDir, fs, fullInputList);
        if (numFiles != reducers) {
            throw new IllegalStateException("Not same reducers: " + reducers + ", numFiles: " + numFiles);
        }
        NLineInputFormat.addInputPath(job, fullInputList);
        NLineInputFormat.setNumLinesPerSplit(job, options.fanout);
        FileOutputFormat.setOutputPath(job, outputTreeMergeStep);

        LOG.info("MTree merge iteration {}/{}: Merging {} shards into {} shards using fanout {}",
                new Object[] { mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout),
                        options.fanout });
        startTime = System.currentTimeMillis();
        job.submit();
        callback.jobStarted(job.getJobID().toString(), job.getTrackingURL());
        if (!waitForCompletion(job, options.isVerbose)) {
            return -1; // job failed
        }
        if (!renameTreeMergeShardDirs(outputTreeMergeStep, job, fs)) {
            return -1;
        }
        secs = (System.currentTimeMillis() - startTime) / 1000.0f;
        LOG.info(
                "MTree merge iteration {}/{}: Done. Merging {} shards into {} shards using fanout {} took {} secs",
                new Object[] { mtreeMergeIteration, mtreeMergeIterations, reducers, (reducers / options.fanout),
                        options.fanout, secs });

        if (!delete(outputReduceDir, true, fs)) {
            return -1;
        }
        if (!rename(outputTreeMergeStep, outputReduceDir, fs)) {
            return -1;
        }
        assert reducers % options.fanout == 0;
        reducers = reducers / options.fanout;
        mtreeMergeIteration++;
    }
    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}

From source file:org.apache.spark.simr.SimrJob.java

License:Apache License

public boolean run() throws Exception {
    checkParams();
    Configuration conf = new Configuration();
    updateConfig(conf);
    String[] program_args;

    System.err.println(
            "         _               \n" + "   _____(_)___ ___  _____\n" + "  / ___/ / __ `__ \\/ ___/\n"
                    + " (__  ) / / / / / / /    \n" + "/____/_/_/ /_/ /_/_/      version " + SIMRVER + "\n");

    System.err.println("Requesting a SIMR cluster with " + conf.get("simr_cluster_slots") + " slots");

    Job job = setupJob(conf);

    boolean retBool = true;

    job.submit();

    if (cmd.containsCommand("shell")) {
        program_args = new String[] { conf.get("simr_tmp_dir") + "/" + Simr.RELAYURL,
                conf.get("simr_tmp_dir") + "/" + Simr.DRIVERURL };
    } else {
        program_args = new String[] { conf.get("simr_tmp_dir") + "/" + Simr.RELAYURL,
                conf.get("simr_tmp_dir") + "/" + Simr.DRIVERURL, "--readonly" };
    }

    org.apache.spark.simr.RelayClient.main(program_args);

    retBool = job.waitForCompletion(false);

    FileSystem fs = FileSystem.get(conf);
    for (FileStatus fstat : fs.listStatus(new Path(conf.get("simr_out_dir")))) { // delete output files
        if (fstat.getPath().getName().startsWith("part-m-")) {
            fs.delete(fstat.getPath(), false);
        }
    }

    fs.delete(new Path(conf.get("simr_tmp_dir")), true); // delete tmp dir

    System.err.println("Output logs can be found in hdfs://" + new Path(conf.get("simr_out_dir")));
    return retBool;
}