Example usage for org.apache.hadoop.mapred JobConf set

List of usage examples for org.apache.hadoop.mapred JobConf set

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf set.

Prototype

public void set(String name, String value) 

Document

Set the value of the name property.
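
For orientation, here is a minimal, hypothetical sketch (not taken from any of the projects below) of the usual pattern: set(name, value) stores a string property in the job configuration, and tasks later read it back with get. The key "myapp.greeting" is a made-up placeholder.

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Store an application-defined string property in the job configuration.
        conf.set("myapp.greeting", "hello");
        // Tasks can read it back with get(name) or get(name, defaultValue).
        System.out.println(conf.get("myapp.greeting", "none"));
    }
}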

Usage

From source file:edu.iu.kmeans.sgxsimu.KMeansLauncher.java

License:Apache License

/**
 * Launches all the tasks in order.
 */
@Override
public int run(String[] args) throws Exception {

    /* Put shared libraries into the distributed cache */
    Configuration conf = this.getConf();

    Initialize init = new Initialize(conf, args);

    // load args
    init.loadSysArgs();

    init.loadDistributedLibs();

    //load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2]));
    conf.setInt(Constants.ENCLAVE_TOTAL, Integer.parseInt(args[init.getSysArgNum() + 3]));
    conf.setInt(Constants.ENCLAVE_PER_THD, Integer.parseInt(args[init.getSysArgNum() + 4]));
    conf.setInt(Constants.ENCLAVE_TASK, Integer.parseInt(args[init.getSysArgNum() + 5]));
    conf.setBoolean(Constants.ENABLE_SIMU, Boolean.parseBoolean(args[init.getSysArgNum() + 6]));

    // config job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println(
            "Start Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job kmeansJob = init.createJob("kmeansJob", KMeansLauncher.class, KMeansCollectiveMapper.class);

    // initialize centroids data
    JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]);
    int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]);
    Path workPath = init.getWorkPath();
    Path cenDir = new Path(workPath, "centroids");
    // remove any stale centroid directory before recreating it
    if (fs.exists(cenDir)) {
        fs.delete(cenDir, true);
    }
    fs.mkdirs(cenDir);

    Path initCenDir = new Path(cenDir, "init_centroids");
    DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
    thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
    thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

    //generate Data if required
    boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 7]);
    if (generateData) {
        Path inputPath = init.getInputPath();
        int total_points = Integer.parseInt(args[init.getSysArgNum() + 8]);
        int total_files = Integer.parseInt(args[init.getSysArgNum() + 9]);
        String tmpDirPathName = args[init.getSysArgNum() + 10];

        DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
                tmpDirPathName, fs);
    }

    // finish job
    boolean jobSuccess = kmeansJob.waitForCompletion(true);
    System.out.println(
            "End Job#" + " " + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println(
            "| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime) + " miliseconds |");
    if (!jobSuccess) {
        kmeansJob.killJob();
        System.out.println("kmeansJob failed");
    }

    return 0;
}

From source file:edu.iu.lda.LDALauncher.java

License:Apache License

private Job configureLDAJob(Path docDir, int numTopics, double alpha, double beta, int numIterations,
        int minBound, int maxBound, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem,
        boolean printModel, Path modelDir, Path outputDir, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.NUM_TOPICS, numTopics);
    configuration.setDouble(Constants.ALPHA, alpha);
    configuration.setDouble(Constants.BETA, beta);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.MIN_BOUND, minBound);
    configuration.setInt(Constants.MAX_BOUND, maxBound);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setBoolean(Constants.PRINT_MODEL, printModel);
    Job job = Job.getInstance(configuration, "lda_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();

    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, docDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(LDALauncher.class);
    job.setMapperClass(LDAMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}

From source file:edu.iu.mds.MDSMapReduce.java

License:Apache License

private Job prepareBCCalcJob(String inputDir, String xFile, String outputDirPath, int iterationCount,
        int jobCount, Configuration configuration, int numPoints, int xWidth, int numMapTasks,
        int partitionPerWorker)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(configuration, "map-collective-mds-bc" + jobCount);
    Configuration jobConfig = job.getConfiguration();
    Path outputDir = new Path(outputDirPath);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    jobConfig.setInt(MDSConstants.ITERATION, iterationCount);
    jobConfig.setInt(MDSConstants.NUMPOINTS, numPoints);
    jobConfig.setInt(MDSConstants.XWIDTH, xWidth);
    jobConfig.set(MDSConstants.XFILE, xFile);
    jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
    jobConfig.setInt(MDSConstants.PARTITION_PER_WORKER, partitionPerWorker);
    // input class to file-based class
    job.setInputFormatClass(MultiFileInputFormat.class);
    // job.setInputFormatClass(DataFileInputFormat.class);
    // job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(MDSMapReduce.class);
    job.setMapperClass(MDSAllgatherMultiThreadMapper.class);
    // When use MultiFileInputFormat, remember to set the number of map tasks
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}

From source file:edu.iu.pagerank.PRJobLauncher.java

License:Apache License

private Job prepareJob(String inputDir, int totalVtx, int iterationCount, int jobCount, int numMapTasks,
        int partiitonPerWorker, String outputDirPath, Configuration configuration)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = new Job(configuration, "harp-pagerank-" + jobCount);
    Configuration jobConfig = job.getConfiguration();
    Path outputDir = new Path(outputDirPath);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    jobConfig.setInt(PRConstants.ITERATION, iterationCount);
    jobConfig.setInt(PRConstants.TOTAL_VTX, totalVtx);
    jobConfig.setInt(PRConstants.NUM_MAPS, numMapTasks);
    jobConfig.setInt(PRConstants.PARTITION_PER_WORKER, partiitonPerWorker);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(PRJobLauncher.class);
    job.setMapperClass(PRMultiThreadMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}

From source file:edu.iu.sgd.SGDLauncher.java

License:Apache License

private Job configureSGDJob(Path inputDir, int r, double lambda, double epsilon, int numIterations,
        int trainRatio, int numMapTasks, int numThreadsPerWorker, double scheduleRatio, int mem, Path modelDir,
        Path outputDir, String testFilePath, Configuration configuration, int jobID)
        throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setDouble(Constants.EPSILON, epsilon);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.TRAIN_RATIO, trainRatio);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    configuration.setDouble(Constants.SCHEDULE_RATIO, scheduleRatio);
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);
    Job job = Job.getInstance(configuration, "sgd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    // mapreduce.map.collective.memory.mb
    // 125000
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    // mapreduce.map.collective.java.opts
    // -Xmx120000m -Xms120000m
    int xmx = (mem - 5000) > (mem * 0.9) ? (mem - 5000) : (int) Math.ceil(mem * 0.9);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(SGDLauncher.class);
    job.setMapperClass(SGDCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}

From source file:edu.iu.wdamds.MDSLauncher.java

License:Apache License

private Job prepareWDAMDSJob(int numMapTasks, Path dataDirPath, Path xFilePath, Path xOutFilePath,
        Path outDirPath, String idsFile, String labelsFile, double threshold, int d, double alpha, int n,
        int cgIter, int numThreads)
        throws IOException, URISyntaxException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf(), "map-collective-wdamds");
    Configuration jobConfig = job.getConfiguration();
    FileInputFormat.setInputPaths(job, dataDirPath);
    FileOutputFormat.setOutputPath(job, outDirPath);
    jobConfig.setInt(MDSConstants.NUM_MAPS, numMapTasks);
    // Load from HDFS
    // Now we ignore and don't read x file from
    // HDFS
    jobConfig.set(MDSConstants.X_FILE_PATH, xFilePath.toString());
    // Output to HDFS
    jobConfig.set(MDSConstants.X_OUT_FILE_PATH, xOutFilePath.toString());
    // Load from shared file system
    jobConfig.set(MDSConstants.IDS_FILE, idsFile);
    // Load from shared file system
    jobConfig.set(MDSConstants.LABELS_FILE, labelsFile);
    jobConfig.setDouble(MDSConstants.THRESHOLD, threshold);
    jobConfig.setInt(MDSConstants.D, d);
    jobConfig.setDouble(MDSConstants.ALPHA, alpha);
    jobConfig.setInt(MDSConstants.N, n);
    jobConfig.setInt(MDSConstants.CG_ITER, cgIter);
    jobConfig.setInt(MDSConstants.NUM_THREADS, numThreads);
    // input class to file-based class
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(MDSLauncher.class);
    job.setMapperClass(WDAMDSMapper.class);
    // When use MultiFileInputFormat, remember to
    // set the number of map tasks
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    job.setNumReduceTasks(0);
    return job;
}

From source file:edu.iu.wordcount.CollectiveWordCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: wordcount <in> <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "collective word count");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumReduceTasks(0);
    job.setJarByClass(CollectiveWordCount.class);
    job.setMapperClass(WordCountMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:edu.ohsu.sonmezsysbio.cloudbreak.command.CommandNovoalignSingleEnds.java

public void runHadoopJob(Configuration configuration) throws IOException, URISyntaxException {
    JobConf conf = new JobConf(configuration);

    conf.setJobName("Single End Alignment");
    conf.setJarByClass(Cloudbreak.class);
    FileInputFormat.addInputPath(conf, new Path(hdfsDataDir));
    Path outputDir = new Path(hdfsAlignmentsDir);
    FileSystem.get(conf).delete(outputDir);
    FileOutputFormat.setOutputPath(conf, outputDir);

    addDistributedCacheFile(conf, reference, "novoalign.reference");

    addDistributedCacheFile(conf, pathToNovoalign, "novoalign.executable");
    if (pathToNovoalignLicense != null) {
        addDistributedCacheFile(conf, pathToNovoalignLicense, "novoalign.license");
    }

    DistributedCache.createSymlink(conf);
    conf.set("mapred.task.timeout", "3600000");
    conf.set("novoalign.threshold", threshold);
    conf.set("novoalign.quality.format", qualityFormat);

    conf.setInputFormat(SequenceFileInputFormat.class);

    conf.setMapperClass(NovoalignSingleEndMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCompressMapOutput(true);

    conf.setReducerClass(SingleEndAlignmentsToPairsReducer.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.set("mapred.output.compress", "true");
    conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec");

    JobClient.runJob(conf);

}

From source file:edu.stolaf.cs.wmrserver.HadoopEngine.java

License:Apache License

public void submit(JobRequest request, long submissionID, File mapperFile, File reducerFile, File packageDir,
        Path inputPath) throws ValidationException, NotFoundException, CompilationException, InternalException {
    // Generate job output path
    Path outputDir = new Path(_homeDir, "out");
    Path outputPath;
    try {
        FileSystem fs = outputDir.getFileSystem(new Configuration());
        outputPath = JobServiceHandler.getNonexistantPath(outputDir, request.getName(), fs);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not construct output path.", ex);
    }

    JobConf conf = new JobConf();
    conf.setJobName(request.getName());

    // Set mapper and number of tasks if specified
    StreamJob.setStreamMapper(conf, mapperFile.toString());
    if (request.isSetMapTasks())
        conf.setNumMapTasks(request.getMapTasks());

    // Set reducer and number of tasks if specified
    StreamJob.setStreamReducer(conf, reducerFile.toString());
    if (request.isSetReduceTasks())
        conf.setNumReduceTasks(request.getReduceTasks());

    // Create and set job JAR, including necessary files
    ArrayList<String> jarFiles = new ArrayList<String>();
    jarFiles.add(packageDir.toString());
    String jarPath;
    try {
        jarPath = StreamJob.createJobJar(conf, jarFiles, _tempDir);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("Could not create job jar.", ex);
    }
    if (jarPath != null)
        conf.setJar(jarPath);

    // TODO: This is a hack. Rewrite streaming to use DistributedCache.
    //conf.setPattern("mapreduce.job.jar.unpack.pattern",
    //              Pattern.compile(".*"));

    // Set I/O formats and paths
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // Use numeric sort if appropriate
    conf.setBoolean(CONF_NUMERIC, request.isNumericSort());
    if (request.isNumericSort()) {
        conf.setOutputKeyComparatorClass(KeyFieldBasedComparator.class);
        conf.setPartitionerClass(KeyFieldBasedPartitioner.class);
        conf.setKeyFieldComparatorOptions("-n");
        conf.setKeyFieldPartitionerOptions("-n");
    }

    // Set other job information
    conf.set(CONF_USER, request.getUser());
    conf.set(CONF_LANGUAGE, request.getLanguage());
    conf.set(CONF_MAPPER, request.getMapper());
    conf.set(CONF_REDUCER, request.getReducer());

    // Attempt to submit the job

    RunningJob job;
    try {
        JobClient client = new JobClient(new JobConf());
        job = client.submitJob(conf);
    } catch (IOException ex) {
        throw JobServiceHandler.wrapException("There was a serious error while attempting to submit the job.",
                ex);
    }

    try {
        SubmissionDatabase.setSubmitted(submissionID);
        SubmissionDatabase.setHadoopID(submissionID, job.getID().toString());
    } catch (SQLException ex) {
        throw JobServiceHandler.wrapException("Could not update submission in database.", ex);
    }
}

From source file:edu.stolaf.cs.wmrserver.streaming.StreamJob.java

License:Apache License

public static void setStreamMapper(JobConf conf, String mapCommand) {
    conf.setMapperClass(PipeMapper.class);
    conf.setMapRunnerClass(PipeMapRunner.class);
    try {
        conf.set("stream.map.streamprocessor", URLEncoder.encode(mapCommand, "UTF-8"));
    } catch (UnsupportedEncodingException ex) {
        // This is VERY unlikely to happen. Especially since the ENTIRE FREAKING
        // STRING IMPLEMENTATION is based on UTF-8. Thanks, Java.
        throw new RuntimeException("The sky is falling! Java doesn't support UTF-8.");
    }
}