List of usage examples for org.apache.hadoop.mapred.JobConf.set
public void set(String name, String value)
Sets the value of the name property.
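JobConf inherits this method from Configuration: it stores a plain string key/value pair in the job configuration, which tasks can later read back with get. Before the project-specific examples below, here is a minimal sketch of the call pattern; the class name JobConfSetSketch and the property key "my.app.greeting" are made up for illustration:

import org.apache.hadoop.mapred.JobConf;

public class JobConfSetSketch {
    public static void main(String[] args) {
        JobConf job = new JobConf(JobConfSetSketch.class);
        job.setJobName("jobconf-set-sketch");
        // Store an arbitrary key/value pair; tasks can read it back with job.get(...)
        job.set("my.app.greeting", "hello");
        // Framework properties are set the same way, e.g. the child JVM heap size
        job.set("mapred.child.java.opts", "-Xmx1024m");
        System.out.println(job.get("my.app.greeting")); // prints "hello"
    }
}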
From source file:de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob5.java
License:Apache License
@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e) {
        e.printStackTrace();
    }

    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);

    job.setMapperClass(JoBimMapper.class);
    job.setReducerClass(JoBimReducer.class);

    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    job.setMemoryForMapTask(4096);
    job.setMemoryForReduceTask(4096);
    job.set("mapred.child.java.opts", "-Xmx4096m");

    job.setNumReduceTasks(1); // reset to default
}
From source file:de.tudarmstadt.lt.n2n.hadoop.PreparsedJob.java
License:Apache License
@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e1) {
        e1.printStackTrace();
    }

    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);

    job.setOutputFormat(NullOutputFormat.class); // ignore the serialized cas and use only the output from the CasConsumer
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.examples.CasConsumerExample.java
License:Apache License
@Override
public void configure(JobConf job) {
    job.set("mapreduce.job.queuename", "smalljob");
    job.setInputFormat(Text2CASInputFormat.class);
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.examples.Text2CASExample.java
License:Apache License
@Override
public void configure(JobConf job) {
    job.set("mapreduce.job.queuename", "smalljob");
    /*
     * Use Text2Cas InputFormat, read texts directly from hdfs
     */
    job.setInputFormat(Text2CASInputFormat.class);
}
From source file:de.tudarmstadt.ukp.dkpro.bigdata.io.hadoop.XCASSequenceFileWriter.java
License:Apache License
@SuppressWarnings("deprecation") @Override/*from w w w.j a v a 2s . c o m*/ public void initialize(org.apache.uima.UimaContext context) throws org.apache.uima.resource.ResourceInitializationException { super.initialize(context); final JobConf conf = new JobConf(); this.path = new File((String) context.getConfigParameterValue(PARAM_PATH)); conf.set("fs.default.name", this.fileSystemName); // Compress Map output if (this.compress) { System.out.println("compressing"); conf.set("mapred.compress.map.output", "true"); conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec"); } // Compress MapReduce output conf.set("mapred.output.compress", "true"); conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec"); FileOutputFormat.setCompressOutput(conf, true); // FileOutputFormat.setOutputCompressorClass(conf, SnappyCodec.class); final String filename = this.path + "/" + "part-00000"; try { final FileSystem fs = FileSystem.get(URI.create(filename), conf); final Path path = new Path(URI.create(filename).toString()); this.writer = SequenceFile.createWriter(fs, conf, path, Text.class, Text.class); } catch (final IOException e) { throw new ResourceInitializationException(); } }
From source file:edu.brown.cs.mapreduce.BenchmarkBase.java
License:Open Source License
public JobConf getJobConf() {
    JobConf jobConf = new JobConf(this.conf, this.benchmarkClass);

    // Options
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        try {
            // Print property and exit
            if ("-property".equals(args[i])) {
                String prop = jobConf.get(args[i + 1]);
                System.out.println(prop);
                System.exit(0);
            // # of Maps
            } else if ("-m".equals(args[i])) {
                this.num_of_maps = Integer.parseInt(args[++i]);
            // # of Reduces
            } else if ("-r".equals(args[i])) {
                this.num_of_reduces = Integer.parseInt(args[++i]);
            // Enable debug
            } else if ("-debug".equals(args[i])) {
                this.debug = true;
            // Enable single output file for results
            } else if ("-combine".equals(args[i])) {
                this.combine = true;
            // Tell jobs to compress their intermediate output files
            } else if ("-compress".equals(args[i])) {
                this.compress = true;
            // We're using TupleWritable (which has to be in a SequenceFile)
            } else if ("-tuple".equals(args[i])) {
                this.tuple_data = true;
                this.sequence_file = true;
            // Use SequenceFiles for initial input
            } else if ("-sequence".equals(args[i])) {
                this.sequence_file = true;
            // Recursively load directories
            } else if ("-recursive-dirs".equals(args[i])) {
                this.load_directories = true;
            // Job Basename
            } else if ("-basename".equals(args[i])) {
                this.job_name = args[++i];
            // Misc. Properties
            } else if ("-D".equals(args[i].substring(0, 2))) {
                String arg = args[i].substring(2);
                int pos = arg.indexOf('=');
                if (pos == -1) {
                    System.err.println("ERROR: Invalid properties option '" + arg + "'");
                    System.exit(1);
                }
                this.options.put(arg.substring(0, pos), arg.substring(pos + 1));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.err.println("ERROR: Integer expected instead of " + args[i]);
            System.exit(1);
        } catch (ArrayIndexOutOfBoundsException except) {
            System.err.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.exit(1);
        }
    } // FOR

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() < 2) {
        System.err.println("ERROR: Wrong number of parameters: " + otherArgs.size());
        System.exit(1);
    }

    // Set these flags so the jobs know about them
    if (this.getSequenceFile()) this.options.put(PROPERTY_SEQUENCEFILE, "true");
    if (this.getTupleData()) this.options.put(PROPERTY_TUPLEDATA, "true");
    if (this.getDebug()) this.options.put(PROPERTY_DEBUG, "true");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(-1);
    }

    // Input Paths
    int cnt = otherArgs.size() - 1;
    this.input_paths = new ArrayList<Path>();
    for (int ctr = 0; ctr < cnt; ctr++) {
        Path new_path = new Path(otherArgs.get(ctr));
        try {
            if (this.load_directories && fs.getFileStatus(new_path).isDir()) {
                //int limit = 10;
                FileStatus paths[] = fs.listStatus(new_path);
                for (FileStatus p : paths) {
                    this.input_paths.add(p.getPath());
                    FileInputFormat.addInputPath(jobConf, p.getPath());
                    //if (limit-- <= 0) break;
                } // FOR
            } else {
                this.input_paths.add(new_path);
                FileInputFormat.addInputPath(jobConf, new_path);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    } // FOR
    if (this.input_paths.isEmpty()) {
        System.err.println("ERROR: No input paths were defined for '" + this.benchmarkClass.getSimpleName() + "'");
        System.exit(-1);
    }

    // Output Paths
    this.output_path = new Path(otherArgs.get(otherArgs.size() - 1));
    FileOutputFormat.setOutputPath(jobConf, this.output_path);

    jobConf.setJobName(this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName());
    if (this.num_of_maps >= 0) jobConf.setNumMapTasks(this.num_of_maps);
    if (this.num_of_reduces >= 0) jobConf.setNumReduceTasks(this.num_of_reduces);

    // Set all properties
    for (String key : this.options.keySet()) {
        jobConf.set(key, this.options.get(key));
    }

    return (jobConf);
}
From source file:edu.iu.benchmark.JobLauncher.java
License:Apache License
private Job configureBenchmarkJob(String cmd, int bytesPerPartition, int numPartitions, int numMappers,
        int numIterations, Path inputDirPath, Path outputDirPath) throws IOException, URISyntaxException {
    Job job = Job.getInstance(getConf(), "benchmark_job");
    FileInputFormat.setInputPaths(job, inputDirPath);
    FileOutputFormat.setOutputPath(job, outputDirPath);
    job.setInputFormatClass(SingleFileInputFormat.class);
    job.setJarByClass(JobLauncher.class);
    job.setMapperClass(BenchmarkMapper.class);
    org.apache.hadoop.mapred.JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMappers);
    job.setNumReduceTasks(0);
    jobConf.set(Constants.BENCHMARK_CMD, cmd);
    jobConf.setInt(Constants.BYTES_PER_PARTITION, bytesPerPartition);
    jobConf.setInt(Constants.NUM_PARTITIONS, numPartitions);
    jobConf.setInt(Constants.NUM_MAPPERS, numMappers);
    jobConf.setInt(Constants.NUM_ITERATIONS, numIterations);
    return job;
}
From source file:edu.iu.ccd.CCDLauncher.java
License:Apache License
private Job configureCCDJob(Path inputDir, int r, double lambda, int numIterations, int numMapTasks,
        int numThreadsPerWorker, int numModelSlices, Path modelDir, Path outputDir, String testFilePath,
        Configuration configuration, int jobID) throws IOException, URISyntaxException {
    configuration.setInt(Constants.R, r);
    configuration.setDouble(Constants.LAMBDA, lambda);
    configuration.setInt(Constants.NUM_ITERATIONS, numIterations);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    System.out.println("Model Dir Path: " + modelDir.toString());
    configuration.set(Constants.MODEL_DIR, modelDir.toString());
    configuration.setInt(Constants.NUM_MODEL_SLICES, numModelSlices);
    configuration.set(Constants.TEST_FILE_PATH, testFilePath);
    Job job = Job.getInstance(configuration, "ccd_job_" + jobID);
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setNumMapTasks(numMapTasks);
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(CCDLauncher.class);
    job.setMapperClass(CCDMPCollectiveMapper.class);
    job.setNumReduceTasks(0);
    return job;
}
From source file:edu.iu.daal_cov.COVDaalLauncher.java
License:Apache License
private Job configureCOVJob(Path inputDir, int mem, int numMapTasks, int numThreadsPerWorker, Path modelDir,
        Path outputDir, Configuration configuration) throws IOException, URISyntaxException {
    // configuration.set(Constants.TEST_FILE_PATH, testDirPath);
    // configuration.set(Constants.TEST_TRUTH_PATH, testGroundTruthDirPath);
    configuration.setInt(Constants.NUM_MAPPERS, numMapTasks);
    configuration.setInt(Constants.NUM_THREADS, numThreadsPerWorker);
    // configuration.setInt(Constants.BATCH_SIZE, batchSize);

    Job job = Job.getInstance(configuration, "cov_job");
    JobConf jobConf = (JobConf) job.getConfiguration();
    jobConf.set("mapreduce.framework.name", "map-collective");
    jobConf.setInt("mapreduce.job.max.split.locations", 10000);
    jobConf.setInt("mapreduce.map.collective.memory.mb", mem);
    int xmx = (int) Math.ceil((mem - 2000) * 0.5);
    int xmn = (int) Math.ceil(0.25 * xmx);
    jobConf.set("mapreduce.map.collective.java.opts",
            "-Xmx" + xmx + "m -Xms" + xmx + "m" + " -Xmn" + xmn + "m");
    jobConf.setNumMapTasks(numMapTasks);

    FileInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setInputFormatClass(MultiFileInputFormat.class);
    job.setJarByClass(COVDaalLauncher.class);
    job.setMapperClass(COVDaalCollectiveMapper.class);
    job.setNumReduceTasks(0);
    System.out.println("Launcher launched");
    return job;
}
From source file:edu.iu.daal_kmeans.regroupallgather.KMeansDaalLauncher.java
License:Apache License
/**
 * Launches all the tasks in order.
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    Initialize init = new Initialize(conf, args);

    /* Put shared libraries into the distributed cache */
    init.loadDistributedLibs();

    // load args
    init.loadSysArgs();

    // load app args
    conf.setInt(HarpDAALConstants.FILE_DIM, Integer.parseInt(args[init.getSysArgNum()]));
    conf.setInt(HarpDAALConstants.FEATURE_DIM, Integer.parseInt(args[init.getSysArgNum() + 1]));
    conf.setInt(HarpDAALConstants.NUM_CENTROIDS, Integer.parseInt(args[init.getSysArgNum() + 2]));

    // config job
    System.out.println("Starting Job");
    long perJobSubmitTime = System.currentTimeMillis();
    System.out.println("Start Job#" + " "
            + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    Job kmeansJob = init.createJob("kmeansJob", KMeansDaalLauncher.class, KMeansDaalCollectiveMapper.class);

    // initialize centroids data
    JobConf thisjobConf = (JobConf) kmeansJob.getConfiguration();
    FileSystem fs = FileSystem.get(conf);
    int nFeatures = Integer.parseInt(args[init.getSysArgNum() + 1]);
    int numCentroids = Integer.parseInt(args[init.getSysArgNum() + 2]);
    Path workPath = init.getWorkPath();
    Path cenDir = new Path(workPath, "centroids");
    fs.mkdirs(cenDir);
    if (fs.exists(cenDir)) {
        fs.delete(cenDir, true);
    }
    Path initCenDir = new Path(cenDir, "init_centroids");
    DataGenerator.generateDenseDataSingle(numCentroids, nFeatures, 1000, 0, " ", initCenDir, fs);
    thisjobConf.set(HarpDAALConstants.CEN_DIR, cenDir.toString());
    thisjobConf.set(HarpDAALConstants.CENTROID_FILE_NAME, "init_centroids");

    // generate Data if required
    boolean generateData = Boolean.parseBoolean(args[init.getSysArgNum() + 3]);
    if (generateData) {
        Path inputPath = init.getInputPath();
        int total_points = Integer.parseInt(args[init.getSysArgNum() + 4]);
        int total_files = Integer.parseInt(args[init.getSysArgNum() + 5]);
        String tmpDirPathName = args[init.getSysArgNum() + 6];
        DataGenerator.generateDenseDataMulti(total_points, nFeatures, total_files, 2, 1, ",", inputPath,
                tmpDirPathName, fs);
    }

    // finish job
    boolean jobSuccess = kmeansJob.waitForCompletion(true);
    System.out.println("End Job#" + " "
            + new SimpleDateFormat("HH:mm:ss.SSS").format(Calendar.getInstance().getTime()));
    System.out.println("| Job#" + " Finished in " + (System.currentTimeMillis() - perJobSubmitTime)
            + " miliseconds |");
    if (!jobSuccess) {
        kmeansJob.killJob();
        System.out.println("kmeansJob failed");
    }
    return 0;
}