Example usage for org.apache.hadoop.mapred JobConf get

Introduction

This page collects example usages of the get method of org.apache.hadoop.mapred.JobConf.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
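
Most of the examples below follow the same pattern: read a property with get and fall back to a default when it returns null. Here is a minimal, self-contained sketch of that pattern; the property name example.extractor.configs and the default value are hypothetical, chosen only for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // get(String) returns null when the property has not been set
        String configs = conf.get("example.extractor.configs"); // hypothetical key
        if (configs == null) {
            configs = "extractor-a.xml,extractor-b.xml"; // hypothetical default
            conf.set("example.extractor.configs", configs);
        }
        System.out.println("Using extractor configs: " + configs);
    }
}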

Usage

From source file: de.tudarmstadt.lt.n2n.hadoop.GoogleSyntacticsJobDkbd.java

License: Apache License
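
In this configure hook, get reads the extractor-configuration property; when it returns null, the code joins a default list, writes it back with set, and adds each configuration file to the distributed-cache classpath.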

@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }
    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);
    job.setOutputFormat(NullOutputFormat.class); // ignore the serialized cas and use only the output from the CasConsumer
}

From source file: de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob2.java

License: Apache License
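
This driver applies the same null-check-and-default pattern around get while wiring up the mapper, reducer, and I/O formats before submitting the job.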

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob2.class);
    conf.setJobName(GoogleSyntacticsJob2.class.getSimpleName());

    conf.setMapperClass(GoogleSyntacticsJob2Mapper.class);
    conf.setReducerClass(GoogleSyntacticsJob2Reducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // conf.setMapOutputKeyClass(Text.class);
    // conf.setMapOutputValueClass(NullWritable.class);

    conf.setOutputKeyClass(JoBimFormat.class);
    conf.setOutputValueClass(IntWritable.class);

    args = new GenericOptionsParser(conf, args).getRemainingArgs();
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    JobClient.runJob(conf);
    return 0;
}

From source file: de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob4.java

License: Apache License
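
A map-only variant of the same driver: get supplies the extractor configuration before the mapper, input format, and zero-reducer setup are applied.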

@Override
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), GoogleSyntacticsJob4.class);
    conf.setJobName(GoogleSyntacticsJob4.class.getSimpleName());

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    // delete output path for testing purposes
    // FileSystem.get(conf).delete(new Path(args[1]), true);

    String extractorConfigurationFiles = conf.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        conf.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }

    String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
    for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
        DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), conf);

    conf.setMapperClass(GoogleSyntacticsJob4Mapper.class);
    conf.setInputFormat(TextInputFormat.class);
    conf.setMapOutputKeyClass(NullWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setNumReduceTasks(0);
    conf.setCombinerClass(IdentityReducer.class);

    JobClient.runJob(conf);
    return 0;
}

From source file: de.tudarmstadt.lt.n2n.hadoop.pipetests.GoogleSyntacticsJob5.java

License: Apache License
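
Here the configure hook combines the get fallback pattern with explicit per-task memory settings and a single reducer.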

@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }
    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e) {
        e.printStackTrace();
    }
    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);
    job.setMapperClass(JoBimMapper.class);
    job.setReducerClass(JoBimReducer.class);
    job.setOutputFormat(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMemoryForMapTask(4096);
    job.setMemoryForReduceTask(4096);
    job.set("mapred.child.java.opts", "-Xmx4096m");
    job.setNumReduceTasks(1); // reset to default
}

From source file: de.tudarmstadt.lt.n2n.hadoop.PreparsedJob.java

License: Apache License
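
The same fallback pattern again; the job ends with a NullOutputFormat so that only the output produced by the CasConsumer is kept.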

@Override
public void configure(JobConf job) {
    String extractorConfigurationFiles = job.get(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS);
    if (extractorConfigurationFiles == null) {
        extractorConfigurationFiles = StringUtils.join(SHARED_CONSTANTS.DEFAULT_EXTRACTOR_CONFIGURATIONS, ',');
        System.out.format("Extractorconfigurationfile parameter not set. Assuming -D%s=%s %n",
                SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
        job.set(SHARED_CONSTANTS.PARAM_EXTRACTORCONFIGS, extractorConfigurationFiles);
    }
    try {
        String[] extractorConfigurationFilesArr = extractorConfigurationFiles.split(",");
        for (int i = 0; i < extractorConfigurationFilesArr.length; i++)
            DistributedCache.addFileToClassPath(new Path(extractorConfigurationFilesArr[i]), job);
    } catch (IOException e1) {
        e1.printStackTrace();
    }

    Text2CASInputFormat.setDocumentTextExtractorClass(job, KeyPlusValueAsDocumentExtractor.class);
    job.setOutputFormat(NullOutputFormat.class); // ignore the serialized cas and use only the output from the CasConsumer
}

From source file: de.tudarmstadt.ukp.dkpro.bigdata.io.hadoop.Text2CASInputFormat.java

License: Apache License
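
Here get retrieves an optional extractor class name; when present it is instantiated reflectively, otherwise the record reader runs without a text converter.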

@Override
public RecordReader<Text, CASWritable> getRecordReader(InputSplit split, JobConf jobConf, Reporter reporter)
        throws IOException {
    DocumentTextExtractor textConverter = null;
    String textConverterClass = jobConf.get("dkpro.uima.text2casinputformat.documenttextextractor");
    if (textConverterClass != null) {
        try {
            textConverter = (DocumentTextExtractor) Class.forName(textConverterClass).newInstance();
        } catch (InstantiationException e) {
            e.printStackTrace();
        } catch (IllegalAccessException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
    return new Text2CASRecordReader((FileSplit) split, jobConf, reporter, textConverter);
}

From source file: edu.berkeley.chukwa_xtrace.TestXtrExtract.java

License: Apache License
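
A MiniMRCluster test that asserts properties set on the cluster's JobConf are visible through get and getInt.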

public void testArchiving() throws Exception {

    System.out.println("starting archive test");
    Configuration conf = new Configuration();
    System.setProperty("hadoop.log.dir", System.getProperty("test.build.data", "/tmp"));
    MiniDFSCluster dfs = new MiniDFSCluster(conf, NUM_HADOOP_SLAVES, true, null);
    FileSystem fileSys = dfs.getFileSystem();
    fileSys.delete(OUTPUT_DIR, true);//nuke output dir

    writeASinkFile(conf, fileSys, INPUT_DIR, 1000);

    FileStatus fstat = fileSys.getFileStatus(INPUT_DIR);
    assertTrue(fstat.getLen() > 10);

    System.out.println("filesystem is " + fileSys.getUri());
    conf.set("fs.default.name", fileSys.getUri().toString());
    conf.setInt("io.sort.mb", 1);
    conf.setInt("io.sort.factor", 5);
    conf.setInt("mapred.tasktracker.map.tasks.maximum", 2);
    conf.setInt("mapred.tasktracker.reduce.tasks.maximum", 2);

    MiniMRCluster mr = new MiniMRCluster(NUM_HADOOP_SLAVES, fileSys.getUri().toString(), 1);
    String[] archiveArgs = { INPUT_DIR.toString(), fileSys.getUri().toString() + OUTPUT_DIR.toString() };

    JobConf jc = mr.createJobConf(new JobConf(conf));
    assertEquals("true", jc.get("archive.groupByClusterName"));
    assertEquals(1, jc.getInt("io.sort.mb", 5));

    int returnVal = ToolRunner.run(jc, new XtrExtract(), archiveArgs);
    assertEquals(0, returnVal);
    fstat = fileSys.getFileStatus(new Path("/chukwa/archives/foocluster/HadoopLogProcessor_2008_05_29.arc"));
    assertTrue(fstat.getLen() > 10);

    Thread.sleep(1000);

    System.out.println("done!");
}

From source file: edu.brown.cs.mapreduce.BenchmarkBase.java

License: Open Source License
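
A command-line driver in which the -property flag uses get to print an arbitrary configuration value before the job is assembled.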

public JobConf getJobConf() {
    JobConf jobConf = new JobConf(this.conf, this.benchmarkClass);
    //
    // Options
    //
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; i++) {
        try {
            //
            // Print property and exit
            //
            if ("-property".equals(args[i])) {
                String prop = jobConf.get(args[i + 1]);
                System.out.println(prop);
                System.exit(0);
                //
                // # of Maps
                //
            } else if ("-m".equals(args[i])) {
                this.num_of_maps = Integer.parseInt(args[++i]);
                //
                // # of Reduces
                //
            } else if ("-r".equals(args[i])) {
                this.num_of_reduces = Integer.parseInt(args[++i]);
                //
                // Enable debug
                //
            } else if ("-debug".equals(args[i])) {
                this.debug = true;
                //
                // Enable single output file for results
                //
            } else if ("-combine".equals(args[i])) {
                this.combine = true;
                //
                // Tell jobs to compress their intermediate output files
                //
            } else if ("-compress".equals(args[i])) {
                this.compress = true;
                //
                // We're using TupleWritable (which has to be in a SequenceFile)
                //
            } else if ("-tuple".equals(args[i])) {
                this.tuple_data = true;
                this.sequence_file = true;
                //
                // Use SequenceFiles for initial input
                //
            } else if ("-sequence".equals(args[i])) {
                this.sequence_file = true;
                //
                // Recursively load directories
                //
            } else if ("-recursive-dirs".equals(args[i])) {
                this.load_directories = true;
                //
                // Job Basename
                //
            } else if ("-basename".equals(args[i])) {
                this.job_name = args[++i];
                //
                // Misc. Properties
                //
            } else if ("-D".equals(args[i].substring(0, 2))) {
                String arg = args[i].substring(2);
                int pos = arg.indexOf('=');
                if (pos == -1) {
                    System.err.println("ERROR: Invalid properties option '" + arg + "'");
                    System.exit(1);
                }
                this.options.put(arg.substring(0, pos), arg.substring(pos + 1));
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.err.println("ERROR: Integer expected instead of " + args[i]);
            System.exit(1);
        } catch (ArrayIndexOutOfBoundsException except) {
            System.err.println("ERROR: Required parameter missing from " + args[i - 1]);
            System.exit(1);
        }
    } // FOR
      //
      // Make sure there are at least 2 parameters left: one or more input paths followed by the output path.
      //
    if (otherArgs.size() < 2) {
        System.err.println("ERROR: Wrong number of parameters: " + otherArgs.size());
        System.exit(1);
    }

    //
    // Set these flags so the jobs know about them
    //
    if (this.getSequenceFile())
        this.options.put(PROPERTY_SEQUENCEFILE, "true");
    if (this.getTupleData())
        this.options.put(PROPERTY_TUPLEDATA, "true");
    if (this.getDebug())
        this.options.put(PROPERTY_DEBUG, "true");

    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
    } catch (Exception ex) {
        ex.printStackTrace();
        System.exit(-1);
    }

    //
    // Input Paths
    //
    int cnt = otherArgs.size() - 1;
    this.input_paths = new ArrayList<Path>();
    for (int ctr = 0; ctr < cnt; ctr++) {
        Path new_path = new Path(otherArgs.get(ctr));
        try {
            if (this.load_directories && fs.getFileStatus(new_path).isDir()) {
                //int limit = 10;
                FileStatus paths[] = fs.listStatus(new_path);
                for (FileStatus p : paths) {
                    this.input_paths.add(p.getPath());
                    FileInputFormat.addInputPath(jobConf, p.getPath());
                    //if (limit-- <= 0) break;
                } // FOR
            } else {
                this.input_paths.add(new_path);
                FileInputFormat.addInputPath(jobConf, new_path);
            }
        } catch (Exception ex) {
            ex.printStackTrace();
            System.exit(-1);
        }
    } // FOR
    if (this.input_paths.isEmpty()) {
        System.err.println(
                "ERROR: No input paths were defined for '" + this.benchmarkClass.getSimpleName() + "'");
        System.exit(-1);
    }

    //
    // Output Paths
    //
    this.output_path = new Path(otherArgs.get(otherArgs.size() - 1));
    FileOutputFormat.setOutputPath(jobConf, this.output_path);

    jobConf.setJobName(this.job_name != null ? this.job_name : this.benchmarkClass.getSimpleName());
    if (this.num_of_maps >= 0)
        jobConf.setNumMapTasks(this.num_of_maps);
    if (this.num_of_reduces >= 0)
        jobConf.setNumReduceTasks(this.num_of_reduces);

    //
    // Set all properties
    //
    for (String key : this.options.keySet()) {
        jobConf.set(key, this.options.get(key));
    }

    return (jobConf);
}

From source file: edu.iit.marketbasket.Map.java
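
A mapper that caches its task ID and input file path from the standard mapred.task.id and map.input.file properties.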

@Override
public void configure(JobConf job) {
    mapTaskId = job.get("mapred.task.id"); // ID of the current task attempt
    inputFile = job.get("map.input.file"); // path of the file backing this split
}

From source file: edu.stolaf.cs.wmrserver.HadoopEngine.java

License: Apache License
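
Here get pulls the mapper and reducer class names from custom configuration keys while building a JobInfo summary.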

public JobInfo getInfo(Submission submission, RunningJob job, JobConf conf)
        throws NotFoundException, InternalException {

    JobInfo info = new JobInfo();

    info.setNativeID(submission.getHadoopID());
    info.setName(job.getJobName());
    info.setTest(false);

    if (conf == null)
        // Can't proceed any further if configuration is unavailable
        return info;

    info.setRequestedMapTasks(conf.getNumMapTasks());
    info.setRequestedReduceTasks(conf.getNumReduceTasks());
    info.setMapper(conf.get(CONF_MAPPER));
    info.setReducer(conf.get(CONF_REDUCER));
    info.setNumericSort(conf.getBoolean(CONF_NUMERIC, false));
    info.setInputPath(
            JobServiceHandler.relativizePath(_homeDir, FileInputFormat.getInputPaths(conf)[0]).toString());
    info.setOutputPath(
            JobServiceHandler.relativizePath(_homeDir, FileOutputFormat.getOutputPath(conf)).toString());

    return info;
}