Example usage for org.apache.hadoop.mapred JobConf JobConf

Introduction

This page collects usage examples for the org.apache.hadoop.mapred.JobConf constructor.

Prototype

public JobConf(boolean loadDefaults) 

Document

Creates a new map/reduce configuration in which reading from the default resources can be turned off; when loadDefaults is false, the default resources (core-default.xml, core-site.xml, and so on) are not loaded.
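
None of the usage examples below call this overload directly; they construct a JobConf from a Configuration or a Class instead. A minimal sketch of the loadDefaults constructor itself, assuming Hadoop is on the classpath (the resource path /etc/hadoop/my-cluster-site.xml is a hypothetical example):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;

public class NoDefaultsExample {
    public static void main(String[] args) {
        // loadDefaults = false: skip the default resources
        // (core-default.xml, core-site.xml, and so on).
        JobConf conf = new JobConf(false);
        // Load only an explicitly named resource (hypothetical path).
        conf.addResource(new Path("/etc/hadoop/my-cluster-site.xml"));
        System.out.println(conf.get("fs.defaultFS"));
    }
}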

Usage

From source file: babel.prep.langidtime.LangAndTimeExtractor.java

License: Apache License

/**
 * Configures a map-only language id job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, String referrer) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("identify languages and collect time for pages in " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(LangAndTimeMapper.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    //ANNI EDIT
    job.setNumMapTasks(2);
    job.setNumReduceTasks(2);
    //END ANNI EDIT

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.langidtime." + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    job.set(JOB_PROP_JOB_REFERRER, referrer);

    return job;
}
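
The JobConf(Configuration) constructor used here copies the settings from getConf(), so the job-specific calls that follow (formats, mapper class, task counts) do not mutate the tool's shared configuration.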

From source file: babel.prep.merge.PageMerger.java

License: Apache License

/**
 * Configures a reduce-only page merge job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDirOne, String pagesSubDirTwo)
        throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("merge pages in " + pagesSubDirOne + " and " + pagesSubDirTwo);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setReducerClass(PageMergeReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirOne));
    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirTwo));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.merge." + getCurTimeStamp());
    m_fs.delete(outDir, true);

    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}

From source file: babel.prep.PrepStep.java

License: Apache License

public PrepStep() throws IOException {
    this(new JobConf(PrepStep.class));
    m_fs = FileSystem.get(getConf());
}
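
Passing a class to JobConf(Class exampleClass) tells Hadoop to locate the jar that contains that class (the same effect as setJarByClass), so the job's code can be shipped to the cluster.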

From source file: bixo.config.BixoPlatform.java

License: Apache License

public BixoPlatform(Class applicationJarClass, Configuration conf) throws Exception {
    this(applicationJarClass, new JobConf(conf));
}

From source file: boa.aggregators.MLAggregator.java

License: Apache License

public void saveModel(Object model) {
     FSDataOutputStream out = null;
     FileSystem fileSystem = null;
     Path filePath = null;
     try {
         JobContext context = (JobContext) getContext();
         Configuration configuration = context.getConfiguration();
         int boaJobId = configuration.getInt("boa.hadoop.jobid", 0);
         JobConf job = new JobConf(configuration);
         Path outputPath = FileOutputFormat.getOutputPath(job);
         fileSystem = outputPath.getFileSystem(context.getConfiguration());

         fileSystem.mkdirs(new Path("/boa", new Path("" + boaJobId)));
         filePath = new Path("/boa",
                 new Path("" + boaJobId, new Path(("" + getKey()).split("\\[")[0] + "ML.model")));

         if (fileSystem.exists(filePath))
             return;

         out = fileSystem.create(filePath);
         ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
         ObjectOutputStream objectOut = new ObjectOutputStream(byteOutStream);
         objectOut.writeObject(model);
         objectOut.close();

         byte[] serializedObject = byteOutStream.toByteArray();
         out.write(serializedObject, 0, serializedObject.length);

         this.collect(filePath.toString());

     } catch (Exception e) {
         e.printStackTrace();
     } finally {
         try {
             if (out != null)
                 out.close();
         } catch (final Exception e) {
             e.printStackTrace();
         }
     }
 }
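
The JobConf here is built from the task's Configuration purely so it can be handed to FileOutputFormat.getOutputPath, whose old-API signature expects a JobConf rather than a plain Configuration.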

From source file: boa.io.BoaOutputCommitter.java

License: Apache License

@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));
    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0))
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }
    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
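
Here a throwaway JobConf wraps the committer's Configuration so that a JobClient can connect to the cluster, look the job up by its JobID, and collect per-task diagnostics for the abort message.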

From source file: br.eti.kinoshita.hadoop.WordCount.java

License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJarByClass(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path("hdfs://chuva:9000/test/leiseca."));
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
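
JobClient.runJob(conf) submits the job and blocks until it finishes, printing progress to the console and throwing an IOException if the job fails; the Brush examples further down return its RunningJob result so callers can inspect the finished job.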

From source file: br.ufrj.nce.recureco.distributedindex.indexer.IndexerMain.java

License: Open Source License

public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(IndexerMain.class);
    conf.setJobName("indexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IndexerMap.class);
    conf.setCombinerClass(IndexerReduce.class);
    conf.setReducerClass(IndexerReduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}

From source file: Brush.AdjustMateEdge.java

License: Apache License

public RunningJob run(String inputPath, String outputPath, long reads, long ctg_sum) throws Exception {
    sLogger.info("Tool name: AdjustMateEdge");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(AdjustMateEdge.class);
    conf.setJobName("AdjustMateEdge " + inputPath);

    conf.setLong("READS", reads);
    conf.setLong("CTG_SUM", ctg_sum);
    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(AdjustMateEdgeMapper.class);
    conf.setReducerClass(AdjustMateEdgeReducer.class);

    //delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
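
The pre-run delete matters because the old-API output checking fails job submission when the output path already exists; clearing the directory first lets the tool be re-run with the same paths.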

From source file: Brush.Compressible.java

License: Apache License

public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Compressible");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(Compressible.class);
    conf.setJobName("Compressible " + inputPath);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CompressibleMapper.class);
    conf.setReducerClass(CompressibleReducer.class);

    //delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}