List of usage examples for the org.apache.hadoop.mapred.JobConf constructors
public JobConf(Configuration conf)
public JobConf(Class exampleClass)
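The examples below exercise two overloads: JobConf(Configuration conf) copies an existing Configuration into the new job configuration, while JobConf(Class exampleClass) additionally lets Hadoop locate the job jar from the jar containing that class. A minimal sketch of both (Driver is a hypothetical driver class, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class Driver {
    public static void main(String[] args) {
        // Copy an existing Configuration into a job configuration.
        JobConf fromConf = new JobConf(new Configuration());

        // Resolve the job jar from the jar that contains Driver.
        JobConf fromClass = new JobConf(Driver.class);
    }
}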
From source file:babel.prep.langidtime.LangAndTimeExtractor.java
License:Apache License
/**
 * Configures a map-only language id job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, String referrer) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("identify languages and collect time for pages in " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(LangAndTimeMapper.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    // ANNI EDIT
    job.setNumMapTasks(2);
    job.setNumReduceTasks(2);
    // END ANNI EDIT

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.langidtime." + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    job.set(JOB_PROP_JOB_REFERRER, referrer);

    return job;
}
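A driver would typically submit the returned JobConf via JobClient; a minimal hedged sketch of such a call site (the surrounding driver is an assumption, createJobConf is the method above):

JobConf job = createJobConf(crawlDir, pagesSubDir, referrer);
RunningJob running = JobClient.runJob(job); // blocks until the job finishes or fails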
From source file:babel.prep.merge.PageMerger.java
License:Apache License
/**
 * Configures a reduce-only page merge job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDirOne, String pagesSubDirTwo) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("merge pages in " + pagesSubDirOne + " and " + pagesSubDirTwo);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setReducerClass(PageMergeReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirOne));
    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirTwo));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.merge." + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}
From source file:babel.prep.PrepStep.java
License:Apache License
public PrepStep() throws IOException {
    this(new JobConf(PrepStep.class));
    m_fs = FileSystem.get(getConf());
}
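Constructing the JobConf from PrepStep.class tells Hadoop which jar to ship to the cluster; it is roughly equivalent to this sketch:

JobConf conf = new JobConf();        // load default resources
conf.setJarByClass(PrepStep.class);  // use the jar containing PrepStep as the job jar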
From source file:bixo.config.BixoPlatform.java
License:Apache License
public BixoPlatform(Class applicationJarClass, Configuration conf) throws Exception {
    this(applicationJarClass, new JobConf(conf));
}
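JobConf(Configuration) copies the entries of the supplied Configuration, so later changes to the JobConf leave the original untouched. A small sketch of that behavior (the key name is made up for illustration):

Configuration conf = new Configuration();
conf.set("example.key", "original");
JobConf job = new JobConf(conf);   // entries are copied in
job.set("example.key", "changed"); // conf still returns "original"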
From source file:boa.aggregators.MLAggregator.java
License:Apache License
public void saveModel(Object model) {
    FSDataOutputStream out = null;
    FileSystem fileSystem = null;
    Path filePath = null;

    try {
        JobContext context = (JobContext) getContext();
        Configuration configuration = context.getConfiguration();
        int boaJobId = configuration.getInt("boa.hadoop.jobid", 0);

        // Build a JobConf from the task's Configuration so the job's
        // output path (and its file system) can be resolved.
        JobConf job = new JobConf(configuration);
        Path outputPath = FileOutputFormat.getOutputPath(job);
        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        fileSystem.mkdirs(new Path("/boa", new Path("" + boaJobId)));
        filePath = new Path("/boa",
                new Path("" + boaJobId, new Path(("" + getKey()).split("\\[")[0] + "ML.model")));

        if (fileSystem.exists(filePath))
            return;

        out = fileSystem.create(filePath);

        // Serialize the model into a byte array, then write it out to HDFS.
        ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
        ObjectOutputStream objectOut = new ObjectOutputStream(byteOutStream);
        objectOut.writeObject(model);
        objectOut.close();
        byte[] serializedObject = byteOutStream.toByteArray();
        out.write(serializedObject, 0, serializedObject.length);

        this.collect(filePath.toString());
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}
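A hedged sketch of reading such a model back from HDFS (the loadModel name is an assumption; it simply mirrors the serialization above):

private static Object loadModel(FileSystem fileSystem, Path filePath) throws IOException, ClassNotFoundException {
    // Mirror of saveModel: open the file and deserialize the single object.
    try (ObjectInputStream in = new ObjectInputStream(fileSystem.open(filePath))) {
        return in.readObject();
    }
}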
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    // Look up the aborted job via the old mapred API so per-task
    // diagnostics can be collected and reported.
    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0)) {
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }
    }

    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
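The same JobClient lookup works outside a committer, for example to poll a job's state. A minimal sketch, assuming conf and jobIdString (a value like the one stored under mapred.job.id) are in scope:

JobClient client = new JobClient(new JobConf(conf));
RunningJob running = client.getJob((org.apache.hadoop.mapred.JobID) JobID.forName(jobIdString));
if (running != null && running.isComplete()) {
    System.out.println("succeeded: " + running.isSuccessful());
}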
From source file:br.eti.kinoshita.hadoop.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJarByClass(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path("hdfs://chuva:9000/test/leiseca."));
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
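The Map and Reduce classes wired in above are not part of this listing; a minimal sketch of what the mapper could look like under the old mapred API (an assumption, not this project's actual code):

public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        // Emit (token, 1) for every whitespace-separated token in the line.
        for (String token : value.toString().split("\\s+")) {
            word.set(token);
            output.collect(word, ONE);
        }
    }
}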
From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerMain.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(IndexerMain.class);
    conf.setJobName("indexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IndexerMap.class);
    conf.setCombinerClass(IndexerReduce.class);
    conf.setReducerClass(IndexerReduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
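Note that IndexerReduce is registered as both combiner and reducer; this only works because its input and output key/value types coincide (Text/Text), the usual requirement for reusing a reducer as a combiner.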
From source file:Brush.AdjustMateEdge.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, long reads, long ctg_sum) throws Exception {
    sLogger.info("Tool name: AdjustMateEdge");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(AdjustMateEdge.class);
    conf.setJobName("AdjustMateEdge " + inputPath);
    conf.setLong("READS", reads);
    conf.setLong("CTG_SUM", ctg_sum);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(AdjustMateEdgeMapper.class);
    conf.setReducerClass(AdjustMateEdgeReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Brush.Compressible.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Compressible");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(Compressible.class);
    conf.setJobName("Compressible " + inputPath);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CompressibleMapper.class);
    conf.setReducerClass(CompressibleReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
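AdjustMateEdge and Compressible configure their jobs identically apart from the job class, name, extra parameters, and mapper/reducer. A hedged sketch of a helper that could factor out that shared skeleton (configureTextJob is a hypothetical name, not from the Brush sources):

private static JobConf configureTextJob(Class<?> jobClass, String jobName, String inputPath,
        String outputPath) throws IOException {
    JobConf conf = new JobConf(jobClass);
    conf.setJobName(jobName + " " + inputPath);
    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);
    return conf;
}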