List of usage examples for the org.apache.hadoop.mapred.JobConf constructors
public JobConf(Configuration conf)
public JobConf(Class exampleClass)
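The examples below exercise two overloads: JobConf(Configuration conf) copies an existing Configuration into the new job configuration, while JobConf(Class exampleClass) additionally lets Hadoop locate the job jar from the jar containing that class. A minimal sketch of both (Driver is a hypothetical driver class, not taken from the examples below):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class Driver {
    public static void main(String[] args) {
        // Copy an existing Configuration into a job configuration.
        JobConf fromConf = new JobConf(new Configuration());

        // Resolve the job jar from the jar that contains Driver.
        JobConf fromClass = new JobConf(Driver.class);
    }
}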
From source file:babel.prep.langidtime.LangAndTimeExtractor.java
License:Apache License
/**
 * Configures a map-only language id job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDir, String referrer) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("identify languages and collect time for pages in " + pagesSubDir);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapperClass(LangAndTimeMapper.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    // ANNI EDIT
    job.setNumMapTasks(2);
    job.setNumReduceTasks(2);
    // END ANNI EDIT

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDir));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.langidtime." + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    job.set(JOB_PROP_JOB_REFERRER, referrer);

    return job;
}
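A driver would typically submit the returned JobConf via JobClient; a minimal hedged sketch of such a call site (the surrounding driver is an assumption, createJobConf is the method above):

JobConf job = createJobConf(crawlDir, pagesSubDir, referrer);
RunningJob running = JobClient.runJob(job); // blocks until the job finishes or fails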
From source file:babel.prep.merge.PageMerger.java
License:Apache License
/**
 * Configures a reduce-only page merge job.
 */
protected JobConf createJobConf(String crawlDir, String pagesSubDirOne, String pagesSubDirTwo) throws IOException {
    JobConf job = new JobConf(getConf());
    job.setJobName("merge pages in " + pagesSubDirOne + " and " + pagesSubDirTwo);

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setReducerClass(PageMergeReducer.class);
    job.setOutputFormat(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Page.class);

    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirOne));
    FileInputFormat.addInputPath(job, new Path(crawlDir, pagesSubDirTwo));

    Path outDir = new Path(new Path(crawlDir, PAGES_SUBDIR), "pages.merge." + getCurTimeStamp());
    m_fs.delete(outDir, true);
    FileOutputFormat.setOutputPath(job, outDir);

    setUniqueTempDir(job);

    return job;
}
From source file:babel.prep.PrepStep.java
License:Apache License
public PrepStep() throws IOException {
    this(new JobConf(PrepStep.class));
    m_fs = FileSystem.get(getConf());
}
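Constructing the JobConf from PrepStep.class tells Hadoop which jar to ship to the cluster; it is roughly equivalent to this sketch:

JobConf conf = new JobConf();        // load default resources
conf.setJarByClass(PrepStep.class);  // use the jar containing PrepStep as the job jar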
From source file:bixo.config.BixoPlatform.java
License:Apache License
public BixoPlatform(Class applicationJarClass, Configuration conf) throws Exception {
    this(applicationJarClass, new JobConf(conf));
}
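JobConf(Configuration) copies the entries of the supplied Configuration, so later changes to the JobConf leave the original untouched. A small sketch of that behavior (the key name is made up for illustration):

Configuration conf = new Configuration();
conf.set("example.key", "original");
JobConf job = new JobConf(conf);   // entries are copied in
job.set("example.key", "changed"); // conf still returns "original"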
From source file:boa.aggregators.MLAggregator.java
License:Apache License
public void saveModel(Object model) {
    FSDataOutputStream out = null;
    FileSystem fileSystem = null;
    Path filePath = null;

    try {
        JobContext context = (JobContext) getContext();
        Configuration configuration = context.getConfiguration();
        int boaJobId = configuration.getInt("boa.hadoop.jobid", 0);

        // Build a JobConf from the task's Configuration so the job's
        // output path (and its file system) can be resolved.
        JobConf job = new JobConf(configuration);
        Path outputPath = FileOutputFormat.getOutputPath(job);
        fileSystem = outputPath.getFileSystem(context.getConfiguration());

        fileSystem.mkdirs(new Path("/boa", new Path("" + boaJobId)));
        filePath = new Path("/boa",
                new Path("" + boaJobId, new Path(("" + getKey()).split("\\[")[0] + "ML.model")));

        if (fileSystem.exists(filePath))
            return;

        out = fileSystem.create(filePath);

        // Serialize the model into a byte array, then write it out to HDFS.
        ByteArrayOutputStream byteOutStream = new ByteArrayOutputStream();
        ObjectOutputStream objectOut = new ObjectOutputStream(byteOutStream);
        objectOut.writeObject(model);
        objectOut.close();
        byte[] serializedObject = byteOutStream.toByteArray();
        out.write(serializedObject, 0, serializedObject.length);

        this.collect(filePath.toString());
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        try {
            if (out != null)
                out.close();
        } catch (final Exception e) {
            e.printStackTrace();
        }
    }
}
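A hedged sketch of reading such a model back from HDFS (the loadModel name is an assumption; it simply mirrors the serialization above):

private static Object loadModel(FileSystem fileSystem, Path filePath) throws IOException, ClassNotFoundException {
    // Mirror of saveModel: open the file and deserialize the single object.
    try (ObjectInputStream in = new ObjectInputStream(fileSystem.open(filePath))) {
        return in.readObject();
    }
}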
From source file:boa.io.BoaOutputCommitter.java
License:Apache License
@Override
public void abortJob(JobContext context, JobStatus.State runState) throws java.io.IOException {
    super.abortJob(context, runState);

    // Look up the aborted job via the old mapred API so per-task
    // diagnostics can be collected and reported.
    final JobClient jobClient = new JobClient(new JobConf(context.getConfiguration()));
    final RunningJob job = jobClient.getJob(
            (org.apache.hadoop.mapred.JobID) JobID.forName(context.getConfiguration().get("mapred.job.id")));

    String diag = "";
    for (final TaskCompletionEvent event : job.getTaskCompletionEvents(0)) {
        switch (event.getTaskStatus()) {
        case SUCCEEDED:
            break;
        case FAILED:
        case KILLED:
        case OBSOLETE:
        case TIPFAILED:
            diag += "Diagnostics for: " + event.getTaskTrackerHttp() + "\n";
            for (final String s : job.getTaskDiagnostics(event.getTaskAttemptId()))
                diag += s + "\n";
            diag += "\n";
            break;
        }
    }

    updateStatus(diag, context.getConfiguration().getInt("boa.hadoop.jobid", 0));
}
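The same JobClient lookup works outside a committer, for example to poll a job's state. A minimal sketch, assuming conf and jobIdString (a value like the one stored under mapred.job.id) are in scope:

JobClient client = new JobClient(new JobConf(conf));
RunningJob running = client.getJob((org.apache.hadoop.mapred.JobID) JobID.forName(jobIdString));
if (running != null && running.isComplete()) {
    System.out.println("succeeded: " + running.isSuccessful());
}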
From source file:br.eti.kinoshita.hadoop.WordCount.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(WordCount.class);
    conf.setJarByClass(WordCount.class);
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    conf.setMapperClass(Map.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    //FileInputFormat.setInputPaths(conf, new Path("hdfs://chuva:9000/test/leiseca."));
    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
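The Map and Reduce classes wired in above are not part of this listing; a minimal sketch of what the mapper could look like under the old mapred API (an assumption, not this project's actual code):

public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
    private static final IntWritable ONE = new IntWritable(1);
    private final Text word = new Text();

    public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output,
            Reporter reporter) throws IOException {
        // Emit (token, 1) for every whitespace-separated token in the line.
        for (String token : value.toString().split("\\s+")) {
            word.set(token);
            output.collect(word, ONE);
        }
    }
}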
From source file:br.ufrj.nce.recureco.distributedindex.indexer.IndexerMain.java
License:Open Source License
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(IndexerMain.class);
    conf.setJobName("indexer");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(IndexerMap.class);
    conf.setCombinerClass(IndexerReduce.class);
    conf.setReducerClass(IndexerReduce.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
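Note that IndexerReduce is registered as both combiner and reducer; this only works because its input and output key/value types coincide (Text/Text), the usual requirement for reusing a reducer as a combiner.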
From source file:Brush.AdjustMateEdge.java
License:Apache License
public RunningJob run(String inputPath, String outputPath, long reads, long ctg_sum) throws Exception {
    sLogger.info("Tool name: AdjustMateEdge");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(AdjustMateEdge.class);
    conf.setJobName("AdjustMateEdge " + inputPath);
    conf.setLong("READS", reads);
    conf.setLong("CTG_SUM", ctg_sum);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(AdjustMateEdgeMapper.class);
    conf.setReducerClass(AdjustMateEdgeReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
From source file:Brush.Compressible.java
License:Apache License
public RunningJob run(String inputPath, String outputPath) throws Exception {
    sLogger.info("Tool name: Compressible");
    sLogger.info(" - input: " + inputPath);
    sLogger.info(" - output: " + outputPath);

    //JobConf conf = new JobConf(Stats.class);
    JobConf conf = new JobConf(Compressible.class);
    conf.setJobName("Compressible " + inputPath);

    BrushConfig.initializeConfiguration(conf);

    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(CompressibleMapper.class);
    conf.setReducerClass(CompressibleReducer.class);

    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);

    return JobClient.runJob(conf);
}
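AdjustMateEdge and Compressible configure their jobs identically apart from the job class, name, extra parameters, and mapper/reducer. A hedged sketch of a helper that could factor out that shared skeleton (configureTextJob is a hypothetical name, not from the Brush sources):

private static JobConf configureTextJob(Class<?> jobClass, String jobName, String inputPath,
        String outputPath) throws IOException {
    JobConf conf = new JobConf(jobClass);
    conf.setJobName(jobName + " " + inputPath);
    FileInputFormat.addInputPath(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    // delete the output directory if it exists already
    FileSystem.get(conf).delete(new Path(outputPath), true);
    return conf;
}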