List of usage examples for org.apache.hadoop.mapreduce Job setCombinerClass
public void setCombinerClass(Class<? extends Reducer> cls) throws IllegalStateException
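Before the harvested examples below, a minimal sketch of the canonical pattern. Note the contract: the combiner is an optional Reducer that pre-aggregates map output locally, so its input and output key/value types must both match the map output types, and the call throws IllegalStateException if the job has already been submitted. All class names in this sketch (CombinerSketch, TokenMapper, SumReducer) are illustrative assumptions, not taken from the examples that follow.

    import java.io.IOException;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.Mapper;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
    import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

    public class CombinerSketch {

        public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
            private static final IntWritable ONE = new IntWritable(1);
            private final Text word = new Text();

            @Override
            protected void map(LongWritable key, Text value, Context ctx)
                    throws IOException, InterruptedException {
                for (String token : value.toString().split("\\s+")) {
                    word.set(token);
                    ctx.write(word, ONE); // emits (word, 1); the combiner pre-sums these per map task
                }
            }
        }

        public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
            @Override
            protected void reduce(Text key, Iterable<IntWritable> values, Context ctx)
                    throws IOException, InterruptedException {
                int sum = 0;
                for (IntWritable v : values) {
                    sum += v.get();
                }
                ctx.write(key, new IntWritable(sum));
            }
        }

        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration(), "combiner-sketch");
            job.setJarByClass(CombinerSketch.class);
            job.setMapperClass(TokenMapper.class);
            // Reusing the reducer as the combiner is safe here because summing
            // counts is associative and commutative. Combiner in/out types must
            // match the map output types (Text, IntWritable). Must be called
            // before submission, or setCombinerClass throws IllegalStateException.
            job.setCombinerClass(SumReducer.class);
            job.setReducerClass(SumReducer.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            FileOutputFormat.setOutputPath(job, new Path(args[1]));
            System.exit(job.waitForCompletion(true) ? 0 : 1);
        }
    }

Many of the examples below follow this same shape: the reducer class doubles as the combiner whenever its reduce function is associative and commutative and its output types equal its input types.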
From source file:nl.utwente.trafficanalyzer.SensorCountPerRoadPerDay.java
License:Apache License
public void run(String inputPath, String outPath) throws Exception {
    Configuration conf = getConf();
    Job job = Job.getInstance(conf);
    job.setJarByClass(SensorCountPerRoadPerDay.class);
    job.setJobName(String.format("%s [%s, %s]", this.getClass().getName(), inputPath, outPath));

    // -- check if output directory already exists; and optionally delete
    String outputAlreadyExistsOption = "exit";
    Path outDir = new Path(outPath);
    if (FileSystem.get(conf).exists(outDir)) {
        if (outputAlreadyExistsOption.equalsIgnoreCase("delete")) {
            FileSystem.get(conf).delete(outDir, true);
        } else {
            System.err.println("Directory " + outPath + " already exists; exiting");
            System.exit(1);
        }
    }

    // ---- Input (Format) Options
    String inputFormat = "text";
    if (inputFormat.equalsIgnoreCase("text")) {
        job.setInputFormatClass(TextInputFormat.class);
    } else if (inputFormat.equalsIgnoreCase("sequence")) { // was a duplicated "text" check; this branch clearly selects the sequence-file format
        job.setInputFormatClass(SequenceFileInputFormat.class);
    }
    // Utils.recursivelyAddInputPaths(job, new Path(inputPath));
    FileInputFormat.addInputPath(job, new Path(inputPath));

    // Add files that should be available locally at each mapper
    // Utils.addCacheFiles(job, new String[] { });

    // ---- Mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(MyMapper.KOUT);
    job.setMapOutputValueClass(MyMapper.VOUT);

    // ---- Combiner
    job.setCombinerClass(MyCombiner.class);

    // ---- Partitioner
    // job.setPartitionerClass(MyPartitioner.class);

    // ---- Reducer
    // set the number of reducers to influence the number of output files
    job.setNumReduceTasks(1);
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(MyReducer.KOUT);
    job.setOutputValueClass(MyReducer.VOUT);

    // ---- Output Options
    String outputFormat = "text";
    if (outputFormat.equalsIgnoreCase("sequence")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
    } else if (outputFormat.equalsIgnoreCase("text")) {
        job.setOutputFormatClass(TextOutputFormat.class);
    } else if (outputFormat.equalsIgnoreCase("null")) {
        job.setOutputFormatClass(NullOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, outDir);
    FileOutputFormat.setCompressOutput(job, false);

    // ---- Start job
    job.waitForCompletion(true);
}
From source file:org.acacia.csr.java.WordCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";
    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
    }
    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true);
    */

    String dir3 = "/user/miyuru/wcout";
    String dir5 = "/user/miyuru/input";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs3 = FileSystem.get(new JobConf());
    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));

    Job job3 = new Job(conf3, "word count");
    job3.setJarByClass(WordCount.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);
    job3.setSortComparatorClass(SortComparator.class);
    job3.waitForCompletion(true);

    PrintWriter writer;
    try {
        writer = new PrintWriter("/tmp/wfile", "UTF-8");
        writer.println("");
        writer.flush();
        writer.close();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (UnsupportedEncodingException e) {
        e.printStackTrace();
    }
    System.out.println("------Done Word Count---------------");
}
From source file:org.acacia.csr.java.ZeroVertexSearcher.java
License:Apache License
public static void main(String[] args) throws Exception {
    /*
    String dir1 = "/user/miyuru/wcout";
    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    if(fs1.exists(new Path(dir1))){
        fs1.delete(new Path(dir1), true);
    }
    JobConf conf = new JobConf();
    conf.setNumMapTasks(96);
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setSortComparatorClass(SortComparator.class);
    FileInputFormat.addInputPath(job, new Path("/user/miyuru/input"));
    FileOutputFormat.setOutputPath(job, new Path(dir1));
    job.waitForCompletion(true);
    */

    String dir3 = "/user/miyuru/zout";
    String dir5 = "/user/miyuru/input";

    // We first delete the temporary directories if they exist on the HDFS
    FileSystem fs3 = FileSystem.get(new JobConf());
    if (fs3.exists(new Path(dir3))) {
        fs3.delete(new Path(dir3), true);
    }

    JobConf conf3 = new JobConf();
    conf3.setNumMapTasks(96);
    FileInputFormat.addInputPath(conf3, new Path(dir5));
    FileOutputFormat.setOutputPath(conf3, new Path(dir3));
    // If the job fails we assume that it happens because we found zero. Therefore we do not attempt again.
    conf3.set("mapred.map.max.attempts", "0");

    Job job3 = new Job(conf3, "zero_vertex_search");
    job3.setJarByClass(ZeroVertexSearcher.class);
    job3.setMapperClass(TokenizerMapper.class);
    job3.setCombinerClass(IntSumReducer.class);
    job3.setReducerClass(IntSumReducer.class);
    job3.setOutputKeyClass(LongWritable.class);
    job3.setOutputValueClass(LongWritable.class);
    job3.setNumReduceTasks(0);
    job3.setSortComparatorClass(SortComparator.class);

    try {
        job3.waitForCompletion(true);
    } catch (org.acacia.csr.java.ZeroFoundException ex) {
        System.out.println("Found Zero vertex");
        job3.killJob();
    }
    System.out.println("------Done Zero Vertex search---------------");
}
From source file:org.apache.accumulo.examples.mapreduce.UniqueColumns.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(UniqueColumns.class.getName(), args);

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = Job.getInstance(getConf());
    job.setJobName(jobName);
    job.setJarByClass(this.getClass());

    String clone = opts.getTableName();
    Connector conn = null;

    opts.setAccumuloConfigs(job);

    if (opts.offline) {
        /*
         * this example clones the table and takes it offline. If you plan to run map reduce jobs over a table many
         * times, it may be more efficient to compact the table, clone it, and then keep using the same clone as input
         * for map reduce.
         */
        conn = opts.getConnector();
        clone = opts.getTableName() + "_" + jobName;
        conn.tableOperations().clone(opts.getTableName(), clone, true, new HashMap<String, String>(),
                new HashSet<String>());
        conn.tableOperations().offline(clone);

        AccumuloInputFormat.setOfflineTableScan(job, true);
        AccumuloInputFormat.setInputTableName(job, clone);
    }

    job.setInputFormatClass(AccumuloInputFormat.class);

    job.setMapperClass(UMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setCombinerClass(UReducer.class);
    job.setReducerClass(UReducer.class);
    job.setNumReduceTasks(opts.reducers);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(opts.output));

    job.waitForCompletion(true);

    if (opts.offline) {
        conn.tableOperations().delete(clone);
    }

    return job.isSuccessful() ? 0 : 1;
}
From source file:org.apache.accumulo.examples.simple.mapreduce.UniqueColumns.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(UniqueColumns.class.getName(), args);

    String jobName = this.getClass().getSimpleName() + "_" + System.currentTimeMillis();

    Job job = JobUtil.getJob(getConf());
    job.setJobName(jobName);
    job.setJarByClass(this.getClass());

    String clone = opts.getTableName();
    Connector conn = null;

    opts.setAccumuloConfigs(job);

    if (opts.offline) {
        /*
         * this example clones the table and takes it offline. If you plan to run map reduce jobs over a table many
         * times, it may be more efficient to compact the table, clone it, and then keep using the same clone as input
         * for map reduce.
         */
        conn = opts.getConnector();
        clone = opts.getTableName() + "_" + jobName;
        conn.tableOperations().clone(opts.getTableName(), clone, true, new HashMap<String, String>(),
                new HashSet<String>());
        conn.tableOperations().offline(clone);

        AccumuloInputFormat.setOfflineTableScan(job, true);
        AccumuloInputFormat.setInputTableName(job, clone);
    }

    job.setInputFormatClass(AccumuloInputFormat.class);

    job.setMapperClass(UMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setCombinerClass(UReducer.class);
    job.setReducerClass(UReducer.class);
    job.setNumReduceTasks(opts.reducers);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(opts.output));

    job.waitForCompletion(true);

    if (opts.offline) {
        conn.tableOperations().delete(clone);
    }

    return job.isSuccessful() ? 0 : 1;
}
From source file:org.apache.airavata.gfac.hadoop.provider.impl.HadoopProvider.java
License:Apache License
public void execute(JobExecutionContext jobExecutionContext) throws GFacProviderException {
    HadoopApplicationDeploymentDescriptionType hadoopAppDesc = (HadoopApplicationDeploymentDescriptionType) jobExecutionContext
            .getApplicationContext().getApplicationDeploymentDescription().getType();
    MessageContext inMessageContext = jobExecutionContext.getInMessageContext();
    HadoopApplicationDeploymentDescriptionType.HadoopJobConfiguration jobConf = hadoopAppDesc
            .getHadoopJobConfiguration();

    try {
        // Preparing Hadoop configuration
        Configuration hadoopConf = HadoopUtils.createHadoopConfiguration(jobExecutionContext,
                isWhirrBasedDeployment, hadoopConfigDir);

        // Load jar containing map-reduce job implementation
        ArrayList<URL> mapRedJars = new ArrayList<URL>();
        mapRedJars.add(new File(jobConf.getJarLocation()).toURL());
        URLClassLoader childClassLoader = new URLClassLoader(mapRedJars.toArray(new URL[mapRedJars.size()]),
                this.getClass().getClassLoader());

        Job job = new Job(hadoopConf);
        job.setJobName(jobConf.getJobName());

        job.setOutputKeyClass(Class.forName(jobConf.getOutputKeyClass(), true, childClassLoader));
        job.setOutputValueClass(Class.forName(jobConf.getOutputValueClass(), true, childClassLoader));

        job.setMapperClass(
                (Class<? extends Mapper>) Class.forName(jobConf.getMapperClass(), true, childClassLoader));
        job.setCombinerClass(
                (Class<? extends Reducer>) Class.forName(jobConf.getCombinerClass(), true, childClassLoader));
        // Note: the original source also loaded the reducer class from jobConf.getCombinerClass(),
        // which looks like a copy-paste bug; corrected here on the assumption that the
        // job-configuration descriptor exposes a matching getReducerClass() accessor.
        job.setReducerClass(
                (Class<? extends Reducer>) Class.forName(jobConf.getReducerClass(), true, childClassLoader));

        job.setInputFormatClass(
                (Class<? extends InputFormat>) Class.forName(jobConf.getInputFormatClass(), true, childClassLoader));
        job.setOutputFormatClass(
                (Class<? extends OutputFormat>) Class.forName(jobConf.getOutputFormatClass(), true, childClassLoader));

        FileInputFormat.setInputPaths(job, new Path(hadoopAppDesc.getInputDataDirectory()));
        FileOutputFormat.setOutputPath(job, new Path(hadoopAppDesc.getOutputDataDirectory()));

        job.waitForCompletion(true);
        System.out.println(job.getTrackingURL());

        if (jobExecutionContext.getOutMessageContext() == null) {
            jobExecutionContext.setOutMessageContext(new MessageContext());
        }

        OutputParameterType[] outputParametersArray = jobExecutionContext.getApplicationContext()
                .getServiceDescription().getType().getOutputParametersArray();
        for (OutputParameterType outparamType : outputParametersArray) {
            String paramName = outparamType.getParameterName();
            if (paramName.equals("test-hadoop")) {
                ActualParameter outParam = new ActualParameter();
                outParam.getType().changeType(StringParameterType.type);
                ((StringParameterType) outParam.getType()).setValue(job.getTrackingURL());
                jobExecutionContext.getOutMessageContext().addParameter("test-hadoop", outParam);
            }
        }
    } catch (Exception e) {
        String errMessage = "Error occurred during Map-Reduce job execution.";
        logger.error(errMessage, e);
        throw new GFacProviderException(errMessage, e);
    }
}
From source file:org.apache.cassandra.example.hadoop.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    String outputReducerType = "filesystem";
    // length check added to avoid an out-of-bounds access when args is empty
    if (args != null && args.length > 0 && args[0].startsWith(OUTPUT_REDUCER_VAR)) {
        String[] s = args[0].split("=");
        if (s != null && s.length == 2)
            outputReducerType = s[1];
    }
    logger.info("output reducer type: " + outputReducerType);

    for (int i = 0; i < WordCountSetup.TEST_COUNT; i++) {
        String columnName = "text" + i;
        getConf().set(CONF_COLUMN_NAME, columnName);

        Job job = new Job(getConf(), "wordcount");
        job.setJarByClass(WordCount.class);
        job.setMapperClass(TokenizerMapper.class);

        if (outputReducerType.equalsIgnoreCase("filesystem")) {
            job.setCombinerClass(ReducerToFilesystem.class);
            job.setReducerClass(ReducerToFilesystem.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(IntWritable.class);
            FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH_PREFIX + i));
        } else {
            job.setReducerClass(ReducerToCassandra.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(IntWritable.class);
            job.setOutputKeyClass(ByteBuffer.class);
            job.setOutputValueClass(List.class);
            job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
            ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE, OUTPUT_COLUMN_FAMILY);
        }

        job.setInputFormatClass(ColumnFamilyInputFormat.class);
        ConfigHelper.setRpcPort(job.getConfiguration(), "9160");
        ConfigHelper.setInitialAddress(job.getConfiguration(), "localhost");
        ConfigHelper.setPartitioner(job.getConfiguration(), "org.apache.cassandra.dht.RandomPartitioner");
        ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE, COLUMN_FAMILY);

        SlicePredicate predicate = new SlicePredicate()
                .setColumn_names(Arrays.asList(ByteBufferUtil.bytes(columnName)));
        ConfigHelper.setInputSlicePredicate(job.getConfiguration(), predicate);

        job.waitForCompletion(true);
    }
    return 0;
}
From source file:org.apache.crunch.impl.mr.plan.JobPrototype.java
License:Apache License
private CrunchControlledJob build(Class<?> jarClass, Configuration conf, Pipeline pipeline, int numOfJobs)
        throws IOException {
    Job job = new Job(conf);
    conf = job.getConfiguration();
    conf.set(PlanningParameters.CRUNCH_WORKING_DIRECTORY, workingPath.toString());
    job.setJarByClass(jarClass);

    Set<DoNode> outputNodes = Sets.newHashSet();
    Path outputPath = new Path(workingPath, "output");
    MSCROutputHandler outputHandler = new MSCROutputHandler(job, outputPath, group == null);
    for (Target target : targetsToNodePaths.keySet()) {
        DoNode node = null;
        for (NodePath nodePath : targetsToNodePaths.get(target)) {
            if (node == null) {
                PType<?> ptype = nodePath.tail().getPType();
                node = DoNode.createOutputNode(target.toString(), target.getConverter(ptype), ptype);
                outputHandler.configureNode(node, target);
            }
            outputNodes.add(walkPath(nodePath.descendingIterator(), node));
        }
    }

    Set<DoNode> mapSideNodes = Sets.newHashSet();
    if (mapSideNodePaths != null) {
        for (Target target : mapSideNodePaths.keySet()) {
            DoNode node = null;
            for (NodePath nodePath : mapSideNodePaths.get(target)) {
                if (node == null) {
                    PType<?> ptype = nodePath.tail().getPType();
                    node = DoNode.createOutputNode(target.toString(), target.getConverter(ptype), ptype);
                    outputHandler.configureNode(node, target);
                }
                mapSideNodes.add(walkPath(nodePath.descendingIterator(), node));
            }
        }
    }

    job.setMapperClass(CrunchMapper.class);
    List<DoNode> inputNodes;
    DoNode reduceNode = null;
    if (group != null) {
        job.setReducerClass(CrunchReducer.class);
        List<DoNode> reduceNodes = Lists.newArrayList(outputNodes);
        serialize(reduceNodes, conf, workingPath, NodeContext.REDUCE);
        reduceNode = reduceNodes.get(0);

        if (combineFnTable != null) {
            job.setCombinerClass(CrunchCombiner.class);
            DoNode combinerInputNode = group.createDoNode();
            DoNode combineNode = combineFnTable.createCombineNode();
            combineNode.addChild(group.getGroupingNode());
            combinerInputNode.addChild(combineNode);
            serialize(ImmutableList.of(combinerInputNode), conf, workingPath, NodeContext.COMBINE);
        }

        group.configureShuffle(job);

        DoNode mapOutputNode = group.getGroupingNode();
        Set<DoNode> mapNodes = Sets.newHashSet(mapSideNodes);
        for (NodePath nodePath : mapNodePaths) {
            // Advance these one step, since we've already configured
            // the grouping node, and the PGroupedTableImpl is the tail
            // of the NodePath.
            Iterator<PCollectionImpl<?>> iter = nodePath.descendingIterator();
            iter.next();
            mapNodes.add(walkPath(iter, mapOutputNode));
        }
        inputNodes = Lists.newArrayList(mapNodes);
    } else { // No grouping
        job.setNumReduceTasks(0);
        inputNodes = Lists.newArrayList(outputNodes);
    }
    serialize(inputNodes, conf, workingPath, NodeContext.MAP);

    if (inputNodes.size() == 1) {
        DoNode inputNode = inputNodes.get(0);
        inputNode.getSource().configureSource(job, -1);
    } else {
        for (int i = 0; i < inputNodes.size(); i++) {
            inputNodes.get(i).getSource().configureSource(job, i);
        }
        job.setInputFormatClass(CrunchInputFormat.class);
    }
    job.setJobName(createJobName(conf, pipeline.getName(), inputNodes, reduceNode, numOfJobs));

    return new CrunchControlledJob(jobID, job, new CrunchJobHooks.PrepareHook(job),
            new CrunchJobHooks.CompletionHook(job, outputPath, outputHandler.getMultiPaths(), group == null));
}
From source file:org.apache.druid.indexer.DeterminePartitionsJob.java
License:Apache License
@Override
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!(config.getPartitionsSpec() instanceof SingleDimensionPartitionsSpec)) {
            throw new ISE(
                    "DeterminePartitionsJob can only be run for SingleDimensionPartitionsSpec, partitionSpec found [%s]",
                    config.getPartitionsSpec());
        }
        final SingleDimensionPartitionsSpec partitionsSpec = (SingleDimensionPartitionsSpec) config
                .getPartitionsSpec();

        if (!partitionsSpec.isAssumeGrouped()) {
            groupByJob = Job.getInstance(new Configuration(), StringUtils.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            JobHelper.injectSystemProperties(groupByJob);
            config.addJobProperties(groupByJob);

            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                    JobHelper.distributedClassPath(config.makeIntermediatePath()), groupByJob);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            // Store the jobId in the file
            if (groupByJob.getJobID() != null) {
                JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), groupByJob.getJobID().toString());
            }

            try {
                if (!groupByJob.waitForCompletion(true)) {
                    log.error("Job failed: %s", groupByJob.getJobID());
                    failureCause = Utils.getFailureMessage(groupByJob, config.JSON_MAPPER);
                    return false;
                }
            } catch (IOException ioe) {
                if (!Utils.checkAppSuccessForJobIOException(ioe, groupByJob,
                        config.isUseYarnRMJobStatusFallback())) {
                    throw ioe;
                }
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = Job.getInstance(new Configuration(), StringUtils.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        JobHelper.injectSystemProperties(dimSelectionJob);
        config.addJobProperties(dimSelectionJob);

        if (!partitionsSpec.isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob,
                DeterminePartitionsDimSelectionPartitioner.class);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setNumReduceTasks(config.getGranularitySpec().bucketIntervals().get().size());
        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), dimSelectionJob);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        // Store the jobId in the file
        if (dimSelectionJob.getJobID() != null) {
            JobHelper.writeJobIdToFile(config.getHadoopJobIdFileName(), dimSelectionJob.getJobID().toString());
        }

        try {
            if (!dimSelectionJob.waitForCompletion(true)) {
                log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
                failureCause = Utils.getFailureMessage(dimSelectionJob, config.JSON_MAPPER);
                return false;
            }
        } catch (IOException ioe) {
            if (!Utils.checkAppSuccessForJobIOException(ioe, dimSelectionJob,
                    config.isUseYarnRMJobStatusFallback())) {
                throw ioe;
            }
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].", config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>();
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals().get()) {
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(segmentGranularity);
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (Utils.exists(dimSelectionJob, fileSystem, partitionInfoPath)) {
                List<ShardSpec> specs = config.JSON_MAPPER.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", segmentGranularity, i, actualSpecs.get(i));
                }

                shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.apache.hadoop.examples.DBCountPageView.java
License:Apache License
@Override
// Usage: DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;

    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }

    initialize(driverClassName, url);
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);

    Job job = Job.getInstance(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);

    DBInputFormat.setInput(job, AccessRecord.class, "HAccess", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);

    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}