List of usage examples for org.apache.hadoop.mapred.Reporter.NULL
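All of the examples below pass Reporter.NULL where the old org.apache.hadoop.mapred API requires a Reporter but no progress or counter updates are wanted, for example when driving a RecordReader or RecordWriter outside of a running MapReduce task. As a quick orientation, here is a minimal, self-contained sketch of that pattern, assuming a hypothetical input file at /tmp/input.txt and a hypothetical class name ReporterNullExample; it illustrates the common usage and is not taken from any of the projects listed below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;

public class ReporterNullExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Hypothetical input path; point this at a file that exists on your file system.
        FileInputFormat.setInputPaths(conf, new Path("/tmp/input.txt"));

        TextInputFormat format = new TextInputFormat();
        format.configure(conf);

        InputSplit[] splits = format.getSplits(conf, 1);
        // Reporter.NULL satisfies the Reporter parameter when no progress or
        // counter reporting is needed, e.g. when reading a split outside a task.
        RecordReader<LongWritable, Text> reader = format.getRecordReader(splits[0], conf, Reporter.NULL);

        LongWritable key = reader.createKey();
        Text value = reader.createValue();
        while (reader.next(key, value)) {
            System.out.println(key + "\t" + value);
        }
        reader.close();
    }
}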
From source file:com.inmobi.databus.readers.DatabusStreamReader.java
License:Apache License
protected boolean openCurrentFile(boolean next) throws IOException {
    closeCurrentFile();
    if (getCurrentFile() == null) {
        return false;
    }
    if (next) {
        resetCurrentFileSettings();
    }
    LOG.info("Opening file:" + getCurrentFile() + " NumLinesTobeSkipped when" + " opening:" + currentLineNum);
    try {
        FileStatus status = fsGetFileStatus(getCurrentFile());
        if (status != null) {
            currentFileSplit = new FileSplit(getCurrentFile(), 0L, status.getLen(), new String[0]);
            recordReader = input.getRecordReader(currentFileSplit, new JobConf(conf), Reporter.NULL);
            metrics.incrementNumberRecordReaders();
            msgKey = recordReader.createKey();
            msgValue = recordReader.createValue();
            if (msgValue instanceof Writable) {
                needsSerialize = true;
            } else {
                assert (msgValue instanceof Message);
                needsSerialize = false;
            }
            skipLines(currentLineNum);
        } else {
            LOG.info("CurrentFile:" + getCurrentFile() + " does not exist");
        }
    } catch (FileNotFoundException fnfe) {
        LOG.info("CurrentFile:" + getCurrentFile() + " does not exist");
    }
    return true;
}
From source file:com.m6d.filecrush.crush.Crush.java
License:Apache License
private void standAlone() throws IOException {
    String absSrcDir = fs.makeQualified(srcDir).toUri().getPath();
    String absOutDir = fs.makeQualified(outDir).toUri().getPath();

    Text bucket = new Text(absSrcDir + "-0");

    List<Text> files = new ArrayList<Text>();

    FileStatus[] contents = fs.listStatus(new Path(absSrcDir));

    for (FileStatus content : contents) {
        if (!content.isDir()) {
            if (ignoredFilesMatcher != null) {
                // Check for files to skip
                ignoredFilesMatcher.reset(content.getPath().toUri().getPath());
                if (ignoredFilesMatcher.matches()) {
                    LOG.info("Ignoring " + content.getPath().toString());
                    continue;
                }
            }
            files.add(new Text(content.getPath().toUri().getPath()));
        }
    }

    /*
     * Is the directory empty?
     */
    if (files.isEmpty()) {
        return;
    }

    /*
     * We trick the reducer into doing some work for us by setting these configuration properties.
     */
    job.set("mapred.tip.id", "task_000000000000_00000_r_000000");
    job.set("mapred.task.id", "attempt_000000000000_0000_r_000000_0");
    job.set("mapred.output.dir", absOutDir);

    /*
     * File output committer needs this.
     */
    fs.mkdirs(new Path(absOutDir, "_temporary"));

    CrushReducer reducer = new CrushReducer();

    reducer.configure(job);
    reducer.reduce(bucket, files.iterator(), new NullOutputCollector<Text, Text>(), Reporter.NULL);
    reducer.close();

    /*
     * Use a glob here because the temporary and task attempt work dirs have funny names.
     * Include a * at the end to cover wildcards for compressed files.
     */
    Path crushOutput = new Path(absOutDir + "/*/*/crush" + absSrcDir + "/" + dest.getName() + "*");

    FileStatus[] statuses = fs.globStatus(crushOutput);

    if (statuses == null || 1 != statuses.length) {
        throw new AssertionError("Did not find the expected output in " + crushOutput.toString());
    }

    rename(statuses[0].getPath(), dest.getParent(), dest.getName());
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapred.java
License:Apache License
/**
 * Runs mapper for the single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split                split to run on
 */
@Override
@SuppressWarnings("unchecked")
public void runSplit(final MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {

    JobConf jobConf = new JobConf(this.jobConf); //Clone JobConf to prevent unexpected task interaction

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex));

    ReducerWrapperMapred.updateJobConf(jobConf, taskAttemptID, splitIndex);
    updateJobWithSplit(jobConf, split);

    InputFormat inputFormat = jobConf.getInputFormat();

    Reporter reporter = Reporter.NULL;

    //Create RecordReader
    org.apache.hadoop.mapred.RecordReader<INKEY, INVALUE> recordReader = inputFormat
            .getRecordReader((InputSplit) split, jobConf, reporter);

    //Make a mapper
    org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapred.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
        mapper.configure(jobConf);
    } catch (Exception e) {
        throw new RuntimeException("Cannot instantiate mapper " + mapperConstructor.getDeclaringClass(), e);
    }

    //These are to support map-only jobs which write output directly to HDFS.
    final RecordWriter outputRecordWriter;
    OutputCommitter outputCommitter = null;
    TaskAttemptContext taskAttemptContext = null;

    if (mapOnlyJob) {
        taskAttemptContext = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);
        OutputFormat outputFormat = jobConf.getOutputFormat();
        FileSystem fs = FileSystem.get(jobConf);
        outputRecordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(fs, jobConf, ReducerWrapperMapred.getOutputName(splitIndex), Reporter.NULL);
        outputCommitter = jobConf.getOutputCommitter();

        //Create task object so it can handle file format initialization.
        //MapTask is package-private in Hadoop 1.x, so we have to go through reflection.
        try {
            Class reduceTask = Class.forName("org.apache.hadoop.mapred.MapTask");
            Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                    int.class, JobSplit.TaskSplitIndex.class, int.class);
            reduceTaskConstructor.setAccessible(true);
            Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, splitIndex,
                    new JobSplit.TaskSplitIndex(), 0);
            task.setConf(jobConf);
            task.initialize(jobConf, jobId, Reporter.NULL, false);
        } catch (Exception e) {
            throw new IOException("Cannot initialize MapTask", e);
        }
        outputCommitter.setupTask(taskAttemptContext);
    } else {
        outputRecordWriter = null;
    }

    OutputCollector<OUTKEY, OUTVALUE> outputCollector;

    if (!mapOnlyJob) {
        outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
            @Override
            public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
                try {
                    mapOutputAccumulator.combine(outkey, outvalue);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                }
            }
        };
    } else {
        outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
            @Override
            public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
                outputRecordWriter.write(outkey, outvalue);
            }
        };
    }

    INKEY key = recordReader.createKey();
    INVALUE value = recordReader.createValue();

    while (recordReader.next(key, value)) {
        mapper.map(key, value, outputCollector, reporter);
    }

    mapper.close();
    recordReader.close();

    if (mapOnlyJob) {
        outputRecordWriter.close(Reporter.NULL);
        outputCommitter.commitTask(taskAttemptContext);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
public ReducerWrapperMapred(HServerInvocationParameters invocationParameters, int hadoopPartition, int appId,
        int region, boolean sort) throws IOException, ClassNotFoundException, InterruptedException {
    this.invocationParameters = invocationParameters;
    JobConf jobConf = new JobConf((Configuration) invocationParameters.getConfiguration()); //Clone JobConf, so the temporary settings do not pollute other tasks

    LOG.info("Starting reducer:" + HadoopInvocationParameters.dumpConfiguration(jobConf));

    JobID jobID = (JobID) invocationParameters.getJobId();
    this.hadoopPartition = hadoopPartition;
    hadoopVersionSpecificCode = HadoopVersionSpecificCode.getInstance(invocationParameters.getHadoopVersion(),
            jobConf);

    TaskAttemptID taskAttemptID = TaskAttemptID
            .downgrade(hadoopVersionSpecificCode.createTaskAttemptId(jobID, false, hadoopPartition));

    updateJobConf(jobConf, taskAttemptID, region);

    context = hadoopVersionSpecificCode.createTaskAttemptContextMapred(jobConf, taskAttemptID);

    reducer = (org.apache.hadoop.mapred.Reducer<INKEY, INVALUE, OUTKEY, OUTVALUE>) ReflectionUtils
            .newInstance(jobConf.getReducerClass(), jobConf);
    reducer.configure(jobConf);

    OutputFormat outputFormat = jobConf.getOutputFormat();

    FileSystem fs = FileSystem.get(jobConf);
    recordWriter = (org.apache.hadoop.mapred.RecordWriter<OUTKEY, OUTVALUE>) outputFormat.getRecordWriter(fs,
            jobConf, getOutputName(hadoopPartition), Reporter.NULL);

    committer = jobConf.getOutputCommitter();

    //Create task object so it can handle file format initialization.
    //ReduceTask is package-private in Hadoop 1.x, so we have to go through reflection.
    try {
        Class reduceTask = Class.forName("org.apache.hadoop.mapred.ReduceTask");
        Constructor reduceTaskConstructor = reduceTask.getDeclaredConstructor(String.class, TaskAttemptID.class,
                int.class, int.class, int.class);
        reduceTaskConstructor.setAccessible(true);
        Task task = (Task) reduceTaskConstructor.newInstance(null, taskAttemptID, hadoopPartition, 0, 0);
        task.setConf(jobConf);
        task.initialize(jobConf, jobID, Reporter.NULL, false);
    } catch (Exception e) {
        throw new IOException("Cannot initialize ReduceTask", e);
    }

    committer.setupTask(context);

    Class<INKEY> keyClass = (Class<INKEY>) jobConf.getMapOutputKeyClass();
    WritableSerializerDeserializer<INKEY> firstKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    WritableSerializerDeserializer<INKEY> secondKeySerializer = new WritableSerializerDeserializer<INKEY>(
            keyClass, null);
    Class<INVALUE> valueClass = (Class<INVALUE>) jobConf.getMapOutputValueClass();
    WritableSerializerDeserializer<INVALUE> valueSerializer = new WritableSerializerDeserializer<INVALUE>(
            valueClass, null);

    DataGridReaderParameters<INKEY, INVALUE> params = new DataGridReaderParameters<INKEY, INVALUE>(region, appId,
            HServerParameters.getSetting(REDUCE_USEMEMORYMAPPEDFILES, jobConf) > 0, firstKeySerializer,
            valueSerializer, invocationParameters.getSerializationMode(), secondKeySerializer, keyClass,
            valueClass, sort, HServerParameters.getSetting(REDUCE_CHUNKSTOREADAHEAD, jobConf),
            1024 * HServerParameters.getSetting(REDUCE_INPUTCHUNKSIZE_KB, jobConf),
            HServerParameters.getSetting(REDUCE_CHUNKREADTIMEOUT, jobConf));
    transport = DataGridChunkedCollectionReader.getGridReader(params);

    outputCollector = new OutputCollector<OUTKEY, OUTVALUE>() {
        @Override
        public void collect(OUTKEY outkey, OUTVALUE outvalue) throws IOException {
            recordWriter.write(outkey, outvalue);
        }
    };
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
@Override
public void runReducer() throws IOException, InterruptedException {
    LOG.info("Starting reduce:" + hadoopPartition + "," + getOutputName(hadoopPartition) + "," + recordWriter);

    //Run the reducer
    try {
        while (transport.readNext()) {
            reducer.reduce(transport.getKey(), transport.getValue().iterator(), outputCollector, Reporter.NULL);
        }
        reducer.close();
        recordWriter.close(Reporter.NULL);
        committer.commitTask(context);
        LOG.info("Reduce done:" + hadoopPartition + "," + getOutputName(hadoopPartition) + "," + recordWriter);
    } catch (IOException e) {
        committer.abortTask(context);
        throw new IOException("Exception occurred during reduce: " + hadoopPartition + ","
                + getOutputName(hadoopPartition) + "," + recordWriter, e);
    } catch (TimeoutException e) {
        committer.abortTask(context);
        throw new IOException(e);
    }
}
From source file:com.scaleoutsoftware.soss.hserver.hadoop.ReducerWrapperMapred.java
License:Apache License
@Override
public void reduce(INKEY key, Iterable<INVALUE> values) throws IOException {
    reducer.reduce(key, values.iterator(), outputCollector, Reporter.NULL);
}
From source file:com.twitter.maple.jdbc.JDBCTapCollector.java
License:Apache License
private void initialize() throws IOException {
    tap.sinkConfInit(hadoopFlowProcess, conf);

    OutputFormat outputFormat = conf.getOutputFormat();
    LOG.info("Output format class is: " + outputFormat.getClass().toString());

    writer = outputFormat.getRecordWriter(null, conf, tap.getIdentifier(), Reporter.NULL);

    sinkCall.setOutput(this);
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Test
public void text() throws Exception {
    String input = "On the top of the Crumpetty Tree\n" + "The Quangle Wangle sat,\n"
            + "But his face you could not see,\n" + "On account of his Beaver Hat.";
    writeInput(input);

    TextInputFormat format = new TextInputFormat();
    format.configure(conf);
    InputSplit[] splits = format.getSplits(conf, 1);
    RecordReader<LongWritable, Text> recordReader = format.getRecordReader(splits[0], conf, Reporter.NULL);
    checkNextLine(recordReader, 0, "On the top of the Crumpetty Tree");
    checkNextLine(recordReader, 33, "The Quangle Wangle sat,");
    checkNextLine(recordReader, 57, "But his face you could not see,");
    checkNextLine(recordReader, 89, "On account of his Beaver Hat.");
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Test
public void keyValue() throws Exception {
    String input = "line1\tOn the top of the Crumpetty Tree\n" + "line2\tThe Quangle Wangle sat,\n"
            + "line3\tBut his face you could not see,\n" + "line4\tOn account of his Beaver Hat.";
    writeInput(input);

    KeyValueTextInputFormat format = new KeyValueTextInputFormat();
    format.configure(conf);
    InputSplit[] splits = format.getSplits(conf, 1);
    RecordReader<Text, Text> recordReader = format.getRecordReader(splits[0], conf, Reporter.NULL);
    checkNextLine(recordReader, "line1", "On the top of the Crumpetty Tree");
    checkNextLine(recordReader, "line2", "The Quangle Wangle sat,");
    checkNextLine(recordReader, "line3", "But his face you could not see,");
    checkNextLine(recordReader, "line4", "On account of his Beaver Hat.");
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Test
public void nLine() throws Exception {
    String input = "On the top of the Crumpetty Tree\n" + "The Quangle Wangle sat,\n"
            + "But his face you could not see,\n" + "On account of his Beaver Hat.";
    writeInput(input);

    conf.setInt("mapred.line.input.format.linespermap", 2);
    NLineInputFormat format = new NLineInputFormat();
    format.configure(conf);
    InputSplit[] splits = format.getSplits(conf, 2);
    RecordReader<LongWritable, Text> recordReader = format.getRecordReader(splits[0], conf, Reporter.NULL);
    checkNextLine(recordReader, 0, "On the top of the Crumpetty Tree");
    checkNextLine(recordReader, 33, "The Quangle Wangle sat,");
    recordReader = format.getRecordReader(splits[1], conf, Reporter.NULL);
    checkNextLine(recordReader, 57, "But his face you could not see,");
    checkNextLine(recordReader, 89, "On account of his Beaver Hat.");
}