List of usage examples for org.apache.hadoop.mapreduce MRJobConfig SKIP_RECORDS
String SKIP_RECORDS
To view the source code for org.apache.hadoop.mapreduce MRJobConfig SKIP_RECORDS.
Click Source Link
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/** * test PipesMapRunner test the transfer data from reader * * @throws Exception/*from w w w. j a v a 2s . com*/ */ @Test public void testRunner() throws Exception { // clean old password files File[] psw = cleanTokenPasswordFile(); try { JobID jobId = new JobID("201408272347", 0); TaskID taskId = new TaskID(jobId, TaskType.MAP, 0); TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0); Job job = new Job(new Configuration()); job.setJobID(jobId); Configuration conf = job.getConfiguration(); conf.set(Submitter.IS_JAVA_RR, "true"); conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString()); job.setInputFormatClass(DummyInputFormat.class); FileSystem fs = new RawLocalFileSystem(); fs.setConf(conf); DummyInputFormat input_format = new DummyInputFormat(); List<InputSplit> isplits = input_format.getSplits(job); InputSplit isplit = isplits.get(0); TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid); RecordReader<FloatWritable, NullWritable> rReader = input_format.createRecordReader(isplit, tcontext); TestMapContext context = new TestMapContext(conf, taskAttemptid, rReader, null, null, null, isplit); // stub for client File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub"); conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath()); // token for authorization Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(), "password".getBytes(), new Text("kind"), new Text("service")); TokenCache.setJobToken(token, job.getCredentials()); conf.setBoolean(MRJobConfig.SKIP_RECORDS, true); PipesMapper<FloatWritable, NullWritable, IntWritable, Text> mapper = new PipesMapper<FloatWritable, NullWritable, IntWritable, Text>( context); initStdOut(conf); mapper.run(context); String stdOut = readStdOut(conf); // test part of translated data. As common file for client and test - // clients stdOut // check version assertTrue(stdOut.contains("CURRENT_PROTOCOL_VERSION:0")); // check key and value classes assertTrue(stdOut.contains("Key class:org.apache.hadoop.io.FloatWritable")); assertTrue(stdOut.contains("Value class:org.apache.hadoop.io.NullWritable")); // test have sent all data from reader assertTrue(stdOut.contains("value:0.0")); assertTrue(stdOut.contains("value:9.0")); } finally { if (psw != null) { // remove password files for (File file : psw) { file.deleteOnExit(); } } } }
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/** * test org.apache.hadoop.mapreduce.pipes.Application * test a internal functions: /*from w w w . j a v a 2 s .co m*/ * MessageType.REGISTER_COUNTER, INCREMENT_COUNTER, STATUS, PROGRESS... * * @throws Throwable */ @Test public void testApplication() throws Throwable { System.err.println("testApplication"); File[] psw = cleanTokenPasswordFile(); try { JobID jobId = new JobID("201408272347", 0); TaskID taskId = new TaskID(jobId, TaskType.MAP, 0); TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0); Job job = new Job(new Configuration()); job.setJobID(jobId); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString()); FileSystem fs = new RawLocalFileSystem(); fs.setConf(conf); File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationStub"); //getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeApplicationRunnableStub"); conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath()); System.err.println("fCommand" + fCommand.getAbsolutePath()); Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(), "password".getBytes(), new Text("kind"), new Text("service")); TokenCache.setJobToken(token, job.getCredentials()); conf.setBoolean(MRJobConfig.SKIP_RECORDS, true); TestReporter reporter = new TestReporter(); DummyInputFormat input_format = new DummyInputFormat(); List<InputSplit> isplits = input_format.getSplits(job); InputSplit isplit = isplits.get(0); TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid); DummyRecordReader reader = (DummyRecordReader) input_format.createRecordReader(isplit, tcontext); job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(Text.class); RecordWriter<IntWritable, Text> writer = new TestRecordWriter( new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile")); MapContextImpl<IntWritable, Text, IntWritable, Text> context = new MapContextImpl<IntWritable, Text, IntWritable, Text>( conf, taskAttemptid, null, writer, null, reporter, null); System.err.println("ready to launch application"); Application<IntWritable, Text, IntWritable, Text> application = new Application<IntWritable, Text, IntWritable, Text>( context, reader); System.err.println("done"); application.getDownlink().flush(); application.getDownlink().mapItem(new IntWritable(3), new Text("txt")); application.getDownlink().flush(); application.waitForFinish(); // test getDownlink().mapItem(); String stdOut = readStdOut(conf); assertTrue(stdOut.contains("key:3")); assertTrue(stdOut.contains("value:txt")); assertEquals(0.0, context.getProgress(), 0.01); assertNotNull(context.getCounter("group", "name")); // test status MessageType.STATUS assertEquals(context.getStatus(), "PROGRESS"); // check MessageType.PROGRESS assertEquals(0.55f, reader.getProgress(), 0.001); application.getDownlink().close(); // test MessageType.OUTPUT stdOut = readFile(new File(workSpace.getAbsolutePath() + File.separator + "outfile")); assertTrue(stdOut.contains("key:123")); assertTrue(stdOut.contains("value:value")); try { // try to abort application.abort(new Throwable()); fail(); } catch (IOException e) { // abort works ? assertEquals("pipe child exception", e.getMessage()); } } finally { if (psw != null) { // remove password files for (File file : psw) { file.deleteOnExit(); } } } }
From source file:it.crs4.pydoop.mapreduce.pipes.TestPipeApplication.java
License:Apache License
/** * test org.apache.hadoop.mapreduce.pipes.PipesReducer * test the transfer of data: key and value * * @throws Exception//w ww .ja va2 s. c o m */ @Test public void testPipesReducer() throws Exception { System.err.println("testPipesReducer"); File[] psw = cleanTokenPasswordFile(); try { JobID jobId = new JobID("201408272347", 0); TaskID taskId = new TaskID(jobId, TaskType.MAP, 0); TaskAttemptID taskAttemptid = new TaskAttemptID(taskId, 0); Job job = new Job(new Configuration()); job.setJobID(jobId); Configuration conf = job.getConfiguration(); conf.set(MRJobConfig.TASK_ATTEMPT_ID, taskAttemptid.toString()); FileSystem fs = new RawLocalFileSystem(); fs.setConf(conf); File fCommand = getFileCommand("it.crs4.pydoop.mapreduce.pipes.PipeReducerStub"); conf.set(MRJobConfig.CACHE_LOCALFILES, fCommand.getAbsolutePath()); System.err.println("fCommand" + fCommand.getAbsolutePath()); Token<AMRMTokenIdentifier> token = new Token<AMRMTokenIdentifier>("user".getBytes(), "password".getBytes(), new Text("kind"), new Text("service")); TokenCache.setJobToken(token, job.getCredentials()); conf.setBoolean(MRJobConfig.SKIP_RECORDS, true); TestReporter reporter = new TestReporter(); DummyInputFormat input_format = new DummyInputFormat(); List<InputSplit> isplits = input_format.getSplits(job); InputSplit isplit = isplits.get(0); TaskAttemptContextImpl tcontext = new TaskAttemptContextImpl(conf, taskAttemptid); RecordWriter<IntWritable, Text> writer = new TestRecordWriter( new FileOutputStream(workSpace.getAbsolutePath() + File.separator + "outfile")); BooleanWritable bw = new BooleanWritable(true); List<Text> texts = new ArrayList<Text>(); texts.add(new Text("first")); texts.add(new Text("second")); texts.add(new Text("third")); DummyRawKeyValueIterator kvit = new DummyRawKeyValueIterator(); ReduceContextImpl<BooleanWritable, Text, IntWritable, Text> context = new ReduceContextImpl<BooleanWritable, Text, IntWritable, Text>( conf, taskAttemptid, kvit, null, null, writer, null, null, null, BooleanWritable.class, Text.class); PipesReducer<BooleanWritable, Text, IntWritable, Text> reducer = new PipesReducer<BooleanWritable, Text, IntWritable, Text>(); reducer.setup(context); initStdOut(conf); reducer.reduce(bw, texts, context); reducer.cleanup(context); String stdOut = readStdOut(conf); // test data: key assertTrue(stdOut.contains("reducer key :true")); // and values assertTrue(stdOut.contains("reduce value :first")); assertTrue(stdOut.contains("reduce value :second")); assertTrue(stdOut.contains("reduce value :third")); } finally { if (psw != null) { // remove password files for (File file : psw) { file.deleteOnExit(); } } } }
From source file:it.crs4.pydoop.pipes.PipesMapRunner.java
License:Apache License
/** * Run the map task.//from w ww. j a v a 2s . co m * @param input the set of inputs * @param output the object to collect the outputs of the map * @param reporter the object to update with status */ @SuppressWarnings("unchecked") public void run(RecordReader<K1, V1> input, OutputCollector<K2, V2> output, Reporter reporter) throws IOException { Application<K1, V1, K2, V2> application = null; try { RecordReader<FloatWritable, NullWritable> fakeInput = (!Submitter.getIsJavaRecordReader(job) && !Submitter.getIsJavaMapper(job)) ? (RecordReader<FloatWritable, NullWritable>) input : null; application = new Application<K1, V1, K2, V2>(job, fakeInput, output, reporter, (Class<? extends K2>) job.getOutputKeyClass(), (Class<? extends V2>) job.getOutputValueClass()); } catch (InterruptedException ie) { throw new RuntimeException("interrupted", ie); } DownwardProtocol<K1, V1> downlink = application.getDownlink(); boolean isJavaInput = Submitter.getIsJavaRecordReader(job); downlink.runMap(reporter.getInputSplit(), job.getNumReduceTasks(), isJavaInput); boolean skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false); try { if (isJavaInput) { // allocate key & value instances that are re-used for all entries K1 key = input.createKey(); V1 value = input.createValue(); downlink.setInputTypes(key.getClass().getName(), value.getClass().getName()); while (input.next(key, value)) { // map pair to output downlink.mapItem(key, value); if (skipping) { //flush the streams on every record input if running in skip mode //so that we don't buffer other records surrounding a bad record. downlink.flush(); } } downlink.endOfInput(); } application.waitForFinish(); } catch (Throwable t) { application.abort(t); } finally { application.cleanup(); } }
From source file:it.crs4.pydoop.pipes.PipesReducer.java
License:Apache License
public void configure(JobConf job) { this.job = job; //disable the auto increment of the counter. For pipes, no of processed //records could be different(equal or less) than the no of records input. SkipBadRecords.setAutoIncrReducerProcCount(job, false); skipping = job.getBoolean(MRJobConfig.SKIP_RECORDS, false); }