List of usage examples for org.apache.hadoop.mapreduce RecordReader getCurrentValue
public abstract VALUEIN getCurrentValue() throws IOException, InterruptedException;
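Regardless of the concrete InputFormat, the examples below all follow the same consumption pattern: create the reader, initialize it with a split and task attempt context, then pull records with nextKeyValue()/getCurrentValue() until the input is exhausted and close the reader. A minimal sketch of that loop, assuming a TextInputFormat and an already-constructed FileSplit and TaskAttemptContext (class and method names here are illustrative only, not taken from any of the projects below):

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class RecordReaderSketch {
    /** Reads every line of the given split and prints "offset<TAB>line". */
    public static void readSplit(FileSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        TextInputFormat inputFormat = new TextInputFormat();
        RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
        reader.initialize(split, context);
        try {
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();  // byte offset of the current line
                Text value = reader.getCurrentValue();      // the current line's contents
                System.out.println(key.get() + "\t" + value);
            }
        } finally {
            reader.close();
        }
    }
}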
From source file:com.inmobi.conduit.distcp.tools.mapred.lib.TestDynamicInputFormat.java
License:Apache License
@Test
public void testGetSplits() throws Exception {
    DistCpOptions options = getOptions();
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(
            new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testDynInputFormat/fileList.seq"),
            options);

    JobID jobId = new JobID();
    JobContext jobContext = mock(JobContext.class);
    when(jobContext.getConfiguration()).thenReturn(configuration);
    when(jobContext.getJobID()).thenReturn(jobId);

    DynamicInputFormat<Text, FileStatus> inputFormat = new DynamicInputFormat<Text, FileStatus>();
    List<InputSplit> splits = inputFormat.getSplits(jobContext);

    int nFiles = 0;
    int taskId = 0;

    for (InputSplit split : splits) {
        TaskAttemptID tId = new TaskAttemptID("", 0, true, taskId, 0);
        final TaskAttemptContext taskAttemptContext = mock(TaskAttemptContext.class);
        when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        when(taskAttemptContext.getTaskAttemptID()).thenReturn(tId);
        RecordReader<Text, FileStatus> recordReader = inputFormat.createRecordReader(split, taskAttemptContext);
        recordReader.initialize(splits.get(0), taskAttemptContext);
        float previousProgressValue = 0f;
        while (recordReader.nextKeyValue()) {
            FileStatus fileStatus = recordReader.getCurrentValue();
            String source = fileStatus.getPath().toString();
            System.out.println(source);
            Assert.assertTrue(expectedFilePaths.contains(source));
            final float progress = recordReader.getProgress();
            Assert.assertTrue(progress >= previousProgressValue);
            Assert.assertTrue(progress >= 0.0f);
            Assert.assertTrue(progress <= 1.0f);
            previousProgressValue = progress;
            ++nFiles;
        }
        Assert.assertTrue(recordReader.getProgress() == 1.0f);
        ++taskId;
    }

    Assert.assertEquals(expectedFilePaths.size(), nFiles);
}
From source file:com.inmobi.conduit.distcp.tools.mapred.TestUniformSizeInputFormat.java
License:Apache License
public void testGetSplits(int nMaps) throws Exception {
    DistCpOptions options = getOptions(nMaps);
    Configuration configuration = new Configuration();
    configuration.set("mapred.map.tasks", String.valueOf(options.getMaxMaps()));
    Path listFile = new Path(cluster.getFileSystem().getUri().toString() + "/tmp/testGetSplits_1/fileList.seq");
    CopyListing.getCopyListing(configuration, CREDENTIALS, options).buildListing(listFile, options);

    JobContext jobContext = Mockito.mock(JobContext.class);
    Mockito.when(jobContext.getConfiguration()).thenReturn(configuration);
    Mockito.when(jobContext.getJobID()).thenReturn(new JobID());
    UniformSizeInputFormat uniformSizeInputFormat = new UniformSizeInputFormat();
    List<InputSplit> splits = uniformSizeInputFormat.getSplits(jobContext);

    // Removing the legacy check - Refer HADOOP-9230
    int sizePerMap = totalFileSize / nMaps;

    checkSplits(listFile, splits);

    int doubleCheckedTotalSize = 0;
    int previousSplitSize = -1;
    for (int i = 0; i < splits.size(); ++i) {
        InputSplit split = splits.get(i);
        int currentSplitSize = 0;
        TaskAttemptID taskId = new TaskAttemptID("", 0, true, 0, 0);
        final TaskAttemptContext taskAttemptContext = Mockito.mock(TaskAttemptContext.class);
        Mockito.when(taskAttemptContext.getConfiguration()).thenReturn(configuration);
        Mockito.when(taskAttemptContext.getTaskAttemptID()).thenReturn(taskId);
        RecordReader<Text, FileStatus> recordReader = uniformSizeInputFormat.createRecordReader(split,
                taskAttemptContext);
        recordReader.initialize(split, taskAttemptContext);
        while (recordReader.nextKeyValue()) {
            Path sourcePath = recordReader.getCurrentValue().getPath();
            FileSystem fs = sourcePath.getFileSystem(configuration);
            FileStatus fileStatus[] = fs.listStatus(sourcePath);
            Assert.assertEquals(fileStatus.length, 1);
            currentSplitSize += fileStatus[0].getLen();
        }
        Assert.assertTrue(previousSplitSize == -1
                || Math.abs(currentSplitSize - previousSplitSize) < 0.1 * sizePerMap
                || i == splits.size() - 1);

        doubleCheckedTotalSize += currentSplitSize;
    }

    Assert.assertEquals(totalFileSize, doubleCheckedTotalSize);
}
From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java
License:Apache License
/**
 * Read the given split.
 * @return List : List of read messages
 */
private List<Message> readSplit(DatabusInputFormat format, org.apache.hadoop.mapreduce.InputSplit split,
        JobConf job) throws IOException, InterruptedException {
    List<Message> result = new ArrayList<Message>();
    RecordReader<LongWritable, Message> reader = format
            .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) split, context);
    ((DatabusRecordReader) reader).initialize(split, context);
    while (reader.nextKeyValue()) {
        result.add(reader.getCurrentValue());
    }
    reader.close();
    return result;
}
From source file:com.marcolotz.lung.debug.InputTester.java
License:Creative Commons License
/***
 * Method used for locally testing the record reader and the input format. It
 * generates an input split from a local file system file.
 *
 * @param filePath
 */
public void localTest(String filePath) {
    DICOM image;

    Configuration testConf = new Configuration(false);

    /* Reads the local file system */
    testConf.set("fs.default.name", "file:///");

    File testFile = new File(filePath);

    Path path = new Path(testFile.getAbsoluteFile().toURI());
    FileSplit split = new FileSplit(path, 0, testFile.length(), null);

    InputFormat<NullWritable, BytesWritable> inputFormat = ReflectionUtils
            .newInstance(WholeFileInputFormat.class, testConf);
    TaskAttemptContext context = new TaskAttemptContextImpl(testConf, new TaskAttemptID());

    try {
        RecordReader<NullWritable, BytesWritable> reader = inputFormat.createRecordReader(split, context);
        while (reader.nextKeyValue()) {
            /* get the bytes array */
            BytesWritable inputBytesWritable = (BytesWritable) reader.getCurrentValue();
            byte[] inputContent = inputBytesWritable.getBytes();

            /* Check for correct value */
            // generateLocalOutput("path/to/output");

            InputStream is = new ByteArrayInputStream(inputContent);

            image = new DICOM(is);
            image.run("Dicom Test");

            /* Prints the bytes as an ImagePlus image */
            ImageViewer debug = new ImageViewer();
            debug.setImage(image);
        }
    } catch (Exception e) {
        // Report failures instead of swallowing them silently.
        e.printStackTrace();
    }
}
From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java
License:Apache License
@Test
public void testReadFile() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}
From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java
License:Apache License
@Test
public void testReadFileNoMetadata() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();
    inputFormat.setBuilder(Testing.TestItem.newBuilder());

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}
From source file:com.splicemachine.derby.impl.io.WholeTextInputFormatTest.java
License:Apache License
private long collectRecords(Set<String> fileNames, RecordReader<String, InputStream> recordReader)
        throws IOException, InterruptedException {
    long count = 0L;
    while (recordReader.nextKeyValue()) {
        String key = recordReader.getCurrentKey();
        // some platforms add more "/" at the beginning, coalesce them for equality check
        key = key.replaceAll("/+", "/");
        Assert.assertTrue("Seen the same file twice!", fileNames.add(key));

        InputStream is = recordReader.getCurrentValue();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
            String n;
            while ((n = br.readLine()) != null) {
                count++;
            }
        }
    }
    return count;
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java
License:Apache License
private List<Map.Entry> previewTextBatch(FileStatus fileStatus, int batchSize)
        throws IOException, InterruptedException {
    TextInputFormat textInputFormat = new TextInputFormat();
    InputSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), null);
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(hadoopConf,
            TaskAttemptID.forName("attempt_1439420318532_0011_m_000000_0"));
    RecordReader<LongWritable, Text> recordReader = textInputFormat.createRecordReader(fileSplit,
            taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean hasNext = recordReader.nextKeyValue();
    List<Map.Entry> batch = new ArrayList<>();
    while (hasNext && batch.size() < batchSize) {
        batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + recordReader.getCurrentKey(),
                String.valueOf(recordReader.getCurrentValue())));
        hasNext = recordReader.nextKeyValue(); // not like iterator.hasNext, actually advances
    }
    return batch;
}
From source file:edu.uci.ics.hyracks.hdfs2.dataflow.HDFSReadOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                writer.open();
                InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(),
                        job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read; synchronize among
                         * simultaneous partitions in the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }

                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue() == true) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                    inputSplits.get(i).toString());
                        }
                    }
                }
                parser.close(writer);
                writer.close();
            } catch (Exception e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static long spatialJoinLocal(Path[] inFiles, Path outFile, OperationsParams params)
        throws IOException, InterruptedException {
    // Read the inputs and store them in memory
    List<Shape>[] datasets = new List[inFiles.length];
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (int i = 0; i < inFiles.length; i++) {
        datasets[i] = new ArrayList<Shape>();
        FileSystem inFs = inFiles[i].getFileSystem(params);
        Job job = Job.getInstance(params);
        SpatialInputFormat3.addInputPath(job, inFiles[i]);
        for (InputSplit split : inputFormat.getSplits(job)) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    datasets[i].add(shape.clone());
                }
            }
            reader.close();
        }
    }

    // Apply the spatial join algorithm
    ResultCollector2<Shape, Shape> output = null;
    PrintStream out = null;
    if (outFile != null) {
        FileSystem outFS = outFile.getFileSystem(params);
        out = new PrintStream(outFS.create(outFile));
        final PrintStream outout = out;
        output = new ResultCollector2<Shape, Shape>() {
            @Override
            public void collect(Shape r, Shape s) {
                outout.println(r.toText(new Text()) + "," + s.toText(new Text()));
            }
        };
    }
    long resultCount = SpatialJoin_planeSweep(datasets[0], datasets[1], output, null);
    if (out != null)
        out.close();
    return resultCount;
}