List of usage examples for org.apache.hadoop.mapreduce.lib.input.FileSplit.toString()
@Override
public String toString()
From source file:eu.scape_project.pt.mapred.input.ControlFileInputFormatTest.java
License:Apache License
@Test public void testWriteNewControlFileAndCreateSplits() throws IOException { MockupFileSystem fs = new MockupFileSystem(); Path newControlFile = new Path("newControlFile"); fs.addFile("newControlFile", true, null); Map<String, ArrayList<String>> locationMap = new HashMap<String, ArrayList<String>>() { {//from ww w . j ava2 s .c o m put("host1", new ArrayList<String>() { { add("line1-1"); add("line1-2"); add("line1-3"); } }); put("host2", new ArrayList<String>() { { add("line2-1"); add("line2-2"); add("line2-3"); add("line2-4"); add("line2-5"); add("line2-6"); } }); put("host3", new ArrayList<String>() { { add("line3-1"); add("line3-2"); add("line3-3"); add("line3-4"); add("line3-5"); add("line3-6"); add("line3-7"); add("line3-8"); } }); put("host4", new ArrayList<String>() { { add("line4-1"); add("line4-2"); add("line4-3"); add("line4-4"); add("line4-5"); add("line4-6"); add("line4-7"); add("line4-8"); add("line4-9"); add("line4-10"); } }); } }; List<FileSplit> splits = ControlFileInputFormat.writeNewControlFileAndCreateSplits(newControlFile, fs, locationMap, 3); FSDataInputStream bis = fs.open(newControlFile); try { int i = 0; for (FileSplit split : splits) { LOG.debug(++i + ".split = " + split.toString()); byte[] content = new byte[(int) split.getLength()]; bis.read((int) split.getStart(), content, 0, (int) split.getLength()); String cont = new String(content); LOG.debug(" content = " + new String(content)); if (cont.startsWith("line1-1")) { String expected = ""; for (String line : locationMap.get("host1")) { expected += line + "\n"; } assertEquals(expected, cont); } else if (cont.startsWith("line2-1")) { String expected = ""; int j = 0; for (String line : locationMap.get("host2")) { expected += line + "\n"; if (++j == 3) break; } assertEquals(expected, cont); } else if (cont.startsWith("line2-4")) { String expected = ""; int j = 0; for (String line : locationMap.get("host2")) { if (++j <= 3) continue; expected += line + "\n"; } assertEquals(expected, cont); } 
else if (cont.startsWith("line3-1")) { String expected = ""; int j = 0; for (String line : locationMap.get("host3")) { expected += line + "\n"; if (++j == 4) break; } assertEquals(expected, cont); } else if (cont.startsWith("line3-5")) { String expected = ""; int j = 0; for (String line : locationMap.get("host3")) { if (++j <= 4) continue; expected += line + "\n"; } assertEquals(expected, cont); } else if (cont.startsWith("line4-1")) { String expected = ""; int j = 0; for (String line : locationMap.get("host4")) { expected += line + "\n"; if (++j == 3) break; } assertEquals(expected, cont); } else if (cont.startsWith("line4-4")) { String expected = ""; int j = 0; for (String line : locationMap.get("host4")) { if (++j <= 3) continue; expected += line + "\n"; if (++j > 7) break; } assertEquals(expected, cont); } else if (cont.startsWith("line4-7")) { String expected = ""; int j = 0; for (String line : locationMap.get("host4")) { if (++j <= 6) continue; expected += line + "\n"; } assertEquals(expected, cont); } else { fail("wrong split"); } } } finally { bis.close(); } }
From source file:org.apache.ignite.internal.processors.hadoop.GridHadoopSplitWrapperSelfTest.java
License:Apache License
/** * Tests serialization of wrapper and the wrapped native split. * @throws Exception If fails./* w w w . j a v a 2s. c om*/ */ public void testSerialization() throws Exception { FileSplit nativeSplit = new FileSplit(new Path("/path/to/file"), 100, 500, new String[] { "host1", "host2" }); assertEquals("/path/to/file:100+500", nativeSplit.toString()); GridHadoopSplitWrapper split = GridHadoopUtils.wrapSplit(10, nativeSplit, nativeSplit.getLocations()); assertEquals("[host1, host2]", Arrays.toString(split.hosts())); ByteArrayOutputStream buf = new ByteArrayOutputStream(); ObjectOutput out = new ObjectOutputStream(buf); out.writeObject(split); ObjectInput in = new ObjectInputStream(new ByteArrayInputStream(buf.toByteArray())); final GridHadoopSplitWrapper res = (GridHadoopSplitWrapper) in.readObject(); assertEquals("/path/to/file:100+500", GridHadoopUtils.unwrapSplit(res).toString()); GridTestUtils.assertThrows(log, new Callable<Object>() { @Override public Object call() throws Exception { res.hosts(); return null; } }, AssertionError.class, null); }
From source file:org.apache.ignite.internal.processors.hadoop.HadoopSplitWrapperSelfTest.java
License:Apache License
/**
 * Tests serialization of wrapper and the wrapped native split.
 *
 * @throws Exception If fails.
 */
public void testSerialization() throws Exception {
    // String form shared by the native split before and after the round trip.
    final String expectedSplit = "/path/to/file:100+500";

    FileSplit fileSplit = new FileSplit(new Path("/path/to/file"), 100, 500, new String[] { "host1", "host2" });

    assertEquals(expectedSplit, fileSplit.toString());

    HadoopSplitWrapper wrapper = HadoopUtils.wrapSplit(10, fileSplit, fileSplit.getLocations());

    assertEquals("[host1, host2]", Arrays.toString(wrapper.hosts()));

    // Round-trip the wrapper through Java serialization.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    ObjectOutput objOut = new ObjectOutputStream(bytes);

    objOut.writeObject(wrapper);

    ObjectInput objIn = new ObjectInputStream(new ByteArrayInputStream(bytes.toByteArray()));

    final HadoopSplitWrapper restored = (HadoopSplitWrapper) objIn.readObject();

    assertEquals(expectedSplit, HadoopUtils.unwrapSplit(restored).toString());

    // hosts() on the deserialized wrapper is expected to trip an assertion.
    GridTestUtils.assertThrows(log, new Callable<Object>() {
        @Override
        public Object call() throws Exception {
            restored.hosts();

            return null;
        }
    }, AssertionError.class, null);
}
From source file:org.apache.trevni.avro.mapreduce.AvroTrevniRecordReaderBase.java
License:Apache License
/** {@inheritDoc} */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    final FileSplit split = (FileSplit) inputSplit;

    // Report the split's string form as the task status.
    context.setStatus(split.toString());

    // The column reader is opened on the split's path with the job configuration.
    final HadoopInput input = new HadoopInput(split.getPath(), context.getConfiguration());
    final AvroColumnReader.Params readerParams = new AvroColumnReader.Params(input);
    readerParams.setModel(ReflectData.get());

    // Only set a schema on the reader parameters when the job supplies an input key schema.
    if (AvroJob.getInputKeySchema(context.getConfiguration()) != null) {
        readerParams.setSchema(AvroJob.getInputKeySchema(context.getConfiguration()));
    }

    reader = new AvroColumnReader<T>(readerParams);
    rows = reader.getRowCount();
}
From source file:org.bgi.flexlab.gaea.data.mapreduce.input.adaptor.AdaptorRecordReader.java
License:Open Source License
@Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { FileSplit split = (FileSplit) genericSplit; System.out.println(split.toString()); Configuration job = context.getConfiguration(); System.err.println(split.getPath().toString()); this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE); start = split.getStart();//from www . j a v a 2 s . c o m end = start + split.getLength(); final Path file = split.getPath(); compressionCodecs = new CompressionCodecFactory(job); final CompressionCodec codec = compressionCodecs.getCodec(file); // open the file and seek to the start of the split FileSystem fs = file.getFileSystem(job); FSDataInputStream fileIn = fs.open(split.getPath()); boolean skipFirstLine = false; if (codec != null) { in = new LineReader(codec.createInputStream(fileIn), job); end = Long.MAX_VALUE; } else { if (start != 0) { skipFirstLine = true; --start; fileIn.seek(start); } in = new LineReader(fileIn, job); } if (skipFirstLine) { // skip first line and re-establish "start". start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start)); } this.pos = start; }
From source file:org.mrgeo.hdfs.vector.DelimitedVectorInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { boolean useNLineFormat = context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false); if (useNLineFormat) { List<InputSplit> splits = new NLineInputFormat().getSplits(context); // This is a workaround to what appears to be a bug in in how NLineInputFormat // computes its splits. When there are multiple splits in a file, it seems // the start position in the last split is off by one. Note that this corrective // code needs to check the last split for each different file that appears // in the list of splits. for (int index = 2; index < splits.size(); index++) { FileSplit previousSplit = (FileSplit) splits.get(index - 1); FileSplit currSplit = (FileSplit) splits.get(index); // If this index is the last split, or we've moved on to splits from a different // file, then we need to adjust the last split for that file. int lastFileIndex = -1; if (index == splits.size() - 1) { lastFileIndex = index;/*from w ww . j a v a2s .c o m*/ } else if (!currSplit.getPath().equals(previousSplit.getPath())) { lastFileIndex = index - 1; } if (lastFileIndex >= 2) { FileSplit lastFileSplit = (FileSplit) splits.get(lastFileIndex); FileSplit priorSplit = (FileSplit) splits.get(lastFileIndex - 1); if (lastFileSplit.getPath().equals(priorSplit.getPath())) { if (priorSplit.getPath().equals(lastFileSplit.getPath()) && priorSplit.getStart() + priorSplit.getLength() < lastFileSplit.getStart()) { // Adjust the start of previous split FileSplit replacement = new FileSplit(lastFileSplit.getPath(), priorSplit.getStart() + priorSplit.getLength(), lastFileSplit.getLength() + 1, lastFileSplit.getLocations()); log.info("Replacing split: " + lastFileSplit.toString()); log.info(" With split: " + replacement.toString()); splits.set(lastFileIndex, replacement); } } } } return splits; } else { List<InputSplit> splits = new TextInputFormat().getSplits(context); return splits; } }
From source file:org.mrgeo.hdfs.vector.HdfsVectorInputFormat.java
License:Apache License
@Override public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException { boolean useNLineFormat = context.getConfiguration().getBoolean(USE_NLINE_FORMAT, false); if (useNLineFormat) { List<InputSplit> splits = new NLineInputFormat().getSplits(context); // This is a workaround to what appears to be a bug in in how NLineInputFormat // computes its splits. When there are multiple splits in a file, it seems // the start position in the last split is off by one. Note that this corrective // code needs to check the last split for each different file that appears // in the list of splits. for (int index = 2; index < splits.size(); index++) { FileSplit previousSplit = (FileSplit) splits.get(index - 1); FileSplit currSplit = (FileSplit) splits.get(index); // If this index is the last split, or we've moved on to splits from a different // file, then we need to adjust the last split for that file. int lastFileIndex = -1; if (index == splits.size() - 1) { lastFileIndex = index;/*from w ww. j av a 2 s .co m*/ } else if (!currSplit.getPath().equals(previousSplit.getPath())) { lastFileIndex = index - 1; } if (lastFileIndex >= 2) { FileSplit lastFileSplit = (FileSplit) splits.get(lastFileIndex); FileSplit priorSplit = (FileSplit) splits.get(lastFileIndex - 1); if (lastFileSplit.getPath().equals(priorSplit.getPath())) if (priorSplit.getPath().equals(lastFileSplit.getPath()) && priorSplit.getStart() + priorSplit.getLength() < lastFileSplit.getStart()) { // Adjust the start of previous split FileSplit replacement = new FileSplit(lastFileSplit.getPath(), priorSplit.getStart() + priorSplit.getLength(), lastFileSplit.getLength() + 1, lastFileSplit.getLocations()); log.info("Replacing split: " + lastFileSplit.toString()); log.info(" With split: " + replacement.toString()); splits.set(lastFileIndex, replacement); } } } return splits; } else { List<InputSplit> splits = new TextInputFormat().getSplits(context); return splits; } }