List of usage examples for the FileSplit constructor of org.apache.hadoop.mapreduce.lib.input.FileSplit
public FileSplit(Path file, long start, long length, String[] hosts)
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_One_Split() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_One_Split", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//ww w . j a v a 2s.com List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 1); List<FileSplit> expected = newArrayListWithCapacity(1); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 17, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
/**
 * Verifies that {@code SplitByKTextInputFormat.getSplitsForFile}, called with 2 as its
 * last argument on a file holding the {@code non_Empty} fixture, produces two splits:
 * (start 0, length 12) followed by (start 12, length 5), both with empty host lists.
 *
 * @throws IOException propagated from the fixture helpers or the split computation
 */
@Test
public void splits_Non_Empty_File_Ok_Splits() throws IOException {
  File in = createTmpFile("in_Splits_Non_Empty_File_Ok_Splits", non_Empty);
  Configuration conf = createConfiguration();
  Path inputPath = new Path(in.getAbsolutePath());

  // The mocked status reports the temp file's path and that it is not a directory.
  FileStatus status = EasyMock.createMock(FileStatus.class);
  EasyMock.expect(status.getPath()).andReturn(inputPath);
  EasyMock.expect(status.isDir()).andReturn(false);
  EasyMock.replay(status);

  List<FileSplit> actual = SplitByKTextInputFormat.getSplitsForFile(status, conf, 2);

  List<FileSplit> expected = newArrayListWithCapacity(2);
  expected.add(new FileSplit(inputPath, 0, 12, new String[] {}));
  expected.add(new FileSplit(inputPath, 12, 5, new String[] {}));

  checkSplits(expected, actual);
}
From source file:be.uantwerpen.adrem.eclat.util.SplitByKTextInputFormatTest.java
License:Apache License
@Test public void splits_Non_Empty_File_More_Splits_Than_Lines() throws IOException { File in = createTmpFile("in_Splits_Non_Empty_File_More_Splits_Than_Lines", non_Empty); Configuration conf = createConfiguration(); FileStatus status = EasyMock.createMock(FileStatus.class); EasyMock.expect(status.getPath()).andReturn(new Path(in.getAbsolutePath())); EasyMock.expect(status.isDir()).andReturn(false); EasyMock.replay(status);//from w ww. j av a 2 s . co m List<FileSplit> splits = SplitByKTextInputFormat.getSplitsForFile(status, conf, 10); List<FileSplit> expected = newArrayListWithCapacity(3); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 0, 5, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 5, 7, new String[] {})); expected.add(new FileSplit(new Path(in.getAbsolutePath()), 12, 5, new String[] {})); checkSplits(expected, splits); }
From source file:be.uantwerpen.adrem.hadoop.util.SplitByKTextInputFormat.java
License:Apache License
/**
 * Builds a {@link FileSplit} with an empty host list for a region of a file.
 *
 * <p>A region that begins at offset 0 keeps its start and has its length reduced by
 * one; any other region has its start moved back by one and keeps its length.
 *
 * @param fileName
 *          name of the file to which the split corresponds
 * @param begin
 *          begin of the region
 * @param length
 *          length of the region
 * @return the file split object
 */
protected static FileSplit createFileSplit(Path fileName, long begin, long length) {
  if (begin != 0) {
    return new FileSplit(fileName, begin - 1, length, new String[] {});
  }
  return new FileSplit(fileName, begin, length - 1, new String[] {});
}
From source file:bsc.spark.examples.terasort.ehiggs.TeraScheduler.java
License:Apache License
/** * Solve the schedule and modify the FileSplit array to reflect the new * schedule. It will move placed splits to front and unplacable splits * to the end./*from ww w.j a v a 2 s . c o m*/ * @return a new list of FileSplits that are modified to have the * best host as the only host. * @throws IOException */ public List<InputSplit> getNewFileSplits() throws IOException { solve(); FileSplit[] result = new FileSplit[realSplits.length]; int left = 0; int right = realSplits.length - 1; for (int i = 0; i < splits.length; ++i) { if (splits[i].isAssigned) { // copy the split and fix up the locations String[] newLocations = { splits[i].locations.get(0).hostname }; realSplits[i] = new FileSplit(realSplits[i].getPath(), realSplits[i].getStart(), realSplits[i].getLength(), newLocations); result[left++] = realSplits[i]; } else { result[right--] = realSplits[i]; } } List<InputSplit> ret = new ArrayList<InputSplit>(); for (FileSplit fs : result) { ret.add(fs); } return ret; }
From source file:bucket_sort.NLineInputFormat.java
License:Apache License
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numLinesPerSplit) throws IOException { List<FileSplit> splits = new ArrayList<FileSplit>(); Path fileName = status.getPath(); if (status.isDir()) { throw new IOException("Not a file: " + fileName); }/*from w w w . ja v a2s. c o m*/ FileSystem fs = fileName.getFileSystem(conf); LineReader lr = null; try { FSDataInputStream in = fs.open(fileName); lr = new LineReader(in, conf); Text line = new Text(); int numLines = 0; long begin = 0; long length = 0; int num = -1; while ((num = lr.readLine(line)) > 0) { numLines++; length += num; if (numLines == numLinesPerSplit) { // NLineInputFormat uses LineRecordReader, which always reads // (and consumes) at least one character out of its upper split // boundary. So to make sure that each mapper gets N lines, we // move back the upper split limits of each split // by one character here. if (begin == 0) { splits.add(new FileSplit(fileName, begin, length - 1, new String[] {})); } else { splits.add(new FileSplit(fileName, begin - 1, length, new String[] {})); } begin += length; length = 0; numLines = 0; } } if (numLines != 0) { splits.add(new FileSplit(fileName, begin, length, new String[] {})); } } finally { if (lr != null) { lr.close(); } } return splits; }
From source file:com.alexholmes.hadooputils.combine.common.mapreduce.CommonCombineFileRecordReader.java
License:Apache License
/** * Moves on to the next split inside {@link #split}. The {@link #reader} will be {@code null} * once we have exhausted all the splits. * * @return true if we successfully moved on to the next split * @throws java.io.IOException if we hit io errors * @throws InterruptedException if we get interrupted *///from w w w . jav a 2s .c o m public boolean nextReader() throws IOException, InterruptedException { // close the current reader and set it to null close(); currentSplit++; if (currentSplit >= split.getPaths().length) { // hit the end of the line return false; } FileSplit fileSplit = new FileSplit(split.getPath(currentSplit), split.getOffset(currentSplit), split.getLength(currentSplit), split.getLocations() == null || split.getLocations().length - 1 < currentSplit ? null : new String[] { split.getLocations()[currentSplit] }); reader = engineerer.createRecordReader(); reader.initialize(fileSplit, context); return true; }
From source file:com.asakusafw.runtime.stage.input.TemporaryInputFormat.java
License:Apache License
/**
 * Creates a {@link FileSplit} covering {@code [start, end)} of {@code path}, using as
 * hosts the owner node names of the fragment that {@code blockMap} returns for that range.
 *
 * @param blockMap source of the fragment for the range
 * @param path file the split belongs to
 * @param start start offset of the split
 * @param end end offset of the split; the split length is {@code end - start}
 * @return the created split
 */
private static FileSplit getSplit(BlockMap blockMap, Path path, long start, long end) {
  List<String> owners = blockMap.get(start, end).getOwnerNodeNames();
  String[] hosts = owners.toArray(new String[owners.size()]);
  return new FileSplit(path, start, end - start, hosts);
}
From source file:com.asakusafw.runtime.stage.input.TemporaryInputFormatTest.java
License:Apache License
/** * Simple case for record readers./*from w w w . j a v a 2 s.co m*/ * @throws Exception if failed */ @Test public void reader_simple() throws Exception { Configuration conf = new ConfigurationProvider().newInstance(); FileStatus stat = write(conf, 1); try (RecordReader<NullWritable, Text> reader = TemporaryInputFormat.createRecordReader()) { reader.initialize(new FileSplit(stat.getPath(), 0, stat.getLen(), null), JobCompatibility.newTaskAttemptContext(conf, id())); assertThat(reader.nextKeyValue(), is(true)); assertThat(reader.getCurrentValue(), is(new Text("Hello, world!"))); assertThat(reader.nextKeyValue(), is(false)); assertThat((double) reader.getProgress(), closeTo(1.0, 0.01)); } }
From source file:com.chinamobile.bcbsp.io.BSPFileInputFormat.java
License:Apache License
/** * Generate the list of files and make them into FileSplits. * * @param job//from ww w .ja v a 2s . co m * The current BSPJob job * @return input splits */ @Override public List<InputSplit> getSplits(BSPJob job) throws IOException { List<InputSplit> splits = new ArrayList<InputSplit>(); for (FileStatus file : listStatus(job)) { Path path = file.getPath(); FileSystem fs = path.getFileSystem(job.getConf()); long length = file.getLen(); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); if ((length != 0) && isSplitable(job, path)) { long blockSize = file.getBlockSize(); long splitSize = 0L; if (job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1) == 1) { if (job.getSplitSize() == 0L) { splitSize = blockSize; } else { splitSize = job.getSplitSize(); } } else { if (job.getSplitSize() == 0L) { splitSize = blockSize * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1); } else { splitSize = job.getSplitSize() * job.getInt(Constants.USER_BC_BSP_JOB_SPLIT_FACTOR, 1); } } LOG.info("[Split Size] " + (splitSize / (1024 * 1024)) + " MB"); long bytesRemaining = length; while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining); splits.add(new FileSplit(path, length - bytesRemaining, splitSize, blkLocations[blkIndex].getHosts())); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts())); } } else if (length != 0) { splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts())); } else { // Create empty hosts array for zero length files splits.add(new FileSplit(path, 0, length, new String[0])); } } LOG.info("[Split Number] " + splits.size()); return splits; }