List of usage examples for the getLocations() method of org.apache.hadoop.mapred.FileSplit
public String[] getLocations() throws IOException
From source file:DeprecatedBAMInputFormat.java
License:Open Source License
public static List<org.apache.hadoop.mapreduce.InputSplit> undeprecateSplits(InputSplit[] splits) throws IOException { final List<org.apache.hadoop.mapreduce.InputSplit> undeprecated = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>( splits.length);// ww w.ja va 2 s . com for (final InputSplit s : splits) { final FileSplit f = (FileSplit) s; undeprecated.add(new org.apache.hadoop.mapreduce.lib.input.FileSplit(f.getPath(), f.getStart(), f.getLength(), f.getLocations())); } return undeprecated; }
From source file:DeprecatedBAMRecordReader.java
License:Open Source License
public DeprecatedBAMRecordReader(InputSplit split, final JobConf job, Reporter reporter) throws IOException { if (split instanceof DeprecatedFileVirtualSplit) { rr.initialize(((DeprecatedFileVirtualSplit) split).vs, new FakeTaskAttemptContext(job)); splitLength = split.getLength(); return;//from ww w. j av a 2s . c o m } if (split instanceof FileSplit) { // XXX XXX // XXX XXX // XXX // XXX XXX // XXX XXX // // Hive gives us its own custom FileSplits for some reason, so we have // to do our own split alignment. (Sometimes, anyway; for "select // count(*) from table" we get FileSplits here, but for "select * from // table" our input format is used directly. Perhaps it's only because // the latter doesn't spawn a MapReduce job, so getting a FileSplit // here is the common case.) // // Since we get only one split at a time here, this is very poor: we // have to open the file for every split, even if it's the same file // every time. // // This should always work, but might be /very/ slow. I can't think of // a better way. final FileSplit fspl = (FileSplit) split; final Path path = fspl.getPath(); final long beg = fspl.getStart(); final long end = beg + fspl.getLength(); final SeekableStream sin = WrapSeekable.openPath(path.getFileSystem(job), path); final BAMSplitGuesser guesser = new BAMSplitGuesser(sin); final long alignedBeg = guesser.guessNextBAMRecordStart(beg, end); sin.close(); if (alignedBeg == end) throw new IOException("Guesser found nothing after pos " + beg); final long alignedEnd = end << 16 | 0xffff; splitLength = (alignedEnd - alignedBeg) >> 16; rr.initialize(new FileVirtualSplit(path, alignedBeg, alignedEnd, fspl.getLocations()), new FakeTaskAttemptContext(job)); return; } throw new ClassCastException("Can only handle DeprecatedFileVirtualSplit and FileSplit"); }
From source file:alluxio.hadoop.HadoopUtils.java
License:Apache License
/**
 * Returns a string representation of a Hadoop {@link FileSplit}.
 *
 * <p>The result lists the split's path, start, length and hosts. If the hosts cannot be
 * read, the error message is logged and the host list is left empty.
 *
 * @param fs Hadoop {@link FileSplit}
 * @return its string representation
 */
public static String toStringHadoopFileSplit(FileSplit fs) {
    StringBuilder text = new StringBuilder();
    text.append("HadoopFileSplit: Path: ").append(fs.getPath())
        .append(" , Start: ").append(fs.getStart())
        .append(" , Length: ").append(fs.getLength())
        .append(" , Hosts: ");

    String[] hosts;
    try {
        hosts = fs.getLocations();
    } catch (IOException e) {
        LOG.error(e.getMessage());
        hosts = new String[0];
    }

    for (int i = 0; i < hosts.length; i++) {
        text.append(hosts[i]);
        text.append("; ");
    }
    return text.toString();
}
From source file:com.ds.lzo.DeprecatedLzoLineRecordReaderForCombined.java
License:Open Source License
/**
 * Creates a line reader over the given split of a compressed file.
 *
 * <p>The constructor logs the split's coordinates, looks up the compression codec for the
 * file, opens the file and wraps it in a {@code LineReader} over the codec's input stream.
 * When the split does not begin at offset 0, the raw stream is seeked to the split start and
 * the first line is read and discarded.
 *
 * @param conf configuration used to resolve the file system and the codec
 * @param split the file split to read
 * @throws IOException if no codec is registered for the file, or if opening, seeking or
 *     reading the file fails
 */
public DeprecatedLzoLineRecordReaderForCombined(Configuration conf, FileSplit split) throws IOException {
    LOG.warn("split start: " + split.getStart());
    LOG.warn("split length: " + split.getLength());
    String[] locs = split.getLocations();
    for (String loc : locs) {
        LOG.warn("location: " + loc);
    }

    start = split.getStart();
    end = start + split.getLength();
    LOG.warn("split end: " + end);

    final Path file = split.getPath();
    LOG.warn("file: " + file.getName());
    LOG.warn("INT split start: " + (int) split.getStart());
    LOG.warn("INT split length: " + (int) split.getLength());
    LOG.warn("INT split end: " + (int) end);

    FileSystem fs = file.getFileSystem(conf);
    codecFactory = new CompressionCodecFactory(conf);
    final CompressionCodec codec = codecFactory.getCodec(file);

    // Check for a missing codec BEFORE dereferencing it; otherwise the log line below
    // would fail with a NullPointerException and hide the real cause.
    if (codec == null) {
        throw new IOException("No LZO codec found, cannot run.");
    }
    LOG.warn("codec: " + codec.toString());
    LOG.warn("config: " + conf.toString());

    // Open the file and seek to the next split.
    fileIn = fs.open(file);

    // Create input stream and read the file header.
    in = new LineReader(codec.createInputStream(fileIn), conf);
    if (start != 0) {
        fileIn.seek(start);
        LOG.warn("fileIn position: " + fileIn.getPos());
        LOG.warn("buffer size: " + conf.get("io.file.buffer.size"));

        // Read and ignore the first line.
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    pos = start;
}
From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java
License:Open Source License
@Override public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException { FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits); // Find new starts/ends of the filesplit that align with the LZO blocks. List<FileSplit> result = new ArrayList<FileSplit>(); for (FileSplit fileSplit : splits) { Path file = fileSplit.getPath(); FileSystem fs = file.getFileSystem(conf); if (!LzoInputFormatCommon.isLzoFile(file.toString())) { // non-LZO file, keep the input split as is. result.add(fileSplit);//from w ww .j a v a2 s .c om continue; } // LZO file, try to split if the .index file was found LzoIndex index = indexes.get(file); if (index == null) { throw new IOException("Index not found for " + file); } if (index.isEmpty()) { // Empty index, keep it as is. result.add(fileSplit); continue; } long start = fileSplit.getStart(); long end = start + fileSplit.getLength(); long lzoStart = index.alignSliceStartToIndex(start, end); long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen()); if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) { result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations())); } } return result.toArray(new FileSplit[result.size()]); }
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java
License:Open Source License
/**
 * Builds a split that mirrors the path, start, length and locations of {@code split}
 * and additionally records a file name and a block length.
 *
 * @param split the split whose coordinates are copied
 * @param fname value stored in {@code _fname}
 * @param blen value stored in {@code _blen}
 * @throws IOException if the locations of {@code split} cannot be read
 */
public RemoteParForColocatedFileSplit(FileSplit split, String fname, int blen) throws IOException {
    super(split.getPath(), split.getStart(), split.getLength(), split.getLocations());

    this._blen = blen;
    this._fname = fname;
}
From source file:com.ibm.jaql.fail.io.ErrorSplit.java
License:Apache License
public ErrorSplit(FileSplit split, JobConf job, Error e, int cnt) throws IOException { child = new FileSplit(split.getPath(), split.getStart(), split.getLength(), split.getLocations()); error = e;/* w w w.j a v a 2s . com*/ count = cnt; }
From source file:com.ibm.jaql.lang.expr.io.FileSplitToRecordFn.java
License:Apache License
@Override public JsonRecord eval(Context context) throws Exception { // { path: string, start: long, length: long, locations: [string...] } if (in == null) { in = new DataInputBuffer(); jpath = new MutableJsonString(); jstart = new MutableJsonLong(); jlength = new MutableJsonLong(); jlocations = new BufferedJsonArray(); values = new JsonValue[] { jpath, jstart, jlength, jlocations }; resultRec = new BufferedJsonRecord(); resultRec.set(NAMES, values, NAMES.length); }/*from w w w .j a v a 2 s . co m*/ JsonRecord splitRec = (JsonRecord) exprs[0].eval(context); JsonString jsplitClassName = (JsonString) splitRec.get(InputSplitsFn.CLASS_TAG); Class<? extends FileSplit> splitCls = (Class<? extends FileSplit>) ClassLoaderMgr .resolveClass(jsplitClassName.toString()); FileSplit split = (FileSplit) ReflectionUtils.newInstance(splitCls, null); JsonBinary rawSplit = (JsonBinary) splitRec.get(InputSplitsFn.SPLIT_TAG); in.reset(rawSplit.getInternalBytes(), rawSplit.bytesOffset(), rawSplit.bytesLength()); split.readFields(in); JsonArray jlocs = (JsonArray) splitRec.get(InputSplitsFn.LOCATIONS_TAG); jpath.setCopy(split.getPath().toString()); jstart.set(split.getStart()); jlength.set(split.getLength()); if (jlocs != null) { values[3] = jlocs; } else { String[] locs = split.getLocations(); jlocations.resize(locs.length); for (int i = 0; i < locs.length; i++) { jlocations.set(i, new JsonString(locs[i])); } values[3] = jlocations; } return resultRec; }
From source file:com.mongodb.hadoop.mapred.BSONFileInputFormat.java
License:Apache License
@Override public FileSplit[] getSplits(final JobConf job, final int numSplits) throws IOException { FileStatus[] inputFiles = listStatus(job); List<FileSplit> results = new ArrayList<FileSplit>(); for (FileStatus file : inputFiles) { BSONSplitter splitter = new BSONSplitter(); splitter.setConf(job);// w w w .j ava 2s . c o m splitter.setInputPath(file.getPath()); Path splitFilePath; splitFilePath = new Path(file.getPath().getParent(), "." + file.getPath().getName() + ".splits"); try { splitter.loadSplitsFromSplitFile(file, splitFilePath); } catch (BSONSplitter.NoSplitFileException nsfe) { if (LOG.isDebugEnabled()) { LOG.debug(String.format("No split file for %s; building split file", file.getPath())); } splitter.readSplitsForFile(file); } if (LOG.isDebugEnabled()) { LOG.debug(format("BSONSplitter found %d splits.", splitter.getAllSplits().size())); } for (org.apache.hadoop.mapreduce.lib.input.FileSplit split : splitter.getAllSplits()) { FileSplit fsplit = new FileSplit(split.getPath(), split.getStart(), split.getLength(), split.getLocations()); results.add(fsplit); } } if (LOG.isDebugEnabled()) { LOG.debug(format("Total of %d found.", results.size())); } return results.toArray(new FileSplit[results.size()]); }
From source file:com.moz.fiji.express.flow.framework.MapredInputFormatWrapper.java
License:Apache License
@Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { initInputFormat(job);//from w ww.j a v a2 s . co m try { List<org.apache.hadoop.mapreduce.InputSplit> splits = realInputFormat .getSplits(HadoopCompat.newJobContext(job, null)); if (splits == null) { return null; } InputSplit[] resultSplits = new InputSplit[splits.size()]; int i = 0; for (org.apache.hadoop.mapreduce.InputSplit split : splits) { if (split.getClass() == org.apache.hadoop.mapreduce.lib.input.FileSplit.class) { org.apache.hadoop.mapreduce.lib.input.FileSplit mapreduceFileSplit = ((org.apache.hadoop.mapreduce.lib.input.FileSplit) split); resultSplits[i++] = new FileSplit(mapreduceFileSplit.getPath(), mapreduceFileSplit.getStart(), mapreduceFileSplit.getLength(), mapreduceFileSplit.getLocations()); } else { resultSplits[i++] = new InputSplitWrapper(split); } } return resultSplits; } catch (InterruptedException e) { throw new IOException(e); } }