Example usage for org.apache.hadoop.mapred FileSplit getLocations

List of usage examples for org.apache.hadoop.mapred FileSplit getLocations

Introduction

On this page you can find example usages of org.apache.hadoop.mapred FileSplit getLocations. The method returns the names of the hosts that store the data for the split, and it may throw an IOException.

Prototype

public String[] getLocations() throws IOException 
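
As a quick orientation, here is a minimal, self-contained sketch of calling getLocations() on the splits produced by the old-API TextInputFormat. The input path and class name are hypothetical placeholders; on a purely local file system the returned host list may be empty.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class FileSplitLocationsExample {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(FileSplitLocationsExample.class);
        FileInputFormat.setInputPaths(conf, new Path("/tmp/example.txt")); // hypothetical input
        TextInputFormat format = new TextInputFormat();
        format.configure(conf);

        // Each FileSplit knows which hosts store its portion of the file.
        for (InputSplit split : format.getSplits(conf, 1)) {
            FileSplit fileSplit = (FileSplit) split;
            String[] locations = fileSplit.getLocations();
            System.out.println(fileSplit.getPath() + " [" + fileSplit.getStart() + ", +"
                    + fileSplit.getLength() + ") on " + String.join(", ", locations));
        }
    }
}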

Usage

From source file:DeprecatedBAMInputFormat.java

License:Open Source License

public static List<org.apache.hadoop.mapreduce.InputSplit> undeprecateSplits(InputSplit[] splits)
        throws IOException {
    final List<org.apache.hadoop.mapreduce.InputSplit> undeprecated = new ArrayList<org.apache.hadoop.mapreduce.InputSplit>(
            splits.length);
    for (final InputSplit s : splits) {
        final FileSplit f = (FileSplit) s;
        undeprecated.add(new org.apache.hadoop.mapreduce.lib.input.FileSplit(f.getPath(), f.getStart(),
                f.getLength(), f.getLocations()));
    }
    return undeprecated;
}
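
A hedged sketch of how this helper might be invoked, assuming DeprecatedBAMInputFormat inherits the old-API getSplits from FileInputFormat; the job configuration and input path below are placeholders, not part of the original source.

JobConf job = new JobConf();
FileInputFormat.setInputPaths(job, new Path("/data/example.bam")); // hypothetical path
InputSplit[] oldSplits = new DeprecatedBAMInputFormat().getSplits(job, 4);
List<org.apache.hadoop.mapreduce.InputSplit> newSplits =
        DeprecatedBAMInputFormat.undeprecateSplits(oldSplits);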

From source file:DeprecatedBAMRecordReader.java

License:Open Source License

public DeprecatedBAMRecordReader(InputSplit split, final JobConf job, Reporter reporter) throws IOException {
    if (split instanceof DeprecatedFileVirtualSplit) {
        rr.initialize(((DeprecatedFileVirtualSplit) split).vs, new FakeTaskAttemptContext(job));

        splitLength = split.getLength();
        return;

    }
    if (split instanceof FileSplit) {
        // XXX             XXX
        //     XXX     XXX
        //         XXX
        //     XXX     XXX
        // XXX             XXX
        //
        // Hive gives us its own custom FileSplits for some reason, so we have
        // to do our own split alignment. (Sometimes, anyway; for "select
        // count(*) from table" we get FileSplits here, but for "select * from
        // table" our input format is used directly. Perhaps it's only because
        // the latter doesn't spawn a MapReduce job, so getting a FileSplit
        // here is the common case.)
        //
        // Since we get only one split at a time here, this is very poor: we
        // have to open the file for every split, even if it's the same file
        // every time.
        //
        // This should always work, but might be /very/ slow. I can't think of
        // a better way.

        final FileSplit fspl = (FileSplit) split;
        final Path path = fspl.getPath();

        final long beg = fspl.getStart();
        final long end = beg + fspl.getLength();

        final SeekableStream sin = WrapSeekable.openPath(path.getFileSystem(job), path);
        final BAMSplitGuesser guesser = new BAMSplitGuesser(sin);

        final long alignedBeg = guesser.guessNextBAMRecordStart(beg, end);
        sin.close();

        if (alignedBeg == end)
            throw new IOException("Guesser found nothing after pos " + beg);

        final long alignedEnd = end << 16 | 0xffff;
        splitLength = (alignedEnd - alignedBeg) >> 16;

        rr.initialize(new FileVirtualSplit(path, alignedBeg, alignedEnd, fspl.getLocations()),
                new FakeTaskAttemptContext(job));
        return;
    }

    throw new ClassCastException("Can only handle DeprecatedFileVirtualSplit and FileSplit");
}
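
The shift-by-16 arithmetic in the FileSplit branch above packs and unpacks BGZF-style virtual offsets: as the snippet suggests, the low 16 bits address a position inside an uncompressed block while the remaining high bits address the compressed block within the file. A small illustration with made-up values:

// Made-up values for illustration only.
long compressedBlockOffset = 1_000_000L; // byte offset of a BGZF block within the file
int withinBlockOffset = 513;             // offset inside the uncompressed block (fits in 16 bits)

long virtualOffset = compressedBlockOffset << 16 | withinBlockOffset;

// Recovering the two components:
long blockAgain = virtualOffset >>> 16;            // 1000000
int withinAgain = (int) (virtualOffset & 0xffff);  // 513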

From source file:alluxio.hadoop.HadoopUtils.java

License:Apache License

/**
 * Returns a string representation of a Hadoop {@link FileSplit}.
 *
 * @param fs Hadoop {@link FileSplit}
 * @return its string representation
 */
public static String toStringHadoopFileSplit(FileSplit fs) {
    StringBuilder sb = new StringBuilder();
    sb.append("HadoopFileSplit: Path: ").append(fs.getPath());
    sb.append(" , Start: ").append(fs.getStart());
    sb.append(" , Length: ").append(fs.getLength());
    sb.append(" , Hosts: ");
    String[] locs;
    try {
        locs = fs.getLocations();
    } catch (IOException e) {
        LOG.error(e.getMessage());
        locs = new String[] {};
    }
    for (String loc : locs) {
        sb.append(loc).append("; ");
    }

    return sb.toString();
}
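
A hypothetical call to this helper; the path, offsets, and host names are invented for illustration, and the expected output follows directly from the formatting above.

FileSplit split = new FileSplit(new Path("hdfs://nn/data/part-00000"), 0L, 134217728L,
        new String[] { "host1", "host2" });
System.out.println(HadoopUtils.toStringHadoopFileSplit(split));
// Prints (per the code above):
// HadoopFileSplit: Path: hdfs://nn/data/part-00000 , Start: 0 , Length: 134217728 , Hosts: host1; host2; 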

From source file:com.ds.lzo.DeprecatedLzoLineRecordReaderForCombined.java

License:Open Source License

public DeprecatedLzoLineRecordReaderForCombined(Configuration conf, FileSplit split) throws IOException {
    LOG.warn("split start: " + split.getStart());
    LOG.warn("split length: " + split.getLength());
    String[] locs = split.getLocations();
    for (String loc : locs) {
        LOG.warn("location: " + loc);
    }
    start = split.getStart();
    end = start + split.getLength();
    LOG.warn("split end: " + end);
    final Path file = split.getPath();
    LOG.warn("file: " + file.getName());
    LOG.warn("INT split start: " + (int) split.getStart());
    LOG.warn("INT split length: " + (int) split.getLength());
    LOG.warn("INT split end: " + (int) end);

    FileSystem fs = file.getFileSystem(conf);
    codecFactory = new CompressionCodecFactory(conf);
    final CompressionCodec codec = codecFactory.getCodec(file);
    LOG.warn("codec: " + codec.toString());
    LOG.warn("config: " + conf.toString());
    if (codec == null) {
        throw new IOException("No LZO codec found, cannot run.");
    }

    // Open the file and seek to the next split.
    fileIn = fs.open(file);
    // Create input stream and read the file header.
    in = new LineReader(codec.createInputStream(fileIn), conf);
    if (start != 0) {
        fileIn.seek(start);
        LOG.warn("fileIn position: " + fileIn.getPos());
        LOG.warn("buffer size: " + conf.get("io.file.buffer.size"));

        // Read and ignore the first line.
        in.readLine(new Text());
        start = fileIn.getPos();
    }

    pos = start;
}

From source file:com.hadoop.mapred.DeprecatedLzoTextInputFormat.java

License:Open Source License

@Override
public InputSplit[] getSplits(JobConf conf, int numSplits) throws IOException {
    FileSplit[] splits = (FileSplit[]) super.getSplits(conf, numSplits);
    // Find new starts/ends of the filesplit that align with the LZO blocks.

    List<FileSplit> result = new ArrayList<FileSplit>();

    for (FileSplit fileSplit : splits) {
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        if (!LzoInputFormatCommon.isLzoFile(file.toString())) {
            // non-LZO file, keep the input split as is.
            result.add(fileSplit);
            continue;
        }

        // LZO file, try to split if the .index file was found
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // Empty index, keep it as is.
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long lzoStart = index.alignSliceStartToIndex(start, end);
        long lzoEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (lzoStart != LzoIndex.NOT_FOUND && lzoEnd != LzoIndex.NOT_FOUND) {
            result.add(new FileSplit(file, lzoStart, lzoEnd - lzoStart, fileSplit.getLocations()));
        }
    }

    return result.toArray(new FileSplit[result.size()]);
}

From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.RemoteParForColocatedFileSplit.java

License:Open Source License

public RemoteParForColocatedFileSplit(FileSplit split, String fname, int blen) throws IOException {
    super(split.getPath(), split.getStart(), split.getLength(), split.getLocations());

    _fname = fname;
    _blen = blen;
}

From source file:com.ibm.jaql.fail.io.ErrorSplit.java

License:Apache License

public ErrorSplit(FileSplit split, JobConf job, Error e, int cnt) throws IOException {
    child = new FileSplit(split.getPath(), split.getStart(), split.getLength(), split.getLocations());
    error = e;
    count = cnt;
}

From source file:com.ibm.jaql.lang.expr.io.FileSplitToRecordFn.java

License:Apache License

@Override
public JsonRecord eval(Context context) throws Exception {
    // { path: string, start: long, length: long, locations: [string...] }
    if (in == null) {
        in = new DataInputBuffer();
        jpath = new MutableJsonString();
        jstart = new MutableJsonLong();
        jlength = new MutableJsonLong();
        jlocations = new BufferedJsonArray();
        values = new JsonValue[] { jpath, jstart, jlength, jlocations };
        resultRec = new BufferedJsonRecord();
        resultRec.set(NAMES, values, NAMES.length);
    }

    JsonRecord splitRec = (JsonRecord) exprs[0].eval(context);

    JsonString jsplitClassName = (JsonString) splitRec.get(InputSplitsFn.CLASS_TAG);
    Class<? extends FileSplit> splitCls = (Class<? extends FileSplit>) ClassLoaderMgr
            .resolveClass(jsplitClassName.toString());
    FileSplit split = (FileSplit) ReflectionUtils.newInstance(splitCls, null);
    JsonBinary rawSplit = (JsonBinary) splitRec.get(InputSplitsFn.SPLIT_TAG);
    in.reset(rawSplit.getInternalBytes(), rawSplit.bytesOffset(), rawSplit.bytesLength());
    split.readFields(in);
    JsonArray jlocs = (JsonArray) splitRec.get(InputSplitsFn.LOCATIONS_TAG);

    jpath.setCopy(split.getPath().toString());
    jstart.set(split.getStart());
    jlength.set(split.getLength());
    if (jlocs != null) {
        values[3] = jlocs;
    } else {
        String[] locs = split.getLocations();
        jlocations.resize(locs.length);
        for (int i = 0; i < locs.length; i++) {
            jlocations.set(i, new JsonString(locs[i]));
        }
        values[3] = jlocations;
    }

    return resultRec;
}

From source file:com.mongodb.hadoop.mapred.BSONFileInputFormat.java

License:Apache License

@Override
public FileSplit[] getSplits(final JobConf job, final int numSplits) throws IOException {

    FileStatus[] inputFiles = listStatus(job);
    List<FileSplit> results = new ArrayList<FileSplit>();
    for (FileStatus file : inputFiles) {
        BSONSplitter splitter = new BSONSplitter();
        splitter.setConf(job);
        splitter.setInputPath(file.getPath());
        Path splitFilePath;
        splitFilePath = new Path(file.getPath().getParent(), "." + file.getPath().getName() + ".splits");
        try {
            splitter.loadSplitsFromSplitFile(file, splitFilePath);
        } catch (BSONSplitter.NoSplitFileException nsfe) {
            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("No split file for %s; building split file", file.getPath()));
            }
            splitter.readSplitsForFile(file);
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug(format("BSONSplitter found %d splits.", splitter.getAllSplits().size()));
        }

        for (org.apache.hadoop.mapreduce.lib.input.FileSplit split : splitter.getAllSplits()) {
            FileSplit fsplit = new FileSplit(split.getPath(), split.getStart(), split.getLength(),
                    split.getLocations());
            results.add(fsplit);
        }
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug(format("Total of %d found.", results.size()));
    }
    return results.toArray(new FileSplit[results.size()]);
}

From source file:com.moz.fiji.express.flow.framework.MapredInputFormatWrapper.java

License:Apache License

@Override
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    initInputFormat(job);

    try {
        List<org.apache.hadoop.mapreduce.InputSplit> splits = realInputFormat
                .getSplits(HadoopCompat.newJobContext(job, null));

        if (splits == null) {
            return null;
        }

        InputSplit[] resultSplits = new InputSplit[splits.size()];
        int i = 0;
        for (org.apache.hadoop.mapreduce.InputSplit split : splits) {
            if (split.getClass() == org.apache.hadoop.mapreduce.lib.input.FileSplit.class) {
                org.apache.hadoop.mapreduce.lib.input.FileSplit mapreduceFileSplit = ((org.apache.hadoop.mapreduce.lib.input.FileSplit) split);
                resultSplits[i++] = new FileSplit(mapreduceFileSplit.getPath(), mapreduceFileSplit.getStart(),
                        mapreduceFileSplit.getLength(), mapreduceFileSplit.getLocations());
            } else {
                resultSplits[i++] = new InputSplitWrapper(split);
            }
        }

        return resultSplits;

    } catch (InterruptedException e) {
        throw new IOException(e);
    }
}