Example usage for org.apache.hadoop.mapreduce.lib.input.FileSplit#getPath


Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.input.FileSplit#getPath.

Prototype

public Path getPath() 

Document

The file containing this split's data.
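
For orientation, here is a minimal sketch (not taken from any of the sources below) of the idiom most of them share: cast the incoming InputSplit to FileSplit, then call getPath() to learn which file the split covers. The class and field names are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class PathAwareReaderSketch {
    private Path file;
    private String fileName;

    public void initialize(InputSplit genericSplit, TaskAttemptContext context) {
        FileSplit split = (FileSplit) genericSplit;
        file = split.getPath();      // the file containing this split's data
        fileName = file.getName();   // last path component, e.g. "part-00000"
    }
}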

Usage

From source file: BamRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    split_length = split.getLength();
    System.out.println("start: " + start);
    System.out.println("split_length: " + split_length);
    fileInfo = split.getPath();
    //String fileName = fileInfo.toString().split("-")[0];
    //Path file = new Path(fileName);
    //compressionCodecs = new CompressionCodecFactory(job);
    //final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    //FileSystem fs = file.getFileSystem(job);
    //fileIn = fs.open(file);
    //fileIn.seek(start);
    //this.pos = start;
}

From source file: ZipFileRecordReader.java

License: Apache License

/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);
}

From source file: authordetect.input.SingleBookReader.java

/**
 * @param inputSplit
 * @param context    the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();

    // get the option from configuration:
    // 0 for group by author, 1 for group by book
    int option = configuration.getInt("GROUP_OPTION", 0);

    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    //initial start point and end point
    start = split.getStart();
    end = start + split.getLength();

    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }

    start += lineReader.readLine(currentLine);

    prepareToScanBook(option);
}

From source file: brush.FastqRecordReader.java

License: Apache License

/**
 * Builds a new record reader given a config file and an input split.
 *
 * @param conf The Hadoop configuration object. Used for gaining access
 *   to the underlying file system.
 * @param split The file split to read.
 */
protected FastqRecordReader(final Configuration conf, final FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(file);

    if (codec == null) { // no codec.  Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else {
        // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }

        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}

From source file: bsc.spark.examples.terasort.ehiggs.TeraScheduler.java

License: Apache License

public TeraScheduler(FileSplit[] realSplits, Configuration conf) throws IOException {
    this.realSplits = realSplits;
    this.slotsPerHost = conf.getInt(TTConfig.TT_MAP_SLOTS, 4);
    Map<String, Host> hostTable = new HashMap<String, Host>();
    splits = new Split[realSplits.length];
    for (FileSplit realSplit : realSplits) {
        Split split = new Split(realSplit.getPath().toString());
        splits[remainingSplits++] = split;
        for (String hostname : realSplit.getLocations()) {
            Host host = hostTable.get(hostname);
            if (host == null) {
                host = new Host(hostname);
                hostTable.put(hostname, host);
                hosts.add(host);
            }
            host.splits.add(split);
            split.locations.add(host);
        }
    }
}

From source file: ca.sparkera.adapters.mapreduce.MainframeVBRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    final Path file = split.getPath();
    initialize(job, split.getStart(), split.getLength(), file);
}

From source file: chaohBIM.ZipFileRecordReader.java

License: Apache License

/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);

    zipfilename = path.getName().replaceAll("\\.zip$", ""); // replaceAll takes a regex, so escape the dot
    //System.out.println(zipfilename);
}

From source file: clustering.link_back.step1.SetKeyMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    Path filePath = fileSplit.getPath();
    this.joinOrder = filePath.toString().contains("mst") ? 1 : 2;
}

From source file: clustering.link_back.step2.SetKeyMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    FileSplit fileSplit = (FileSplit) context.getInputSplit();
    Path filePath = fileSplit.getPath();
    this.joinOrder = filePath.toString().contains("step1") ? 1 : 2;
}
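
Both SetKeyMapper examples above cast context.getInputSplit() straight to FileSplit. As a hedged aside (not part of the original sources): when a job is configured with MultipleInputs, Hadoop hands the mapper a wrapped split (TaggedInputSplit), and the direct cast throws a ClassCastException. A defensive variant of the same pattern checks the runtime type first:

// Hypothetical defensive variant of the setup() pattern shown above.
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    InputSplit split = context.getInputSplit();
    if (split instanceof FileSplit) {
        Path filePath = ((FileSplit) split).getPath();
        this.joinOrder = filePath.toString().contains("step1") ? 1 : 2;
    } else {
        // e.g. a TaggedInputSplit from MultipleInputs; fall back to a default
        this.joinOrder = 2;
    }
}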

From source file: cn.uc.hadoop.mapreduce.lib.input.FileNameLineRecordReader.java

License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    // added by qiujw: use the file name as the record key
    key = new Text(file.getName());

    compressionCodecs = new CompressionCodecFactory(job);
    codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    if (isCompressedInput()) {
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(cIn, job);
            } else {
                in = new LineReader(cIn, job, this.recordDelimiterBytes);
            }

            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            if (null == this.recordDelimiterBytes) {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job);
            } else {
                in = new LineReader(codec.createInputStream(fileIn, decompressor), job,
                        this.recordDelimiterBytes);
            }
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }

        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}