Example usage for org.apache.hadoop.mapred FileSplit toString

List of usage examples for org.apache.hadoop.mapred FileSplit toString

Introduction

On this page you can find example usage of org.apache.hadoop.mapred FileSplit toString().

Prototype

public String toString() 
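
Every example under Usage follows the same pattern: the InputSplit handed to getRecordReader is cast to a FileSplit and its string form (typically the split's path, start offset, and length, roughly path:start+length) is passed to Reporter.setStatus so the running task reports which part of which file it is reading. The sketch below shows that pattern in isolation; the class name FileSplitToStringSketch and the helper reportSplit are made up for illustration and are not taken from the sources listed under Usage.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.Reporter;

public class FileSplitToStringSketch {

    // Report the split being processed; FileSplit.toString() typically renders
    // something like "hdfs://nn/data/part-00000:0+67108864" (path:start+length).
    static void reportSplit(InputSplit genericSplit, Reporter reporter) {
        FileSplit split = (FileSplit) genericSplit;
        reporter.setStatus(split.toString());
    }

    public static void main(String[] args) {
        // Standalone illustration: build a FileSplit directly and print its string form.
        FileSplit split = new FileSplit(new Path("/tmp/example.txt"), 0L, 1024L, new String[0]);
        System.out.println(split);
    }
}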

Usage

From source file: com.blm.orc.VectorizedOrcInputFormat.java

License: Apache License

@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(InputSplit inputSplit, JobConf conf,
        Reporter reporter) throws IOException {
    FileSplit fSplit = (FileSplit) inputSplit;
    reporter.setStatus(fSplit.toString());

    Path path = fSplit.getPath();

    OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf);
    if (fSplit instanceof OrcSplit) {
        OrcSplit orcSplit = (OrcSplit) fSplit;
        if (orcSplit.hasFooter()) {
            opts.fileMetaInfo(orcSplit.getFileMetaInfo());
        }
    }
    Reader reader = OrcFile.createReader(path, opts);
    return new VectorizedOrcRecordReader(reader, conf, fSplit);
}

From source file: com.facebook.hive.orc.OrcInputFormat.java

License: Open Source License

@Override
public RecordReader<NullWritable, OrcLazyRow> getRecordReader(InputSplit inputSplit, JobConf conf,
        Reporter reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    reporter.setStatus(fileSplit.toString());

    return new OrcRecordReader(OrcFile.createReader(fs, path, conf), conf, fileSplit.getStart(),
            fileSplit.getLength());
}

From source file: org.apache.trevni.avro.AvroTrevniInputFormat.java

License: Apache License

@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(InputSplit split, final JobConf job,
        Reporter reporter) throws IOException {
    final FileSplit file = (FileSplit) split;
    reporter.setStatus(file.toString());

    final AvroColumnReader.Params params = new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
    params.setModel(ReflectData.get());
    if (job.get(AvroJob.INPUT_SCHEMA) != null)
        params.setSchema(AvroJob.getInputSchema(job));

    return new RecordReader<AvroWrapper<T>, NullWritable>() {
        private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);
        private float rows = reader.getRowCount();
        private long row;

        public AvroWrapper<T> createKey() {
            return new AvroWrapper<T>(null);
        }

        public NullWritable createValue() {
            return NullWritable.get();
        }

        public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            if (!reader.hasNext())
                return false;
            wrapper.datum(reader.next());
            row++;
            return true;
        }

        public float getProgress() throws IOException {
            return row / rows;
        }

        public long getPos() throws IOException {
            return row;
        }

        public void close() throws IOException {
            reader.close();
        }

    };

}

From source file: org.wikimedia.wikihadoop.StreamWikiDumpInputFormat.java

License: Apache License

public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader) 
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());

    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}