Example usage for org.apache.hadoop.mapred FileSplit toString

List of usage examples for org.apache.hadoop.mapred FileSplit toString

Introduction

On this page you can find example usage of org.apache.hadoop.mapred FileSplit toString().

Prototype

public String toString() 
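
Every example under Usage follows the same pattern: the InputSplit handed to getRecordReader is cast to a FileSplit and its string form (typically the split's path, start offset, and length, roughly path:start+length) is passed to Reporter.setStatus so the running task reports which part of which file it is reading. The sketch below shows that pattern in isolation; the class name FileSplitToStringSketch and the helper reportSplit are made up for illustration and are not taken from the sources listed under Usage.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.Reporter;

public class FileSplitToStringSketch {

    // Report the split being processed; FileSplit.toString() typically renders
    // something like "hdfs://nn/data/part-00000:0+67108864" (path:start+length).
    static void reportSplit(InputSplit genericSplit, Reporter reporter) {
        FileSplit split = (FileSplit) genericSplit;
        reporter.setStatus(split.toString());
    }

    public static void main(String[] args) {
        // Standalone illustration: build a FileSplit directly and print its string form.
        FileSplit split = new FileSplit(new Path("/tmp/example.txt"), 0L, 1024L, new String[0]);
        System.out.println(split);
    }
}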

Usage

From source file: com.blm.orc.VectorizedOrcInputFormat.java

License: Apache License

@Override
public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(InputSplit inputSplit, JobConf conf,
        Reporter reporter) throws IOException {
    FileSplit fSplit = (FileSplit) inputSplit;
    reporter.setStatus(fSplit.toString());

    Path path = fSplit.getPath();

    OrcFile.ReaderOptions opts = OrcFile.readerOptions(conf);
    if (fSplit instanceof OrcSplit) {
        OrcSplit orcSplit = (OrcSplit) fSplit;
        if (orcSplit.hasFooter()) {
            opts.fileMetaInfo(orcSplit.getFileMetaInfo());
        }
    }
    Reader reader = OrcFile.createReader(path, opts);
    return new VectorizedOrcRecordReader(reader, conf, fSplit);
}

From source file: com.facebook.hive.orc.OrcInputFormat.java

License: Open Source License

@Override
public RecordReader<NullWritable, OrcLazyRow> getRecordReader(InputSplit inputSplit, JobConf conf,
        Reporter reporter) throws IOException {
    ReaderWriterProfiler.setProfilerOptions(conf);
    FileSplit fileSplit = (FileSplit) inputSplit;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);
    reporter.setStatus(fileSplit.toString());

    return new OrcRecordReader(OrcFile.createReader(fs, path, conf), conf, fileSplit.getStart(),
            fileSplit.getLength());
}

From source file: org.apache.trevni.avro.AvroTrevniInputFormat.java

License: Apache License

@Override
public RecordReader<AvroWrapper<T>, NullWritable> getRecordReader(InputSplit split, final JobConf job,
        Reporter reporter) throws IOException {
    final FileSplit file = (FileSplit) split;
    reporter.setStatus(file.toString());

    final AvroColumnReader.Params params = new AvroColumnReader.Params(new HadoopInput(file.getPath(), job));
    params.setModel(ReflectData.get());
    if (job.get(AvroJob.INPUT_SCHEMA) != null)
        params.setSchema(AvroJob.getInputSchema(job));

    return new RecordReader<AvroWrapper<T>, NullWritable>() {
        private AvroColumnReader<T> reader = new AvroColumnReader<T>(params);
        private float rows = reader.getRowCount();
        private long row;

        public AvroWrapper<T> createKey() {
            return new AvroWrapper<T>(null);
        }

        public NullWritable createValue() {
            return NullWritable.get();
        }

        public boolean next(AvroWrapper<T> wrapper, NullWritable ignore) throws IOException {
            if (!reader.hasNext())
                return false;
            wrapper.datum(reader.next());
            row++;
            return true;
        }

        public float getProgress() throws IOException {
            return row / rows;
        }

        public long getPos() throws IOException {
            return row;
        }

        public void close() throws IOException {
            reader.close();
        }

    };

}

From source file: org.wikimedia.wikihadoop.StreamWikiDumpInputFormat.java

License: Apache License

public RecordReader<Text, Text> getRecordReader(final InputSplit genericSplit, JobConf job, Reporter reporter)
        throws IOException {
    // handling non-standard record reader (likely StreamXmlRecordReader) 
    FileSplit split = (FileSplit) genericSplit;
    LOG.info("getRecordReader start.....split=" + split);
    reporter.setStatus(split.toString());

    // Open the file and seek to the start of the split
    FileSystem fs = split.getPath().getFileSystem(job);
    String patt = job.get(KEY_EXCLUDE_PAGE_PATTERN);
    boolean prev = job.getBoolean(KEY_PREVIOUS_REVISION, true);
    return new MyRecordReader(split, reporter, job, fs,
            patt != null && !"".equals(patt) ? Pattern.compile(patt) : null, prev);
}