Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

This page collects real-world usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
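
As a minimal sketch of the pattern that all of the examples below follow: a RecordReader receives a TaskAttemptContext in initialize(), pulls the job-wide Configuration out of it via getConfiguration(), and uses that object both to read job settings and to resolve the split's FileSystem. The key "my.example.delimiter" is a hypothetical, application-defined property used only for this illustration, not a standard Hadoop setting.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class ConfigurationSketch {

    // Called once per split by the framework; the context is the task's
    // only handle on the job-wide Configuration.
    public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
        Configuration conf = context.getConfiguration();

        // Read an application-defined setting ("my.example.delimiter" is a
        // made-up key for this sketch), falling back to a default.
        String delimiter = conf.get("my.example.delimiter", "\n");

        // The same Configuration resolves the FileSystem behind the split's path.
        FileSplit split = (FileSplit) genericSplit;
        Path file = split.getPath();
        FileSystem fs = file.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(file)) {
            // ... seek to split.getStart() and read records, as the examples below do ...
        }
    }
}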

Usage

From source file:ml.shifu.shifu.core.mr.input.CombineRecordReader.java

License:Apache License

private void initializeOne(TaskAttemptContext context, FileSplit split) throws IOException {
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(codec.createInputStream(fileIn), job);
        } else {
            in = new LineReader(codec.createInputStream(fileIn), job, this.recordDelimiterBytes);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
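
Note that this reader still reads the Hadoop 1 property name mapred.linerecordreader.maxlength; on Hadoop 2 and later the same limit is configured as mapreduce.input.linerecordreader.line.maxlength (compare the NReadRecordReader example at the end of this page).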

From source file:mvm.rya.accumulo.mr.utils.AccumuloHDFSFileInputFormat.java

License:Apache License

@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {

        private FileSKVIterator fileSKVIterator;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            //                long start = split.getStart();
            //                long length = split.getLength();
            FileSystem fs = file.getFileSystem(job);
            //                FSDataInputStream fileIn = fs.open(file);
            //                System.out.println(start);
            //                if (start != 0L) {
            //                    fileIn.seek(start);
            //                }
            Instance instance = AccumuloProps.getInstance(taskAttemptContext);

            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE,
                    new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
            //                fileSKVIterator = new RFileOperations2().openReader(fileIn, length - start, job);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            fileSKVIterator.next();
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
            // note: the FileSKVIterator opened in initialize() is not closed here
        }
    };
}
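
Here getConfiguration() supplies the Configuration that both resolves the split's FileSystem and parameterizes the RFile reader. This reader is deliberately minimal: getProgress() always reports 0 and close() does not release the FileSKVIterator, so progress reporting and resource cleanup are left as stubs.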

From source file:mvm.rya.cloudbase.giraph.format.CloudbaseRyaVertexOutputFormat.java

License:Apache License

public VertexWriter<Text, Text, Text> createVertexWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordWriter<Text, Mutation> writer = cloudbaseOutputFormat.getRecordWriter(context);
    String tableName = context.getConfiguration().get(OUTPUT_TABLE);
    if (tableName == null)
        throw new IOException("Forgot to set table name using CloudbaseVertexOutputFormat.OUTPUT_TABLE");
    return new CloudbaseEdgeVertexWriter(writer, tableName);
}
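
A common pattern for output formats: a job-level key (OUTPUT_TABLE) is read back out of the task's configuration at task-setup time, and the writer fails fast with an IOException if the job was submitted without it.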

From source file:net.hubs1.mahout.cluster.CRLFInputFormat.java

License:Apache License

@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        return new CRLFRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        log.warn("Error while creating CRLFRecordReader", ioe);
        return null;
    }
}
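
Catching the IOException and returning null here postpones the failure to a less informative NullPointerException inside the framework; declaring and rethrowing the exception (createRecordReader is allowed to throw IOException) would usually produce a clearer task diagnostic.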

From source file:net.mooncloud.mapreduce.lib.db.DBInputFormat.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, Record> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    return createDBRecordReader((DBInputSplit) split, context.getConfiguration());
}

From source file:net.mooncloud.mapreduce.lib.db.DBRecordReader.java

License:Apache License

public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // pick up the schema from the job configuration if it was not already set
    if (StringUtils.isBlank(schema)) {
        schema = context.getConfiguration().get("mapred.input.schema");
    }
}
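
The schema is read from mapred.input.schema, an application-defined key of this DB input package rather than a standard Hadoop property, so it has to be set on the job by whatever code configures the input format.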

From source file:net.shun.mapreduce.lib.input.XmlRecordReader.java

License:Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    String[] beginMarks = job.getStrings("mapred.xmlrecordreader.begin", "<page>");
    this.beginMark = beginMarks[0];
    String[] endMarks = job.getStrings("mapred.xmlrecordreader.end", "</page>");
    this.endMark = endMarks[0];

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    fileIn.seek(start);
    in = new BufferedInputStream(fileIn);
    /*
    boolean skipFirstLine = false;
    if (codec != null) {
      in = new LineReader(codec.createInputStream(fileIn), job);
      end = Long.MAX_VALUE;
    } else {
      if (start != 0) {
        skipFirstLine = true;
        --start;
        fileIn.seek(start);
      }
      in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {  // skip first line and re-establish "start".
      start += in.readLine(new Text(), 0,
                   (int)Math.min((long)Integer.MAX_VALUE, end - start));
    }
    */
    this.pos = start;
    readUntilMatch(beginMark, false, null);
}
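
The record boundaries default to <page> and </page>, and can be overridden through the mapred.xmlrecordreader.begin and mapred.xmlrecordreader.end properties, which are keys defined by this reader rather than standard Hadoop settings. The commented-out block preserves the LineRecordReader-style compression handling that this XML variant no longer uses.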

From source file:nl.basjes.hadoop.input.ApacheHttpdLogfileRecordReader.java

License:Apache License

@Override
public void initialize(final InputSplit split, final TaskAttemptContext context) throws IOException {
    lineReader.initialize(split, context);
    final Configuration conf = context.getConfiguration();

    counterLinesRead = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "1:Lines read");
    counterGoodLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "2:Good lines");
    counterBadLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "3:Bad lines");

    if (logformat == null || requestedFields.isEmpty()) {
        if (logformat == null) {
            logformat = conf.get("nl.basjes.parse.apachehttpdlogline.format", "common");
        }
        if (requestedFields.isEmpty()) {
            String fields = conf.get("nl.basjes.parse.apachehttpdlogline.fields", null);

            if (fields != null) {
                fieldList = Arrays.asList(fields.split(","));
            }
        } else {
            fieldList = new ArrayList<>(requestedFields);
        }
    }

    if (fieldList != null) {
        if (logformat != null && parser == null) {
            parser = createParser();
        }
        for (String field : fieldList) {
            currentValue.declareRequestedFieldname(field);
        }
    }

    setupFields();
}
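
Besides getConfiguration(), this example leans on a second TaskAttemptContext service: per-task counters obtained through getCounter(), which let the reader report line statistics (lines read, good lines, bad lines) back to the job.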

From source file:nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.FastqFileRecordWriter.java

License:Open Source License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within FileRecordWriter.
 *
 * @param context The context for this task.
 */
public FastqFileRecordWriter(TaskAttemptContext context) {
    this.mConf = context.getConfiguration();
    this.mOutputPath = new Path(this.mConf.get("output.dir"), "PhredCalculator.fastqc");
}
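
Because this constructor is invoked reflectively, the TaskAttemptContext is the only channel through which the writer receives its settings; output.dir is an application-defined key of this package, resolved here into the writer's output path.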

From source file:nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java

License:Open Source License

/**
 * Initializes the reader for the given split.
 *
 * @param inputSplit The InputSplit to read.
 * @param context    The context for this task.
 * @throws IOException          if the split cannot be opened or read.
 * @throws InterruptedException if the task is interrupted.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    // Initialize.
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream infile = fs.open(split.getPath());

    // Use number of lines given by user and set parameters.
    this.NLINESTOPROCESS = NLineInputFormat.getNumLinesPerSplit(context);
    this.maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    boolean skipFirstLine = false;

    // Skip first line?
    if (this.start != 0) {
        skipFirstLine = true;
        this.start--;
        infile.seek(this.start);
    }
    this.in = new LineReader(infile, conf);
    if (skipFirstLine) {
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));
    }
    this.pos = this.start;
}
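
This is essentially the stock line-reader split setup, but written against the Hadoop 2 property name mapreduce.input.linerecordreader.line.maxlength, with NLineInputFormat.getNumLinesPerSplit(context) used to honor the job's lines-per-split setting.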