List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
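All of the examples below follow the same basic pattern: a RecordReader, RecordWriter, or InputFormat receives a TaskAttemptContext and calls getConfiguration() to obtain the job Configuration, which then drives property lookups and FileSystem access. The sketch below is a minimal illustration of that pattern, not taken from any of the projects listed here; the class name MyRecordReader and the property key "my.custom.max.record.length" are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class MyRecordReader extends RecordReader<LongWritable, Text> {

    private int maxRecordLength;
    private FSDataInputStream fileIn;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The TaskAttemptContext exposes the job Configuration.
        Configuration conf = context.getConfiguration();

        // Typical use 1: read job properties (the key below is hypothetical).
        this.maxRecordLength = conf.getInt("my.custom.max.record.length", Integer.MAX_VALUE);

        // Typical use 2: obtain a FileSystem for the split's path.
        Path file = ((FileSplit) split).getPath();
        FileSystem fs = file.getFileSystem(conf);
        this.fileIn = fs.open(file);
    }

    // The remaining RecordReader methods are stubbed out for brevity.
    @Override public boolean nextKeyValue() { return false; }
    @Override public LongWritable getCurrentKey() { return null; }
    @Override public Text getCurrentValue() { return null; }
    @Override public float getProgress() { return 0f; }
    @Override public void close() throws IOException { if (fileIn != null) fileIn.close(); }
}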
From source file: ml.shifu.shifu.core.mr.input.CombineRecordReader.java
License: Apache License

private void initializeOne(TaskAttemptContext context, FileSplit split) throws IOException {
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(codec.createInputStream(fileIn), job);
        } else {
            in = new LineReader(codec.createInputStream(fileIn), job, this.recordDelimiterBytes);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file: mvm.rya.accumulo.mr.utils.AccumuloHDFSFileInputFormat.java
License: Apache License

@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {
        private FileSKVIterator fileSKVIterator;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            // long start = split.getStart();
            // long length = split.getLength();
            FileSystem fs = file.getFileSystem(job);
            // FSDataInputStream fileIn = fs.open(file);
            // if (start != 0L) {
            //     fileIn.seek(start);
            // }
            Instance instance = AccumuloProps.getInstance(taskAttemptContext);
            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE,
                    new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
            // fileSKVIterator = new RFileOperations2().openReader(fileIn, length - start, job);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            fileSKVIterator.next();
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
}
From source file: mvm.rya.cloudbase.giraph.format.CloudbaseRyaVertexOutputFormat.java
License: Apache License

public VertexWriter<Text, Text, Text> createVertexWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordWriter<Text, Mutation> writer = cloudbaseOutputFormat.getRecordWriter(context);
    String tableName = context.getConfiguration().get(OUTPUT_TABLE);
    if (tableName == null) {
        throw new IOException(
                "Forgot to set table name using CloudbaseVertexOutputFormat.OUTPUT_TABLE");
    }
    return new CloudbaseEdgeVertexWriter(writer, tableName);
}
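For the writer above to find its table, the driving job must have put the table name into the Configuration before submission. The following driver-side fragment is a hypothetical sketch, assuming OUTPUT_TABLE is a publicly accessible String constant on the output format class; the table name "rya_vertices" is made up.

import org.apache.hadoop.conf.Configuration;

import mvm.rya.cloudbase.giraph.format.CloudbaseRyaVertexOutputFormat;

public class OutputTableSetupSketch {
    // Whatever is set here is what createVertexWriter later reads back
    // via context.getConfiguration().get(OUTPUT_TABLE).
    public static Configuration configure(String tableName) {
        Configuration conf = new Configuration();
        conf.set(CloudbaseRyaVertexOutputFormat.OUTPUT_TABLE, tableName);
        return conf;
    }
}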
From source file: net.hubs1.mahout.cluster.CRLFInputFormat.java
License: Apache License

@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        return new CRLFRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        log.warn("Error while creating CRLFRecordReader", ioe);
        return null;
    }
}
From source file: net.mooncloud.mapreduce.lib.db.DBInputFormat.java
License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, Record> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return createDBRecordReader((DBInputSplit) split, context.getConfiguration());
}
From source file: net.mooncloud.mapreduce.lib.db.DBRecordReader.java
License: Apache License

public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // Nothing to do beyond picking up the schema from the job configuration
    // if it has not been set yet.
    if (StringUtils.isBlank(schema)) {
        schema = context.getConfiguration().get("mapred.input.schema");
    }
}
From source file: net.shun.mapreduce.lib.input.XmlRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    String[] beginMarks = job.getStrings("mapred.xmlrecordreader.begin", "<page>");
    this.beginMark = beginMarks[0];
    // Note: the original source read the "begin" key again here; "end" is presumably intended.
    String[] endMarks = job.getStrings("mapred.xmlrecordreader.end", "</page>");
    this.endMark = endMarks[0];

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    fileIn.seek(start);
    in = new BufferedInputStream(fileIn);

    /*
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    */

    this.pos = start;
    readUntilMatch(beginMark, false, null);
}
From source file: nl.basjes.hadoop.input.ApacheHttpdLogfileRecordReader.java
License: Apache License

@Override
public void initialize(final InputSplit split, final TaskAttemptContext context) throws IOException {
    lineReader.initialize(split, context);
    final Configuration conf = context.getConfiguration();

    counterLinesRead = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "1:Lines read");
    counterGoodLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "2:Good lines");
    counterBadLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "3:Bad lines");

    if (logformat == null || requestedFields.isEmpty()) {
        if (logformat == null) {
            logformat = conf.get("nl.basjes.parse.apachehttpdlogline.format", "common");
        }
        if (requestedFields.isEmpty()) {
            String fields = conf.get("nl.basjes.parse.apachehttpdlogline.fields", null);
            if (fields != null) {
                fieldList = Arrays.asList(fields.split(","));
            }
        } else {
            fieldList = new ArrayList<>(requestedFields);
        }
    }

    if (fieldList != null) {
        if (logformat != null && parser == null) {
            parser = createParser();
        }
        for (String field : fieldList) {
            currentValue.declareRequestedFieldname(field);
        }
    }

    setupFields();
}
From source file: nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.FastqFileRecordWriter.java
License: Open Source License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within FileRecordWriter.
 *
 * @param context The context for this task.
 */
public FastqFileRecordWriter(TaskAttemptContext context) {
    this.mConf = context.getConfiguration();
    this.mOutputPath = new Path(this.mConf.get("output.dir"), "PhredCalculator.fastqc");
}
From source file: nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java
License: Open Source License

/**
 * Override method for instantiation.
 *
 * @param inputSplit The InputSplit to read.
 * @param context The context for this task.
 * @throws IOException Returns default exception.
 * @throws InterruptedException Returns default exception.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Initialize.
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream infile = fs.open(split.getPath());

    // Use number of lines given by user and set parameters.
    this.NLINESTOPROCESS = NLineInputFormat.getNumLinesPerSplit(context);
    this.maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    boolean skipFirstLine = false;

    // Skip first line?
    if (this.start != 0) {
        skipFirstLine = true;
        this.start--;
        infile.seek(this.start);
    }
    this.in = new LineReader(infile, conf);
    if (skipFirstLine) {
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));
    }
    this.pos = this.start;
}
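The lines-per-split value read above via NLineInputFormat.getNumLinesPerSplit(context) is stored in the job Configuration by the driver. A minimal driver-side sketch is shown below; the job name is made up, and the count of 4000 is just an arbitrary multiple of four (one FASTQ record spans four lines).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;

public class NLineJobSetupSketch {
    public static Job configure() throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "phred-calculator");
        // Stored in the job Configuration; getNumLinesPerSplit(context) reads it
        // back inside NReadRecordReader.initialize above.
        NLineInputFormat.setNumLinesPerSplit(job, 4000);
        return job;
    }
}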