List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
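All of the examples below follow the same basic pattern: a RecordReader, RecordWriter, or InputFormat receives a TaskAttemptContext and calls getConfiguration() to obtain the job Configuration, which then drives property lookups and FileSystem access. The sketch below is a minimal illustration of that pattern, not taken from any of the projects listed here; the class name MyRecordReader and the property key "my.custom.max.record.length" are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

public class MyRecordReader extends RecordReader<LongWritable, Text> {

    private int maxRecordLength;
    private FSDataInputStream fileIn;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        // The TaskAttemptContext exposes the job Configuration.
        Configuration conf = context.getConfiguration();

        // Typical use 1: read job properties (the key below is hypothetical).
        this.maxRecordLength = conf.getInt("my.custom.max.record.length", Integer.MAX_VALUE);

        // Typical use 2: obtain a FileSystem for the split's path.
        Path file = ((FileSplit) split).getPath();
        FileSystem fs = file.getFileSystem(conf);
        this.fileIn = fs.open(file);
    }

    // The remaining RecordReader methods are stubbed out for brevity.
    @Override public boolean nextKeyValue() { return false; }
    @Override public LongWritable getCurrentKey() { return null; }
    @Override public Text getCurrentValue() { return null; }
    @Override public float getProgress() { return 0f; }
    @Override public void close() throws IOException { if (fileIn != null) fileIn.close(); }
}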
From source file: ml.shifu.shifu.core.mr.input.CombineRecordReader.java
License: Apache License

private void initializeOne(TaskAttemptContext context, FileSplit split) throws IOException {
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(codec.createInputStream(fileIn), job);
        } else {
            in = new LineReader(codec.createInputStream(fileIn), job, this.recordDelimiterBytes);
        }
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        if (null == this.recordDelimiterBytes) {
            in = new LineReader(fileIn, job);
        } else {
            in = new LineReader(fileIn, job, this.recordDelimiterBytes);
        }
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file: mvm.rya.accumulo.mr.utils.AccumuloHDFSFileInputFormat.java
License: Apache License

@Override
public RecordReader<Key, Value> createRecordReader(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    return new RecordReader<Key, Value>() {
        private FileSKVIterator fileSKVIterator;

        @Override
        public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
                throws IOException, InterruptedException {
            FileSplit split = (FileSplit) inputSplit;
            Configuration job = taskAttemptContext.getConfiguration();
            Path file = split.getPath();
            // long start = split.getStart();
            // long length = split.getLength();
            FileSystem fs = file.getFileSystem(job);
            // FSDataInputStream fileIn = fs.open(file);
            // if (start != 0L) {
            //     fileIn.seek(start);
            // }
            Instance instance = AccumuloProps.getInstance(taskAttemptContext);
            fileSKVIterator = RFileOperations.getInstance().openReader(file.toString(), ALLRANGE,
                    new HashSet<ByteSequence>(), false, fs, job, instance.getConfiguration());
            // fileSKVIterator = new RFileOperations2().openReader(fileIn, length - start, job);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            fileSKVIterator.next();
            return fileSKVIterator.hasTop();
        }

        @Override
        public Key getCurrentKey() throws IOException, InterruptedException {
            return fileSKVIterator.getTopKey();
        }

        @Override
        public Value getCurrentValue() throws IOException, InterruptedException {
            return fileSKVIterator.getTopValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return 0;
        }

        @Override
        public void close() throws IOException {
        }
    };
}
From source file: mvm.rya.cloudbase.giraph.format.CloudbaseRyaVertexOutputFormat.java
License: Apache License

public VertexWriter<Text, Text, Text> createVertexWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    RecordWriter<Text, Mutation> writer = cloudbaseOutputFormat.getRecordWriter(context);
    String tableName = context.getConfiguration().get(OUTPUT_TABLE);
    if (tableName == null) {
        throw new IOException(
                "Forgot to set table name using CloudbaseVertexOutputFormat.OUTPUT_TABLE");
    }
    return new CloudbaseEdgeVertexWriter(writer, tableName);
}
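For the writer above to find its table, the driving job must have put the table name into the Configuration before submission. The following driver-side fragment is a hypothetical sketch, assuming OUTPUT_TABLE is a publicly accessible String constant on the output format class; the table name "rya_vertices" is made up.

import org.apache.hadoop.conf.Configuration;

import mvm.rya.cloudbase.giraph.format.CloudbaseRyaVertexOutputFormat;

public class OutputTableSetupSketch {
    // Whatever is set here is what createVertexWriter later reads back
    // via context.getConfiguration().get(OUTPUT_TABLE).
    public static Configuration configure(String tableName) {
        Configuration conf = new Configuration();
        conf.set(CloudbaseRyaVertexOutputFormat.OUTPUT_TABLE, tableName);
        return conf;
    }
}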
From source file: net.hubs1.mahout.cluster.CRLFInputFormat.java
License: Apache License

@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        return new CRLFRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        log.warn("Error while creating CRLFRecordReader", ioe);
        return null;
    }
}
From source file: net.mooncloud.mapreduce.lib.db.DBInputFormat.java
License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, Record> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return createDBRecordReader((DBInputSplit) split, context.getConfiguration());
}
From source file: net.mooncloud.mapreduce.lib.db.DBRecordReader.java
License: Apache License

public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // Nothing to do beyond picking up the schema from the job configuration
    // if it has not been set yet.
    if (StringUtils.isBlank(schema)) {
        schema = context.getConfiguration().get("mapred.input.schema");
    }
}
From source file: net.shun.mapreduce.lib.input.XmlRecordReader.java
License: Apache License

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    String[] beginMarks = job.getStrings("mapred.xmlrecordreader.begin", "<page>");
    this.beginMark = beginMarks[0];
    // Note: the original source read the "begin" key again here; "end" is presumably intended.
    String[] endMarks = job.getStrings("mapred.xmlrecordreader.end", "</page>");
    this.endMark = endMarks[0];

    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    fileIn.seek(start);
    in = new BufferedInputStream(fileIn);

    /*
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    */

    this.pos = start;
    readUntilMatch(beginMark, false, null);
}
From source file: nl.basjes.hadoop.input.ApacheHttpdLogfileRecordReader.java
License: Apache License

@Override
public void initialize(final InputSplit split, final TaskAttemptContext context) throws IOException {
    lineReader.initialize(split, context);
    final Configuration conf = context.getConfiguration();

    counterLinesRead = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "1:Lines read");
    counterGoodLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "2:Good lines");
    counterBadLines = context.getCounter(HTTPD_LOGFILE_INPUT_FORMAT, "3:Bad lines");

    if (logformat == null || requestedFields.isEmpty()) {
        if (logformat == null) {
            logformat = conf.get("nl.basjes.parse.apachehttpdlogline.format", "common");
        }
        if (requestedFields.isEmpty()) {
            String fields = conf.get("nl.basjes.parse.apachehttpdlogline.fields", null);
            if (fields != null) {
                fieldList = Arrays.asList(fields.split(","));
            }
        } else {
            fieldList = new ArrayList<>(requestedFields);
        }
    }

    if (fieldList != null) {
        if (logformat != null && parser == null) {
            parser = createParser();
        }
        for (String field : fieldList) {
            currentValue.declareRequestedFieldname(field);
        }
    }

    setupFields();
}
From source file: nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.FastqFileRecordWriter.java
License: Open Source License

/**
 * Implementation detail: This constructor is built to be called via
 * reflection from within FileRecordWriter.
 *
 * @param context The context for this task.
 */
public FastqFileRecordWriter(TaskAttemptContext context) {
    this.mConf = context.getConfiguration();
    this.mOutputPath = new Path(this.mConf.get("output.dir"), "PhredCalculator.fastqc");
}
From source file: nl.bioinf.wvanhelvoirt.HadoopPhredCalculator.NReadRecordReader.java
License: Open Source License

/**
 * Override method for instantiation.
 *
 * @param inputSplit The InputSplit to read.
 * @param context The context for this task.
 * @throws IOException Returns default exception.
 * @throws InterruptedException Returns default exception.
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Initialize.
    Configuration conf = context.getConfiguration();
    FileSplit split = (FileSplit) inputSplit;
    Path file = split.getPath();
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream infile = fs.open(split.getPath());

    // Use number of lines given by user and set parameters.
    this.NLINESTOPROCESS = NLineInputFormat.getNumLinesPerSplit(context);
    this.maxLineLength = conf.getInt("mapreduce.input.linerecordreader.line.maxlength", Integer.MAX_VALUE);
    this.start = split.getStart();
    this.end = this.start + split.getLength();
    boolean skipFirstLine = false;

    // Skip first line?
    if (this.start != 0) {
        skipFirstLine = true;
        this.start--;
        infile.seek(this.start);
    }
    this.in = new LineReader(infile, conf);
    if (skipFirstLine) {
        this.start += this.in.readLine(new Text(), 0,
                (int) Math.min((long) Integer.MAX_VALUE, this.end - this.start));
    }
    this.pos = this.start;
}
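The lines-per-split value read above via NLineInputFormat.getNumLinesPerSplit(context) is stored in the job Configuration by the driver. A minimal driver-side sketch is shown below; the job name is made up, and the count of 4000 is just an arbitrary multiple of four (one FASTQ record spans four lines).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.NLineInputFormat;

public class NLineJobSetupSketch {
    public static Job configure() throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "phred-calculator");
        // Stored in the job Configuration; getNumLinesPerSplit(context) reads it
        // back inside NReadRecordReader.initialize above.
        NLineInputFormat.setNumLinesPerSplit(job, 4000);
        return job;
    }
}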