List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext.getConfiguration()
public Configuration getConfiguration();
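In the MapReduce v2 ("mapreduce") API, getConfiguration() is how task-side code reaches the job's Configuration, most commonly while a RecordReader or RecordWriter is being created or initialized, as every example below illustrates. For orientation, a minimal sketch; the key "my.custom.key" is hypothetical and used only for illustration:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch of a RecordReader that pulls a job-level setting from the
// task context. "my.custom.key" is a hypothetical key, for illustration only.
public abstract class ConfiguredRecordReader extends RecordReader<LongWritable, Text> {
    protected String customValue;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        // Read the setting the driver placed in the job configuration.
        customValue = conf.get("my.custom.key", "default");
    }
}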
From source file:hdfs.hdfsadapter.XmlInputFormat.java
License:Apache License
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        // Swallow the failure; a null reader tells the framework this split could not be opened.
        return null;
    }
}
From source file:hdfs.hdfsadapter.XmlInputFormatOneBufferSolution.java
License:Apache License
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        STARTING_TAG = context.getConfiguration().get("start_tag");
        ENDING_TAG = context.getConfiguration().get("end_tag");
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        return null;
    }
}
From source file:hdfs.hdfsadapter.XmlInputFormatTwoBufferSolution.java
License:Apache License
@Override
public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    try {
        END_TAG = context.getConfiguration().get("end_tag");
        START_TAG = context.getConfiguration().get("start_tag");
        return new XmlRecordReader((FileSplit) split, context.getConfiguration());
    } catch (IOException ioe) {
        return null;
    }
}
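The two buffer-solution variants above read their element delimiters from the job configuration, so a driver must set "start_tag" and "end_tag" before submitting the job. A minimal driver-side sketch; the key names match the ones read in createRecordReader() above, while the class name XmlIngestDriver and the tag values are illustrative:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import hdfs.hdfsadapter.XmlInputFormatOneBufferSolution;

public class XmlIngestDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Keys match those read in createRecordReader(); values are illustrative.
        conf.set("start_tag", "<page>");
        conf.set("end_tag", "</page>");
        Job job = Job.getInstance(conf, "xml-ingest");
        job.setInputFormatClass(XmlInputFormatOneBufferSolution.class);
        // Mapper, output types, and input/output paths would be configured here.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}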
From source file:ilps.hadoop.ThriftFileInputFormat.java
License:Apache License
@Override
public RecordReader<Text, StreamItemWritable> createRecordReader(InputSplit split, TaskAttemptContext tac)
        throws IOException, InterruptedException {
    return new ThriftRecordReader((FileSplit) split, tac.getConfiguration());
}
From source file:ilps.hadoop.ThriftRecordReader.java
License:Apache License
/**
 * Boilerplate initialization code for file input streams.
 *
 * Tuan - Add the .xz decompressor here.
 */
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    conf = context.getConfiguration();
    fileSplit = (FileSplit) split;
    start = fileSplit.getStart();
    length = fileSplit.getLength();
    position = 0;
    Path path = fileSplit.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Some files are corrupted; report them and move on
    try {
        fis = fs.open(path);
        bis = new BufferedInputStream(fis);
        xzis = new XZCompressorInputStream(bis);
        transport = new TIOStreamTransport(xzis);
    } catch (IOException e) {
        LOG.error("Bad file: " + path.toString());
        e.printStackTrace();
    }

    try {
        if (transport != null) {
            transport.open();
        } else {
            // Skip this file
            fis = null;
            return;
        }
    } catch (TTransportException e) {
        e.printStackTrace();
        throw new IOException(e);
    }

    factory = new TBinaryProtocol.Factory();
    tp = factory.getProtocol(transport);
    value = new StreamItemWritable(factory);
}
From source file:input_format.HFileRecordReaderBufferedScan.java
License:Open Source License
/**
 * Initializes the reader.
 *
 * @param inputsplit The split to work with.
 * @param context The current task context.
 * @throws IOException When setting up the reader fails.
 * @throws InterruptedException When the job is aborted.
 * @see org.apache.hadoop.mapreduce.RecordReader#initialize(
 *      org.apache.hadoop.mapreduce.InputSplit,
 *      org.apache.hadoop.mapreduce.TaskAttemptContext)
 */
@Override
public void initialize(InputSplit inputsplit, TaskAttemptContext context) throws IOException, InterruptedException {
    tsplit = (TableColumnSplit) inputsplit;
    scan = new Scan();
    scan.setStartRow(tsplit.getStartRow());
    scan.setStopRow(tsplit.getStopRow());
    scan.setBatch(11000);

    // Restrict the scan to column family "A"
    byte[] a = Bytes.toBytes("A");
    byte[] bid = new byte[a.length];
    for (int i = 0; i < a.length; i++) {
        bid[i] = a[i];
    }
    scan.addFamily(bid);

    HTable table = new HTable(HBconf, tsplit.getTable());
    resultScanner = table.getScanner(scan);

    result = resultScanner.next();
    more = false;
    if (result == null) {
        empty = true;
    } else {
        more = true;
        list = result.list().iterator();
        kv = list.next();
    }

    // Extract the join variables for this pattern from the job configuration
    Configuration conf = context.getConfiguration();
    String newjoinVars = conf.get("input.patId");
    String joinVars = newjoinVars.split(tsplit.getFname())[1];
    joinVars = joinVars.substring(0, joinVars.indexOf("$$") - 1);

    String vars = tsplit.getVars();
    StringTokenizer vtok = new StringTokenizer(vars);
    varsno = 0;
    while (vtok.hasMoreTokens()) {
        vtok.nextToken();
        varsno++;
    }
    if (varsno == 1) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
    } else if (varsno == 2) {
        StringTokenizer vtok2 = new StringTokenizer(vars);
        v1 = vtok2.nextToken();
        v2 = vtok2.nextToken();
    }
}
From source file:input_format.MultiHFileOutputFormat.java
License:Open Source License
public RecordWriter<ImmutableBytesWritable, KeyValue> getRecordWriter(final TaskAttemptContext context)
        throws IOException, InterruptedException {
    return new RecordWriter<ImmutableBytesWritable, KeyValue>() {

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            for (RecordWriter<ImmutableBytesWritable, KeyValue> writer : writers.values()) {
                writer.close(context);
            }
        }

        @Override
        public void write(ImmutableBytesWritable key, KeyValue value) throws IOException, InterruptedException {
            RecordWriter<ImmutableBytesWritable, KeyValue> writer = writers.get(key);
            if (writer == null) {
                final Path outputPath = new Path(
                        FileOutputFormat.getOutputPath(context).toString() + "/" + Bytes.toString(key.get()));
                writer = new RecordWriter<ImmutableBytesWritable, KeyValue>() {
                    final FileOutputCommitter committer = new FileOutputCommitter(outputPath, context);
                    final Path outputdir = committer.getWorkPath();
                    final Configuration conf = context.getConfiguration();
                    final FileSystem fs = outputdir.getFileSystem(conf);
                    final long maxsize = conf.getLong("hbase.hregion.max.filesize",
                            HConstants.DEFAULT_MAX_FILE_SIZE);
                    final int blocksize = conf.getInt("hfile.min.blocksize.size", HFile.DEFAULT_BLOCKSIZE);
                    // Invented config. Add to hbase-*.xml if other than default compression.
                    final String compression = conf.get("hfile.compression",
                            Compression.Algorithm.NONE.getName());

                    // Map of families to writers and how much has been output on the writer.
                    final Map<byte[], WriterLength> writers = new TreeMap<byte[], WriterLength>(
                            Bytes.BYTES_COMPARATOR);
                    byte[] previousRow = HConstants.EMPTY_BYTE_ARRAY;
                    final byte[] now = Bytes.toBytes(System.currentTimeMillis());
                    boolean rollRequested = false;

                    public void write(ImmutableBytesWritable row, KeyValue kv) throws IOException {
                        // null input == user explicitly wants to flush
                        if (row == null && kv == null) {
                            rollWriters();
                            return;
                        }
                        byte[] rowKey = kv.getRow();
                        long length = kv.getLength();
                        byte[] family = kv.getFamily();
                        WriterLength wl = this.writers.get(family);

                        // If this is a new column family, verify that the directory exists
                        if (wl == null) {
                            fs.mkdirs(new Path(outputdir, Bytes.toString(family)));
                        }

                        // If any of the HFiles for the column families has reached
                        // maxsize, we need to roll all the writers
                        if (wl != null && wl.written + length >= maxsize) {
                            this.rollRequested = true;
                        }

                        // This can only happen once a row is finished though
                        if (rollRequested && Bytes.compareTo(this.previousRow, rowKey) != 0) {
                            rollWriters();
                        }

                        // create a new HLog writer, if necessary
                        if (wl == null || wl.writer == null) {
                            wl = getNewWriter(family);
                        }

                        // we now have the proper HLog writer. full steam ahead
                        kv.updateLatestStamp(this.now);
                        wl.writer.append(kv);
                        wl.written += length;

                        // Copy the row so we know when a row transition happens.
                        this.previousRow = rowKey;
                    }

                    private void rollWriters() throws IOException {
                        for (WriterLength wl : this.writers.values()) {
                            if (wl.writer != null) {
                                close(wl.writer);
                            }
                            wl.writer = null;
                            wl.written = 0;
                        }
                        this.rollRequested = false;
                    }

                    private HFile.Writer getNewWriter(final HFile.Writer writer, final Path familydir,
                            Configuration conf) throws IOException {
                        if (writer != null) {
                            close(writer);
                        }
                        return HFile.getWriterFactory(conf).createWriter(fs,
                                StoreFile.getUniqueFile(fs, familydir), blocksize, compression,
                                KeyValue.KEY_COMPARATOR);
                    }

                    private WriterLength getNewWriter(byte[] family) throws IOException {
                        WriterLength wl = new WriterLength();
                        Path familydir = new Path(outputdir, Bytes.toString(family));
                        wl.writer = getNewWriter(wl.writer, familydir, conf);
                        this.writers.put(family, wl);
                        return wl;
                    }

                    private void close(final HFile.Writer w) throws IOException {
                        if (w != null) {
                            w.appendFileInfo(StoreFile.BULKLOAD_TIME_KEY,
                                    Bytes.toBytes(System.currentTimeMillis()));
                            w.appendFileInfo(StoreFile.BULKLOAD_TASK_KEY,
                                    Bytes.toBytes(context.getTaskAttemptID().toString()));
                            w.appendFileInfo(StoreFile.MAJOR_COMPACTION_KEY, Bytes.toBytes(true));
                            w.close();
                        }
                    }

                    public void close(TaskAttemptContext c) throws IOException, InterruptedException {
                        for (WriterLength wl : this.writers.values()) {
                            close(wl.writer);
                        }
                        committer.commitTask(c);
                    }
                };
                writers.put(key, writer);
            }
            writer.write(new ImmutableBytesWritable(value.getRow()), value);
        }
    };
}
From source file:InvertedIndex.NLineRecordReader.java
License:Apache License
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.job = job;
    this.context = context;
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    this.path = file;
    this.length = split.getLength();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        if (0 == split.getLength() && job.getBoolean("mapred.ignore.badcompress", false)) {
            if (null != context && context instanceof TaskInputOutputContext) {
                ((TaskInputOutputContext) context).getCounter("Input Counter", "Gzip File length is zero")
                        .increment(1);
            }
            if (null != this.path) {
                LOG.warn("Skip 0-length Zip file: " + this.path.toString());
            }
            in = new NLineReader(fileIn, job);
        } else {
            try {
                in = new NLineReader(codec.createInputStream(fileIn), job);
                end = Long.MAX_VALUE;
            } catch (IOException e) {
                if (isIgnoreBadCompress(job, e)) {
                    in = new NLineReader(fileIn, job);
                    end = start;
                    LOG.warn("Skip Bad Compress File: " + this.path.toString());
                    LOG.warn("initialize line read error", e);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Skip Bad Zip File")
                            .increment(1);
                    ((TaskInputOutputContext) context).getCounter("Input Counter", "Total Skip Bad Zip Length")
                            .increment(this.length);
                } else {
                    throw e;
                }
            }
        }
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new NLineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
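This reader consults two job-level switches, "mapred.linerecordreader.maxlength" and "mapred.ignore.badcompress". A brief driver-side sketch; the key names come from the initialize() code above, while the class name NLineDriverSketch and the values are illustrative:

import org.apache.hadoop.conf.Configuration;

public class NLineDriverSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Cap the length of a single line; the default above is Integer.MAX_VALUE.
        conf.setInt("mapred.linerecordreader.maxlength", 1024 * 1024);
        // Tolerate corrupt or zero-length gzip inputs instead of failing the task.
        conf.setBoolean("mapred.ignore.badcompress", true);
    }
}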
From source file:io.amient.kafka.hadoop.io.MultiOutputFormat.java
License:Apache License
public RecordWriter<MsgMetadataWritable, BytesWritable> getRecordWriter(TaskAttemptContext context)
        throws IOException {
    final TaskAttemptContext taskContext = context;
    final Configuration conf = context.getConfiguration();
    final boolean isCompressed = getCompressOutput(context);
    String ext = "";
    CompressionCodec gzipCodec = null;
    if (isCompressed) {
        Class<? extends CompressionCodec> codecClass = getOutputCompressorClass(context, GzipCodec.class);
        gzipCodec = ReflectionUtils.newInstance(codecClass, conf);
        ext = ".gz";
    }
    final CompressionCodec codec = gzipCodec;
    final String extension = ext;
    final String pathFormat = conf.get(CONFIG_PATH_FORMAT, "'{T}/{P}'");
    log.info("Using path format: " + pathFormat);
    final SimpleDateFormat timeFormat = new SimpleDateFormat(pathFormat);
    timeFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
    final DecimalFormat offsetFormat = new DecimalFormat("0000000000000000000");
    final boolean hasTS = HadoopJobMapper.isTimestampExtractorConfigured(conf);

    return new RecordWriter<MsgMetadataWritable, BytesWritable>() {
        TreeMap<String, RecordWriter<Void, BytesWritable>> recordWriters = new TreeMap<>();
        Path prefixPath = ((FileOutputCommitter) getOutputCommitter(taskContext)).getWorkPath();

        public void write(MsgMetadataWritable key, BytesWritable value) throws IOException {
            if (hasTS && key.getTimestamp() == null) {
                // extractor didn't wish to throw exception so skipping this record
                return;
            }
            String P = String.valueOf(key.getSplit().getPartition());
            String T = key.getSplit().getTopic();
            String suffixPath = hasTS ? timeFormat.format(key.getTimestamp()) : pathFormat.replaceAll("'", "");
            suffixPath = suffixPath.replace("{T}", T);
            suffixPath = suffixPath.replace("{P}", P);
            suffixPath += "/" + T + "-" + P + "-" + offsetFormat.format(key.getSplit().getStartOffset());
            suffixPath += extension;
            RecordWriter<Void, BytesWritable> rw = this.recordWriters.get(suffixPath);
            try {
                if (rw == null) {
                    Path file = new Path(prefixPath, suffixPath);
                    FileSystem fs = file.getFileSystem(conf);
                    FSDataOutputStream fileOut = fs.create(file, false);
                    if (isCompressed) {
                        rw = new LineRecordWriter(new DataOutputStream(codec.createOutputStream(fileOut)));
                    } else {
                        rw = new LineRecordWriter(fileOut);
                    }
                    this.recordWriters.put(suffixPath, rw);
                }
                rw.write(null, value);
            } catch (InterruptedException e) {
                Thread.currentThread().interrupt();
            }
        }

        @Override
        public void close(TaskAttemptContext context) throws IOException, InterruptedException {
            Iterator<String> keys = this.recordWriters.keySet().iterator();
            while (keys.hasNext()) {
                RecordWriter<Void, BytesWritable> rw = this.recordWriters.get(keys.next());
                rw.close(context);
            }
            this.recordWriters.clear();
        }
    };
}
From source file:io.covert.dns.collection.DnsRequestInputFormat.java
License:Apache License
@Override
public RecordReader<Text, DnsRequest> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    int dclass = DClass.value(conf.get("dns.request.dclass", "IN"));
    List<String> subdomains = Arrays.asList(conf.get("dns.requests.subdomains", "").split(","));
    List<Integer> types = new LinkedList<Integer>();
    for (String type : conf.get("dns.request.types", "A").split(",")) {
        types.add(Type.value(type));
    }
    return new DnsRequestRecordReader(subdomains, types, dclass);
}
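All three settings this input format reads have defaults, but a driver can override them in the job configuration. A brief sketch; the key names are taken from createRecordReader() above, while the class name DnsRequestDriverSketch and the values are illustrative:

import org.apache.hadoop.conf.Configuration;

public class DnsRequestDriverSketch {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("dns.request.dclass", "IN");             // DNS class, parsed via DClass.value()
        conf.set("dns.requests.subdomains", "www,mail");  // comma-separated subdomain list
        conf.set("dns.request.types", "A,AAAA,MX");       // record types, each parsed via Type.value()
    }
}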