List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration
public Configuration getConfiguration();
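Inside a task, getConfiguration() returns the job's Configuration, which is how an InputFormat, OutputFormat, or RecordReader reads its settings, as the entries below illustrate. A minimal sketch of the common pattern (the MyRecordReader class and the my.custom.setting key are hypothetical, not taken from any entry below):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: pull the job Configuration off the TaskAttemptContext in
// initialize() and read settings from it. MyRecordReader and
// "my.custom.setting" are hypothetical names used for illustration only.
public abstract class MyRecordReader<K, V> extends RecordReader<K, V> {
    private String setting;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        setting = conf.get("my.custom.setting", "default-value");
    }
}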
From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetInputFormat.java
License: Apache License

@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException {
    DataSetInputSplit inputSplit = (DataSetInputSplit) split;
    BatchReadable<KEY, VALUE> batchReadable = getBatchReadable(context.getConfiguration());
    SplitReader<KEY, VALUE> splitReader = batchReadable.createSplitReader(inputSplit.getSplit());
    return new DatasetRecordReader<>(splitReader);
}
From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetOutputFormat.java
License: Apache License

@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    CloseableBatchWritable<KEY, VALUE> batchWritable = getBatchWritable(context.getConfiguration());
    return new DatasetRecordWriter<>(batchWritable);
}
From source file: co.cask.cdap.template.etl.common.ETLDBOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    DBConfiguration dbConf = new DBConfiguration(conf);
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();
    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }
    try {
        Connection connection = getConnection(conf);
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement) {
            @Override
            public void close(TaskAttemptContext context) throws IOException {
                super.close(context);
                try {
                    DriverManager.deregisterDriver(driverShim);
                } catch (SQLException e) {
                    throw new IOException(e);
                }
            }
        };
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file: co.cask.hydrator.plugin.batch.CopybookRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get configuration
    Configuration conf = context.getConfiguration();
    int fileStructure = net.sf.JRecord.Common.Constants.IO_FIXED_LENGTH;
    Path path = new Path(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH));
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    // Create input stream for the COBOL copybook contents
    InputStream inputStream = IOUtils
        .toInputStream(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_CBL_CONTENTS), "UTF-8");
    BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
    try {
        externalRecord = CopybookIOUtils.getExternalRecord(bufferedInputStream);
        recordByteLength = CopybookIOUtils.getRecordLength(externalRecord, fileStructure);
        LineProvider lineProvider =
            LineIOProvider.getInstance().getLineProvider(fileStructure, CopybookIOUtils.FONT);
        reader = LineIOProvider.getInstance().getLineReader(fileStructure, lineProvider);
        LayoutDetail copybook = CopybookIOUtils.getLayoutDetail(externalRecord);
        org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
            (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;
        start = fileSplit.getStart();
        end = start + fileSplit.getLength();
        BufferedInputStream fileIn = new BufferedInputStream(fs.open(fileSplit.getPath()));
        // Jump to the point in the split at which the first complete record starts,
        // if this is not the first InputSplit
        if (start != 0) {
            position = start - (start % recordByteLength) + recordByteLength;
            fileIn.skip(position);
        }
        reader.open(fileIn, copybook);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file: co.cask.hydrator.plugin.batch.source.XMLInputFormat.java
License: Apache License

@Override
public RecordReader<LongWritable, Map<String, String>> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException {
    return new XMLRecordReader((FileSplit) split, context.getConfiguration());
}
From source file: co.cask.hydrator.plugin.db.batch.sink.ETLDBOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    DBConfiguration dbConf = new DBConfiguration(conf);
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();
    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }
    try {
        Connection connection = getConnection(conf);
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement) {
            private boolean emptyData = true;

            // The close method below matches the implementation in DBOutputFormat, except that
            // we check whether any data was written and, if not, skip the executeBatch call.
            // Some reducers may receive no data, so this check is necessary to prevent an
            // empty batch from being committed (some databases don't support that).
            @Override
            public void close(TaskAttemptContext context) throws IOException {
                try {
                    if (!emptyData) {
                        getStatement().executeBatch();
                        getConnection().commit();
                    }
                } catch (SQLException e) {
                    try {
                        getConnection().rollback();
                    } catch (SQLException ex) {
                        LOG.warn(StringUtils.stringifyException(ex));
                    }
                    throw new IOException(e.getMessage());
                } finally {
                    try {
                        getStatement().close();
                        getConnection().close();
                    } catch (SQLException ex) {
                        throw new IOException(ex.getMessage());
                    }
                }
                try {
                    DriverManager.deregisterDriver(driverShim);
                } catch (SQLException e) {
                    throw new IOException(e);
                }
            }

            @Override
            public void write(K key, V value) throws IOException {
                super.write(key, value);
                emptyData = false;
            }
        };
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file: co.nubetech.apache.hadoop.DBInputFormat.java
License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, T> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return createDBRecordReader((DBInputSplit) split, context.getConfiguration());
}
From source file: co.nubetech.apache.hadoop.DBOutputFormat.java
License: Apache License

/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();
    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }
    try {
        Connection connection = dbConf.getConnection();
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement);
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file: co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License: Apache License

/**
 * Reads the configured delimiter and key column from the job configuration,
 * then opens the split (decompressing if a codec applies) and seeks to its start.
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    // Open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // Skip the first (partial) line and re-establish "start"
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file: co.nubetech.hiho.dedup.DelimitedTextInputFormat.java
License: Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    logger.debug("Delimiter is: " + context.getConfiguration().get(DELIMITER_CONF));
    logger.debug("Column is: " + context.getConfiguration().getInt(COLUMN_CONF, 0));
    return new DelimitedLineRecordReader();
}