Example usage for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration

Introduction

On this page you can find usage examples for org.apache.hadoop.mapreduce TaskAttemptContext getConfiguration.

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
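
The method is typically called inside an InputFormat, OutputFormat, or RecordReader to read back job settings that the driver placed in the Configuration. A minimal sketch, assuming a custom text format; the property name "example.delimiter" is a made-up placeholder:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ExampleRecordReader extends RecordReader<LongWritable, Text> {

    private String delimiter;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
        // getConfiguration() returns the job Configuration populated by the driver.
        Configuration conf = context.getConfiguration();
        delimiter = conf.get("example.delimiter", ",");
    }

    @Override
    public boolean nextKeyValue() {
        return false; // no records in this sketch
    }

    @Override
    public LongWritable getCurrentKey() {
        return null;
    }

    @Override
    public Text getCurrentValue() {
        return null;
    }

    @Override
    public float getProgress() {
        return 0.0f;
    }

    @Override
    public void close() {
    }
}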

Usage

From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetInputFormat.java

License: Apache License

@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException {
    DataSetInputSplit inputSplit = (DataSetInputSplit) split;
    BatchReadable<KEY, VALUE> batchReadable = getBatchReadable(context.getConfiguration());
    SplitReader<KEY, VALUE> splitReader = batchReadable.createSplitReader(inputSplit.getSplit());

    return new DatasetRecordReader<>(splitReader);
}

From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetOutputFormat.java

License: Apache License

@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    CloseableBatchWritable<KEY, VALUE> batchWritable = getBatchWritable(context.getConfiguration());
    return new DatasetRecordWriter<>(batchWritable);
}
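
Both Spark dataset formats above only read from the Configuration; the dataset details that getBatchReadable and getBatchWritable deserialize must have been placed there when the job was set up. A hedged driver-side sketch of that pattern; the key name "dataset.name" is a placeholder, not the actual key the CDAP formats use:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class DatasetDriverSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder key: the real CDAP formats serialize dataset details
        // under their own keys.
        conf.set("dataset.name", "purchases");
        Job job = Job.getInstance(conf, "dataset-read");
        // job.setInputFormatClass(...); job.setOutputFormatClass(...);
    }
}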

From source file: co.cask.cdap.template.etl.common.ETLDBOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    DBConfiguration dbConf = new DBConfiguration(conf);
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();

    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }

    try {
        Connection connection = getConnection(conf);
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement) {
            @Override
            public void close(TaskAttemptContext context) throws IOException {
                super.close(context);
                try {
                    DriverManager.deregisterDriver(driverShim);
                } catch (SQLException e) {
                    throw new IOException(e);
                }
            }
        };
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
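
The table name, field names, and connection settings that this getRecordWriter pulls out of context.getConfiguration() via DBConfiguration are normally written into the job Configuration on the driver side. A sketch using the stock Hadoop DBOutputFormat helpers; the connection details are placeholders and the CDAP plugin may wrap this in its own setup:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBOutputFormat;

public class DbSinkDriverSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder driver class, URL, and credentials.
        DBConfiguration.configureDB(conf, "org.postgresql.Driver",
                "jdbc:postgresql://localhost/demo", "user", "secret");
        Job job = Job.getInstance(conf, "db-sink");
        // Table and field names end up in the Configuration that the
        // RecordWriter above retrieves via context.getConfiguration().
        DBOutputFormat.setOutput(job, "events", "id", "payload");
    }
}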

From source file: co.cask.hydrator.plugin.batch.CopybookRecordReader.java

License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // Get configuration
    Configuration conf = context.getConfiguration();
    int fileStructure = net.sf.JRecord.Common.Constants.IO_FIXED_LENGTH;
    Path path = new Path(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH));
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    // Create input stream for the COBOL copybook contents
    InputStream inputStream = IOUtils
            .toInputStream(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_CBL_CONTENTS), "UTF-8");
    BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
    try {
        externalRecord = CopybookIOUtils.getExternalRecord(bufferedInputStream);
        recordByteLength = CopybookIOUtils.getRecordLength(externalRecord, fileStructure);

        LineProvider lineProvider = LineIOProvider.getInstance().getLineProvider(fileStructure,
                CopybookIOUtils.FONT);
        reader = LineIOProvider.getInstance().getLineReader(fileStructure, lineProvider);
        LayoutDetail copybook = CopybookIOUtils.getLayoutDetail(externalRecord);

        org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;

        start = fileSplit.getStart();
        end = start + fileSplit.getLength();

        BufferedInputStream fileIn = new BufferedInputStream(fs.open(fileSplit.getPath()));
        // Jump to the point in the split at which the first complete record of the split starts,
        // if not the first InputSplit
        if (start != 0) {
            position = start - (start % recordByteLength) + recordByteLength;
            fileIn.skip(position);
        }
        reader.open(fileIn, copybook);

    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
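
The reader above expects two entries in the job Configuration: the HDFS path of the binary data and the copybook text itself. A sketch of that setup, assuming the CopybookInputFormat constants are publicly accessible and that the class lives in the same package as the reader; the path and copybook contents are placeholders:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
// Assumed package; CopybookInputFormat is referenced by the reader above.
import co.cask.hydrator.plugin.batch.CopybookInputFormat;

public class CopybookJobSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.set(CopybookInputFormat.COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH,
                "/data/mainframe/records.bin");
        conf.set(CopybookInputFormat.COPYBOOK_INPUTFORMAT_CBL_CONTENTS,
                "01  RECORD.\n    05  FIELD-A   PIC X(10).");
        Job job = Job.getInstance(conf, "copybook-read");
        // job.setInputFormatClass(CopybookInputFormat.class);
    }
}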

From source file: co.cask.hydrator.plugin.batch.source.XMLInputFormat.java

License: Apache License

@Override
public RecordReader<LongWritable, Map<String, String>> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException {
    return new XMLRecordReader((FileSplit) split, context.getConfiguration());
}

From source file: co.cask.hydrator.plugin.db.batch.sink.ETLDBOutputFormat.java

License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    DBConfiguration dbConf = new DBConfiguration(conf);
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();

    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }

    try {
        Connection connection = getConnection(conf);
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement) {

            private boolean emptyData = true;

            // The close method below matches the implementation in DBOutputFormat except that
            // we check whether there is any data to be written and, if not, skip the executeBatch call.
            // Some reducers may not receive any data, so this check prevents an empty batch
            // from being committed (some databases don't support that).
            @Override
            public void close(TaskAttemptContext context) throws IOException {
                try {
                    if (!emptyData) {
                        getStatement().executeBatch();
                        getConnection().commit();
                    }
                } catch (SQLException e) {
                    try {
                        getConnection().rollback();
                    } catch (SQLException ex) {
                        LOG.warn(StringUtils.stringifyException(ex));
                    }
                    throw new IOException(e.getMessage());
                } finally {
                    try {
                        getStatement().close();
                        getConnection().close();
                    } catch (SQLException ex) {
                        throw new IOException(ex.getMessage());
                    }
                }

                try {
                    DriverManager.deregisterDriver(driverShim);
                } catch (SQLException e) {
                    throw new IOException(e);
                }
            }

            @Override
            public void write(K key, V value) throws IOException {
                super.write(key, value);
                emptyData = false;
            }
        };
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}

From source file: co.nubetech.apache.hadoop.DBInputFormat.java

License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, T> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {

    return createDBRecordReader((DBInputSplit) split, context.getConfiguration());
}
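
On the driver side, the query details that createDBRecordReader reads back from the Configuration are usually stored with DBInputFormat.setInput. A sketch against the stock Hadoop class (which the co.nubetech copy mirrors); the connection values and the EventWritable row class are made up for illustration:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DBInputFormat;
import org.apache.hadoop.mapreduce.lib.db.DBWritable;

public class DbSourceDriverSketch {

    // Minimal DBWritable describing one row; field handling omitted for brevity.
    public static class EventWritable implements DBWritable, Writable {
        public void readFields(ResultSet rs) throws SQLException { }
        public void write(PreparedStatement ps) throws SQLException { }
        public void readFields(DataInput in) throws IOException { }
        public void write(DataOutput out) throws IOException { }
    }

    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Placeholder driver class, URL, and credentials.
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://localhost/demo", "user", "secret");
        Job job = Job.getInstance(conf, "db-source");
        job.setInputFormatClass(DBInputFormat.class);
        // Stores the table, ordering, and field names in the Configuration
        // that createRecordReader later retrieves via context.getConfiguration().
        DBInputFormat.setInput(job, EventWritable.class, "events",
                null /* conditions */, "id" /* orderBy */, "id", "payload");
    }
}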

From source file: co.nubetech.apache.hadoop.DBOutputFormat.java

License: Apache License

/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();

    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }

    try {
        Connection connection = dbConf.getConnection();
        PreparedStatement statement = null;

        statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement);
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}

From source file: co.nubetech.hiho.dedup.DelimitedLineRecordReader.java

License: Apache License

/**
 * Reads the delimiter and key column from the job configuration, opens the
 * split (decompressing it if a codec applies), and skips the partial first
 * line when the split does not start at the beginning of the file.
 */

@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);

    // open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) { // skip first line and re-establish "start".
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
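
The delimiter, key column, and compression handling above all come from the job Configuration. A sketch of the corresponding job setup, assuming the DELIMITER_CONF and COLUMN_CONF constants are publicly accessible; the values are placeholders:

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import co.nubetech.hiho.dedup.DelimitedTextInputFormat;

public class DedupJobSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // These entries are what initialize() reads back via context.getConfiguration().
        conf.set(DelimitedTextInputFormat.DELIMITER_CONF, ",");
        conf.setInt(DelimitedTextInputFormat.COLUMN_CONF, 1);
        Job job = Job.getInstance(conf, "dedup");
        job.setInputFormatClass(DelimitedTextInputFormat.class);
    }
}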

From source file: co.nubetech.hiho.dedup.DelimitedTextInputFormat.java

License: Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    logger.debug("Delimiter is: " + context.getConfiguration().get(DELIMITER_CONF));
    logger.debug("Column is: " + context.getConfiguration().getInt(COLUMN_CONF, 0));
    return new DelimitedLineRecordReader();
}