List of usage examples for org.apache.hadoop.mapreduce.TaskAttemptContext#getConfiguration
public Configuration getConfiguration();
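Inside a task, getConfiguration() returns the job's Configuration, which is how an InputFormat, OutputFormat, or RecordReader reads its settings, as the entries below illustrate. A minimal sketch of the common pattern (the MyRecordReader class and the my.custom.setting key are hypothetical, not taken from any entry below):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Minimal sketch: pull the job Configuration off the TaskAttemptContext in
// initialize() and read settings from it. MyRecordReader and
// "my.custom.setting" are hypothetical names used for illustration only.
public abstract class MyRecordReader<K, V> extends RecordReader<K, V> {
    private String setting;

    @Override
    public void initialize(InputSplit split, TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();
        setting = conf.get("my.custom.setting", "default-value");
    }
}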
From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetInputFormat.java
License: Apache License

@Override
public RecordReader<KEY, VALUE> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException {
    DataSetInputSplit inputSplit = (DataSetInputSplit) split;
    BatchReadable<KEY, VALUE> batchReadable = getBatchReadable(context.getConfiguration());
    SplitReader<KEY, VALUE> splitReader = batchReadable.createSplitReader(inputSplit.getSplit());
    return new DatasetRecordReader<>(splitReader);
}
From source file: co.cask.cdap.internal.app.runtime.spark.dataset.SparkDatasetOutputFormat.java
License: Apache License

@Override
public RecordWriter<KEY, VALUE> getRecordWriter(TaskAttemptContext context)
        throws IOException, InterruptedException {
    CloseableBatchWritable<KEY, VALUE> batchWritable = getBatchWritable(context.getConfiguration());
    return new DatasetRecordWriter<>(batchWritable);
}
From source file: co.cask.cdap.template.etl.common.ETLDBOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    DBConfiguration dbConf = new DBConfiguration(conf);
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();
    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }
    try {
        Connection connection = getConnection(conf);
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement) {
            @Override
            public void close(TaskAttemptContext context) throws IOException {
                super.close(context);
                try {
                    DriverManager.deregisterDriver(driverShim);
                } catch (SQLException e) {
                    throw new IOException(e);
                }
            }
        };
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file: co.cask.hydrator.plugin.batch.CopybookRecordReader.java
License: Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    // Get configuration
    Configuration conf = context.getConfiguration();
    int fileStructure = net.sf.JRecord.Common.Constants.IO_FIXED_LENGTH;
    Path path = new Path(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH));
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    // Create input stream for the COBOL copybook contents
    InputStream inputStream = IOUtils
        .toInputStream(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_CBL_CONTENTS), "UTF-8");
    BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
    try {
        externalRecord = CopybookIOUtils.getExternalRecord(bufferedInputStream);
        recordByteLength = CopybookIOUtils.getRecordLength(externalRecord, fileStructure);
        LineProvider lineProvider =
            LineIOProvider.getInstance().getLineProvider(fileStructure, CopybookIOUtils.FONT);
        reader = LineIOProvider.getInstance().getLineReader(fileStructure, lineProvider);
        LayoutDetail copybook = CopybookIOUtils.getLayoutDetail(externalRecord);
        org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
            (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;
        start = fileSplit.getStart();
        end = start + fileSplit.getLength();
        BufferedInputStream fileIn = new BufferedInputStream(fs.open(fileSplit.getPath()));
        // Jump to the point in the split at which the first complete record starts,
        // if this is not the first InputSplit
        if (start != 0) {
            position = start - (start % recordByteLength) + recordByteLength;
            fileIn.skip(position);
        }
        reader.open(fileIn, copybook);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file: co.cask.hydrator.plugin.batch.source.XMLInputFormat.java
License: Apache License

@Override
public RecordReader<LongWritable, Map<String, String>> createRecordReader(InputSplit split,
        TaskAttemptContext context) throws IOException {
    return new XMLRecordReader((FileSplit) split, context.getConfiguration());
}
From source file: co.cask.hydrator.plugin.db.batch.sink.ETLDBOutputFormat.java
License: Apache License

@Override
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    Configuration conf = context.getConfiguration();
    DBConfiguration dbConf = new DBConfiguration(conf);
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();
    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }
    try {
        Connection connection = getConnection(conf);
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement) {
            private boolean emptyData = true;

            // The close method below matches the implementation in DBOutputFormat, except that
            // we check whether any data was written and, if not, skip the executeBatch call.
            // Some reducers may receive no data, so this check is necessary to prevent an
            // empty batch from being committed (some databases don't support that).
            @Override
            public void close(TaskAttemptContext context) throws IOException {
                try {
                    if (!emptyData) {
                        getStatement().executeBatch();
                        getConnection().commit();
                    }
                } catch (SQLException e) {
                    try {
                        getConnection().rollback();
                    } catch (SQLException ex) {
                        LOG.warn(StringUtils.stringifyException(ex));
                    }
                    throw new IOException(e.getMessage());
                } finally {
                    try {
                        getStatement().close();
                        getConnection().close();
                    } catch (SQLException ex) {
                        throw new IOException(ex.getMessage());
                    }
                }
                try {
                    DriverManager.deregisterDriver(driverShim);
                } catch (SQLException e) {
                    throw new IOException(e);
                }
            }

            @Override
            public void write(K key, V value) throws IOException {
                super.write(key, value);
                emptyData = false;
            }
        };
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file: co.nubetech.apache.hadoop.DBInputFormat.java
License: Apache License

/** {@inheritDoc} */
@SuppressWarnings("unchecked")
public RecordReader<LongWritable, T> createRecordReader(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException {
    return createDBRecordReader((DBInputSplit) split, context.getConfiguration());
}
From source file: co.nubetech.apache.hadoop.DBOutputFormat.java
License: Apache License

/** {@inheritDoc} */
public RecordWriter<K, V> getRecordWriter(TaskAttemptContext context) throws IOException {
    DBConfiguration dbConf = new DBConfiguration(context.getConfiguration());
    String tableName = dbConf.getOutputTableName();
    String[] fieldNames = dbConf.getOutputFieldNames();
    if (fieldNames == null) {
        fieldNames = new String[dbConf.getOutputFieldCount()];
    }
    try {
        Connection connection = dbConf.getConnection();
        PreparedStatement statement = connection.prepareStatement(constructQuery(tableName, fieldNames));
        return new DBRecordWriter(connection, statement);
    } catch (Exception ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file: co.nubetech.hiho.dedup.DelimitedLineRecordReader.java
License: Apache License

/**
 * Reads the configured delimiter and key column from the job configuration,
 * then opens the split (decompressing if a codec applies) and seeks to its start.
 */
@Override
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.delimiter = job.get(DelimitedTextInputFormat.DELIMITER_CONF);
    this.column = job.getInt(DelimitedTextInputFormat.COLUMN_CONF, 0);
    this.maxLineLength = job.getInt("mapred.linerecordreader.maxlength", Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();
    compressionCodecs = new CompressionCodecFactory(job);
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    // Open the file and seek to the start of the split
    FileSystem fs = file.getFileSystem(job);
    FSDataInputStream fileIn = fs.open(split.getPath());
    boolean skipFirstLine = false;
    if (codec != null) {
        in = new LineReader(codec.createInputStream(fileIn), job);
        end = Long.MAX_VALUE;
    } else {
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        in = new LineReader(fileIn, job);
    }
    if (skipFirstLine) {
        // Skip the first (partial) line and re-establish "start"
        start += in.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    this.pos = start;
}
From source file: co.nubetech.hiho.dedup.DelimitedTextInputFormat.java
License: Apache License

@Override
public RecordReader<Text, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
    logger.debug("Delimiter is: " + context.getConfiguration().get(DELIMITER_CONF));
    logger.debug("Column is: " + context.getConfiguration().getInt(COLUMN_CONF, 0));
    return new DelimitedLineRecordReader();
}