Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
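Before the individual examples, a minimal sketch (not taken from the sources below) of the typical pattern: obtain the Configuration from the JobContext, or from any subtype such as Mapper.Context, and read job settings from it. The property name example.separator is hypothetical, chosen only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConfiguredMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private String separator;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends JobContext, so getConfiguration() is available here.
        Configuration conf = context.getConfiguration();
        // "example.separator" is a hypothetical property; "," is the fallback default.
        separator = conf.get("example.separator", ",");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Note: String.split() treats the separator as a regular expression.
        for (String field : value.toString().split(separator)) {
            context.write(new Text(field), new LongWritable(1));
        }
    }
}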

Usage

From source file:co.nubetech.apache.hadoop.DBInputFormat.java

License:Apache License

/** {@inheritDoc} */
public List<InputSplit> getSplits(JobContext job) throws IOException {

    ResultSet results = null;
    Statement statement = null;
    try {
        statement = connection.createStatement();

        results = statement.executeQuery(getCountQuery());
        results.next();

        long count = results.getLong(1);
        int chunks = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
        long chunkSize = (count / chunks);

        results.close();
        statement.close();

        List<InputSplit> splits = new ArrayList<InputSplit>();

        // Split the rows into n-number of chunks and adjust the last chunk
        // accordingly
        for (int i = 0; i < chunks; i++) {
            DBInputSplit split;

            if ((i + 1) == chunks)
                split = new DBInputSplit(i * chunkSize, count);
            else
                split = new DBInputSplit(i * chunkSize, (i * chunkSize) + chunkSize);

            splits.add(split);
        }

        connection.commit();
        return splits;
    } catch (SQLException e) {
        throw new IOException("Got SQLException", e);
    } finally {
        try {
            if (results != null) {
                results.close();
            }
        } catch (SQLException e1) {
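            // Ignored: best-effort close of the ResultSet during cleanup.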
        }
        try {
            if (statement != null) {
                statement.close();
            }
        } catch (SQLException e1) {
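            // Ignored: best-effort close of the Statement during cleanup.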
        }

        closeConnection();
    }
}
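Here getConfiguration() supplies the requested number of map tasks (MRJobConfig.NUM_MAPS), which determines how many evenly sized row chunks the table is divided into; the last chunk absorbs any remainder.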

From source file:co.nubetech.apache.hadoop.mapred.DataDrivenDBInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext job) throws IOException {
    return this.getSplits(job.getConfiguration());
}

From source file:co.nubetech.hiho.dedup.DelimitedTextInputFormat.java

License:Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    return codec == null;
}
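In this case the configuration is used only to construct a CompressionCodecFactory: a file is treated as splittable when no compression codec matches its name, since most codecs cannot be read from an arbitrary offset.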

From source file:co.nubetech.hiho.mapreduce.lib.db.apache.DataDrivenDBInputFormat.java

License:Apache License

/** {@inheritDoc} */
public List<InputSplit> getSplits(JobContext job) throws IOException {

    int targetNumTasks = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
    if (1 == targetNumTasks) {
        // There's no need to run a bounding vals query; just return a split
        // that separates nothing. This can be considerably more optimal for a
        // large table with no index.
        List<InputSplit> singletonSplit = new ArrayList<InputSplit>();
        singletonSplit.add(new DataDrivenDBInputSplit("1=1", "1=1"));
        return singletonSplit;
    }

    ResultSet results = null;
    Statement statement = null;
    Connection connection = getConnection();
    try {
        statement = connection.createStatement();

        results = statement.executeQuery(getBoundingValsQuery());
        results.next();

        // Based on the type of the results, use a different mechanism
        // for interpolating split points (i.e., numeric splits, text splits,
        // dates, etc.)
        int sqlDataType = results.getMetaData().getColumnType(1);
        DBSplitter splitter = getSplitter(sqlDataType);
        if (null == splitter) {
            throw new IOException("Unknown SQL data type: " + sqlDataType);
        }

        return splitter.split(job.getConfiguration(), results, getDBConf().getInputOrderBy());
    } catch (SQLException e) {
        throw new IOException(e.getMessage());
    } finally {
        // More-or-less ignore SQL exceptions here, but log in case we need it.
        try {
            if (null != results) {
                results.close();
            }
        } catch (SQLException se) {
            LOG.debug("SQLException closing resultset: " + se.toString());
        }

        try {
            if (null != statement) {
                statement.close();
            }
        } catch (SQLException se) {
            LOG.debug("SQLException closing statement: " + se.toString());
        }

        try {
            connection.commit();
            closeConnection();
        } catch (SQLException se) {
            LOG.debug("SQLException committing split transaction: " + se.toString());
        }
    }
}
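The configuration is consulted twice in this variant: once for the target number of map tasks, and again when it is passed to the DBSplitter, which interpolates split points between the bounding values according to the column's SQL type.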

From source file:co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    try {
        super.checkOutputSpecs(job);
    } catch (FileAlreadyExistsException e) {

        Configuration conf = job.getConfiguration();

        isAppend = conf.get(HIHOConf.IS_APPEND, "false");
        if (isAppend.equalsIgnoreCase("false")) {
            throw new FileAlreadyExistsException();
        } else {
            Path outDir = getOutputPath(job);
            if (outDir == null) {
                throw new InvalidJobConfException("OUTPUT directory not set.");
            }
        }
    }
}
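Here the configuration carries a hiho-specific append flag (HIHOConf.IS_APPEND): when it is set, an existing output directory is tolerated instead of failing with the usual FileAlreadyExistsException.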

From source file:com.aerospike.hadoop.mapreduce.AerospikeInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    // Delegate to the old API.
    Configuration cfg = context.getConfiguration();
    JobConf jobconf = AerospikeConfigUtil.asJobConf(cfg);
    return Arrays.asList((InputSplit[]) getSplits(jobconf, jobconf.getNumMapTasks()));
}
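A common bridging pattern: the new-API JobContext exposes only a Configuration, which is converted back into an old-API JobConf before delegating to the mapred-style getSplits().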

From source file:com.aerospike.hadoop.mapreduce.AerospikeOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext context) throws IOException {
    // Careful: the information saved in the config here seems to be discarded.
    Configuration cfg = context.getConfiguration();
    init(cfg);
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileInputFormat.java

License:Apache License

@Override
@SuppressWarnings("unchecked")
protected List<FileStatus> listStatus(JobContext job) throws IOException {

    List<FileStatus> files = super.listStatus(job);

    int len = files.size();
    for (int i = 0; i < len; ++i) {
        FileStatus file = files.get(i);
        if (file.isDir()) { // it's a MapFile
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            // use the data file
            files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME)));
        }
    }
    return files;
}
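The configuration is needed here to resolve the FileSystem for each path, so that a MapFile directory can be swapped for its underlying data file before splitting.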

From source file:com.aliyun.openservices.tablestore.hadoop.TableStore.java

License:Apache License

/**
 * Set access-key id/secret into a JobContext.
 */
public static void setCredential(JobContext job, String accessKeyId, String accessKeySecret) {
    Preconditions.checkNotNull(job, "job must be nonnull");
    setCredential(job.getConfiguration(), new Credential(accessKeyId, accessKeySecret, null));
}

From source file:com.aliyun.openservices.tablestore.hadoop.TableStore.java

License:Apache License

/**
 * Set access-key id/secret and security token into a JobContext.
 */
public static void setCredential(JobContext job, String accessKeyId, String accessKeySecret,
        String securityToken) {
    Preconditions.checkNotNull(job, "job must be nonnull");
    setCredential(job.getConfiguration(), new Credential(accessKeyId, accessKeySecret, securityToken));
}