Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
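Before the individual examples, a minimal sketch (not taken from the sources below) of the typical pattern: obtain the Configuration from the JobContext, or from any subtype such as Mapper.Context, and read job settings from it. The property name example.separator is hypothetical, chosen only for illustration.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConfiguredMapper extends Mapper<LongWritable, Text, Text, LongWritable> {

    private String separator;

    @Override
    protected void setup(Context context) {
        // Mapper.Context extends JobContext, so getConfiguration() is available here.
        Configuration conf = context.getConfiguration();
        // "example.separator" is a hypothetical property; "," is the fallback default.
        separator = conf.get("example.separator", ",");
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Note: String.split() treats the separator as a regular expression.
        for (String field : value.toString().split(separator)) {
            context.write(new Text(field), new LongWritable(1));
        }
    }
}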

Usage

From source file:co.nubetech.apache.hadoop.DBInputFormat.java

License:Apache License

/** {@inheritDoc} */
public List<InputSplit> getSplits(JobContext job) throws IOException {

    ResultSet results = null;
    Statement statement = null;
    try {
        statement = connection.createStatement();

        results = statement.executeQuery(getCountQuery());
        results.next();

        long count = results.getLong(1);
        int chunks = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
        long chunkSize = (count / chunks);

        results.close();
        statement.close();

        List<InputSplit> splits = new ArrayList<InputSplit>();

        // Split the rows into n-number of chunks and adjust the last chunk
        // accordingly
        for (int i = 0; i < chunks; i++) {
            DBInputSplit split;

            if ((i + 1) == chunks)
                split = new DBInputSplit(i * chunkSize, count);
            else
                split = new DBInputSplit(i * chunkSize, (i * chunkSize) + chunkSize);

            splits.add(split);
        }

        connection.commit();
        return splits;
    } catch (SQLException e) {
        throw new IOException("Got SQLException", e);
    } finally {
        try {
            if (results != null) {
                results.close();
            }
        } catch (SQLException e1) {
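            // Ignored: best-effort close of the ResultSet during cleanup.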
        }
        try {
            if (statement != null) {
                statement.close();
            }
        } catch (SQLException e1) {
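            // Ignored: best-effort close of the Statement during cleanup.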
        }

        closeConnection();
    }
}
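Here getConfiguration() supplies the requested number of map tasks (MRJobConfig.NUM_MAPS), which determines how many evenly sized row chunks the table is divided into; the last chunk absorbs any remainder.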

From source file:co.nubetech.apache.hadoop.mapred.DataDrivenDBInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext job) throws IOException {
    return this.getSplits(job.getConfiguration());
}

From source file:co.nubetech.hiho.dedup.DelimitedTextInputFormat.java

License:Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    return codec == null;
}
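In this case the configuration is used only to construct a CompressionCodecFactory: a file is treated as splittable when no compression codec matches its name, since most codecs cannot be read from an arbitrary offset.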

From source file:co.nubetech.hiho.mapreduce.lib.db.apache.DataDrivenDBInputFormat.java

License:Apache License

/** {@inheritDoc} */
public List<InputSplit> getSplits(JobContext job) throws IOException {

    int targetNumTasks = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
    if (1 == targetNumTasks) {
        // There's no need to run a bounding vals query; just return a split
        // that separates nothing. This can be considerably more optimal for a
        // large table with no index.
        List<InputSplit> singletonSplit = new ArrayList<InputSplit>();
        singletonSplit.add(new DataDrivenDBInputSplit("1=1", "1=1"));
        return singletonSplit;
    }

    ResultSet results = null;
    Statement statement = null;
    Connection connection = getConnection();
    try {
        statement = connection.createStatement();

        results = statement.executeQuery(getBoundingValsQuery());
        results.next();

        // Based on the type of the results, use a different mechanism
        // for interpolating split points (i.e., numeric splits, text splits,
        // dates, etc.)
        int sqlDataType = results.getMetaData().getColumnType(1);
        DBSplitter splitter = getSplitter(sqlDataType);
        if (null == splitter) {
            throw new IOException("Unknown SQL data type: " + sqlDataType);
        }

        return splitter.split(job.getConfiguration(), results, getDBConf().getInputOrderBy());
    } catch (SQLException e) {
        throw new IOException(e.getMessage());
    } finally {
        // More-or-less ignore SQL exceptions here, but log in case we need it.
        try {
            if (null != results) {
                results.close();
            }
        } catch (SQLException se) {
            LOG.debug("SQLException closing resultset: " + se.toString());
        }

        try {
            if (null != statement) {
                statement.close();
            }
        } catch (SQLException se) {
            LOG.debug("SQLException closing statement: " + se.toString());
        }

        try {
            connection.commit();
            closeConnection();
        } catch (SQLException se) {
            LOG.debug("SQLException committing split transaction: " + se.toString());
        }
    }
}
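The configuration is consulted twice in this variant: once for the target number of map tasks, and again when it is passed to the DBSplitter, which interpolates split points between the bounding values according to the column's SQL type.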

From source file:co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    try {
        super.checkOutputSpecs(job);
    } catch (FileAlreadyExistsException e) {

        Configuration conf = job.getConfiguration();

        isAppend = conf.get(HIHOConf.IS_APPEND, "false");
        if (isAppend.equalsIgnoreCase("false")) {
            throw new FileAlreadyExistsException();
        } else {
            Path outDir = getOutputPath(job);
            if (outDir == null) {
                throw new InvalidJobConfException("OUTPUT directory not set.");
            }
        }
    }
}
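Here the configuration carries a hiho-specific append flag (HIHOConf.IS_APPEND): when it is set, an existing output directory is tolerated instead of failing with the usual FileAlreadyExistsException.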

From source file:com.aerospike.hadoop.mapreduce.AerospikeInputFormat.java

License:Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    // Delegate to the old API.
    Configuration cfg = context.getConfiguration();
    JobConf jobconf = AerospikeConfigUtil.asJobConf(cfg);
    return Arrays.asList((InputSplit[]) getSplits(jobconf, jobconf.getNumMapTasks()));
}
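A common bridging pattern: the new-API JobContext exposes only a Configuration, which is converted back into an old-API JobConf before delegating to the mapred-style getSplits().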

From source file:com.aerospike.hadoop.mapreduce.AerospikeOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext context) throws IOException {
    // Careful: the information saved in the config here seems to be discarded.
    Configuration cfg = context.getConfiguration();
    init(cfg);
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileInputFormat.java

License:Apache License

@Override
@SuppressWarnings("unchecked")
protected List<FileStatus> listStatus(JobContext job) throws IOException {

    List<FileStatus> files = super.listStatus(job);

    int len = files.size();
    for (int i = 0; i < len; ++i) {
        FileStatus file = files.get(i);
        if (file.isDir()) { // it's a MapFile
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            // use the data file
            files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME)));
        }
    }
    return files;
}
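The configuration is needed here to resolve the FileSystem for each path, so that a MapFile directory can be swapped for its underlying data file before splitting.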

From source file:com.aliyun.openservices.tablestore.hadoop.TableStore.java

License:Apache License

/**
 * Set access-key id/secret into a JobContext.
 */
public static void setCredential(JobContext job, String accessKeyId, String accessKeySecret) {
    Preconditions.checkNotNull(job, "job must be nonnull");
    setCredential(job.getConfiguration(), new Credential(accessKeyId, accessKeySecret, null));
}

From source file:com.aliyun.openservices.tablestore.hadoop.TableStore.java

License:Apache License

/**
 * Set access-key id/secret and security token into a JobContext.
 */
public static void setCredential(JobContext job, String accessKeyId, String accessKeySecret,
        String securityToken) {
    Preconditions.checkNotNull(job, "job must be nonnull");
    setCredential(job.getConfiguration(), new Credential(accessKeyId, accessKeySecret, securityToken));
}