List of usage examples for org.apache.hadoop.mapreduce.JobContext#getConfiguration()
public Configuration getConfiguration();
From source file: co.nubetech.apache.hadoop.DBInputFormat.java
License: Apache License

/** {@inheritDoc} */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    ResultSet results = null;
    Statement statement = null;
    try {
        statement = connection.createStatement();

        results = statement.executeQuery(getCountQuery());
        results.next();

        long count = results.getLong(1);
        int chunks = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
        long chunkSize = (count / chunks);

        results.close();
        statement.close();

        List<InputSplit> splits = new ArrayList<InputSplit>();

        // Split the rows into n-number of chunks and adjust the last chunk
        // accordingly
        for (int i = 0; i < chunks; i++) {
            DBInputSplit split;

            if ((i + 1) == chunks)
                split = new DBInputSplit(i * chunkSize, count);
            else
                split = new DBInputSplit(i * chunkSize, (i * chunkSize) + chunkSize);

            splits.add(split);
        }

        connection.commit();
        return splits;
    } catch (SQLException e) {
        throw new IOException("Got SQLException", e);
    } finally {
        try {
            if (results != null) {
                results.close();
            }
        } catch (SQLException e1) {
        }
        try {
            if (statement != null) {
                statement.close();
            }
        } catch (SQLException e1) {
        }
        closeConnection();
    }
}
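For context, a minimal driver sketch showing where the chunk count read above comes from; the class name, job name, and the value 4 are illustrative assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class DbInputDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // getSplits() above reads this back via
        // job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1).
        conf.setInt(MRJobConfig.NUM_MAPS, 4); // ask for four chunks/splits
        Job job = Job.getInstance(conf, "db-input-example");
        // ... set input format, mapper, output, then job.waitForCompletion(true)
    }
}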
From source file: co.nubetech.apache.hadoop.mapred.DataDrivenDBInputFormat.java
License: Apache License

public List<InputSplit> getSplits(JobContext job) throws IOException {
    return this.getSplits(job.getConfiguration());
}
From source file: co.nubetech.hiho.dedup.DelimitedTextInputFormat.java
License: Apache License

@Override
protected boolean isSplitable(JobContext context, Path file) {
    CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(file);
    return codec == null;
}
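A standalone sketch of the same codec lookup, assuming a hypothetical .gz input path; the extra SplittableCompressionCodec test (what the stock TextInputFormat performs) is shown for comparison:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;

public class SplitCheck {
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // The codec is resolved from the file extension (.gz, .bz2, ...).
        Path file = new Path("input/part-00000.gz"); // hypothetical path
        CompressionCodec codec = new CompressionCodecFactory(conf).getCodec(file);
        boolean hihoRule = (codec == null); // the rule used above
        boolean stockRule = codec == null || codec instanceof SplittableCompressionCodec;
        System.out.println(hihoRule + " / " + stockRule);
    }
}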
From source file: co.nubetech.hiho.mapreduce.lib.db.apache.DataDrivenDBInputFormat.java
License: Apache License

/** {@inheritDoc} */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    int targetNumTasks = job.getConfiguration().getInt(MRJobConfig.NUM_MAPS, 1);
    if (1 == targetNumTasks) {
        // There's no need to run a bounding vals query; just return a split
        // that separates nothing. This can be considerably more optimal for a
        // large table with no index.
        List<InputSplit> singletonSplit = new ArrayList<InputSplit>();
        singletonSplit.add(new DataDrivenDBInputSplit("1=1", "1=1"));
        return singletonSplit;
    }

    ResultSet results = null;
    Statement statement = null;
    Connection connection = getConnection();
    try {
        statement = connection.createStatement();

        results = statement.executeQuery(getBoundingValsQuery());
        results.next();

        // Based on the type of the results, use a different mechanism
        // for interpolating split points (i.e., numeric splits, text splits,
        // dates, etc.)
        int sqlDataType = results.getMetaData().getColumnType(1);
        DBSplitter splitter = getSplitter(sqlDataType);
        if (null == splitter) {
            throw new IOException("Unknown SQL data type: " + sqlDataType);
        }

        return splitter.split(job.getConfiguration(), results, getDBConf().getInputOrderBy());
    } catch (SQLException e) {
        throw new IOException(e.getMessage());
    } finally {
        // More-or-less ignore SQL exceptions here, but log in case we need it.
        try {
            if (null != results) {
                results.close();
            }
        } catch (SQLException se) {
            LOG.debug("SQLException closing resultset: " + se.toString());
        }
        try {
            if (null != statement) {
                statement.close();
            }
        } catch (SQLException se) {
            LOG.debug("SQLException closing statement: " + se.toString());
        }
        try {
            connection.commit();
            closeConnection();
        } catch (SQLException se) {
            LOG.debug("SQLException committing split transaction: " + se.toString());
        }
    }
}
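A hedged driver sketch using the stock org.apache.hadoop.mapreduce.lib.db classes that this hiho copy mirrors; the JDBC driver, URL, credentials, table, columns, and MyRecord are all assumptions:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.db.DBConfiguration;
import org.apache.hadoop.mapreduce.lib.db.DataDrivenDBInputFormat;

public class DataDrivenDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        DBConfiguration.configureDB(conf, "com.mysql.jdbc.Driver",
                "jdbc:mysql://localhost/demo", "user", "password");
        Job job = Job.getInstance(conf, "data-driven-db-input");
        // The splitBy column feeds the bounding-vals query shown above;
        // MyRecord is a hypothetical DBWritable implementation.
        DataDrivenDBInputFormat.setInput(job, MyRecord.class, "employees",
                null /* conditions */, "id" /* splitBy */, "id", "name");
    }
}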
From source file: co.nubetech.hiho.mapreduce.lib.output.AppendSequenceFileOutputFormat.java
License: Apache License

@Override
public void checkOutputSpecs(JobContext job) throws FileAlreadyExistsException, IOException {
    try {
        super.checkOutputSpecs(job);
    } catch (FileAlreadyExistsException e) {
        Configuration conf = job.getConfiguration();
        isAppend = conf.get(HIHOConf.IS_APPEND, "false");
        if (isAppend.equalsIgnoreCase("false")) {
            throw new FileAlreadyExistsException();
        } else {
            Path outDir = getOutputPath(job);
            if (outDir == null) {
                throw new InvalidJobConfException("OUTPUT directory not set.");
            }
        }
    }
}
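A minimal driver sketch, assuming HIHOConf lives in hiho's common package (the key itself, HIHOConf.IS_APPEND, is taken from the catch block above):

import co.nubetech.hiho.common.HIHOConf; // package path is an assumption
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class AppendDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // With this flag set, checkOutputSpecs() above tolerates an
        // existing output directory instead of failing the job.
        conf.set(HIHOConf.IS_APPEND, "true");
        Job job = Job.getInstance(conf, "append-output-example");
    }
}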
From source file: com.aerospike.hadoop.mapreduce.AerospikeInputFormat.java
License: Apache License

public List<InputSplit> getSplits(JobContext context) throws IOException {
    // Delegate to the old API.
    Configuration cfg = context.getConfiguration();
    JobConf jobconf = AerospikeConfigUtil.asJobConf(cfg);
    return Arrays.asList((InputSplit[]) getSplits(jobconf, jobconf.getNumMapTasks()));
}
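The generic new-to-old API bridge looks like the sketch below; presumably AerospikeConfigUtil.asJobConf does something equivalent, though that is an assumption about its internals:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

public class ApiBridge {
    public static void main(String[] args) {
        Configuration cfg = new Configuration();
        // A JobConf can be constructed directly from a new-API Configuration.
        JobConf jobConf = new JobConf(cfg);
        int numMaps = jobConf.getNumMapTasks(); // backed by mapreduce.job.maps
        System.out.println(numMaps);
    }
}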
From source file: com.aerospike.hadoop.mapreduce.AerospikeOutputFormat.java
License: Apache License

@Override
public void checkOutputSpecs(JobContext context) throws IOException {
    // Careful: the information saved into the config here seems to be discarded.
    Configuration cfg = context.getConfiguration();
    init(cfg);
}
From source file: com.alexholmes.hadooputils.combine.seqfile.mapreduce.CombineSequenceFileInputFormat.java
License: Apache License

@Override
@SuppressWarnings("unchecked")
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    int len = files.size();
    for (int i = 0; i < len; ++i) {
        FileStatus file = files.get(i);
        if (file.isDir()) { // it's a MapFile
            Path p = file.getPath();
            FileSystem fs = p.getFileSystem(job.getConfiguration());
            // use the data file
            files.set(i, fs.getFileStatus(new Path(p, MapFile.DATA_FILE_NAME)));
        }
    }
    return files;
}
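For reference, a MapFile is a directory holding two SequenceFiles, which is what the branch above exploits; the path below is an illustrative assumption:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.MapFile;

public class MapFileLayout {
    public static void main(String[] args) {
        // listStatus() above swaps the MapFile directory for its "data"
        // part so downstream readers see a plain SequenceFile.
        Path mapFile = new Path("/data/lookup.map"); // hypothetical MapFile dir
        Path data = new Path(mapFile, MapFile.DATA_FILE_NAME); // .../lookup.map/data
        Path index = new Path(mapFile, MapFile.INDEX_FILE_NAME); // .../lookup.map/index
        System.out.println(data + " " + index);
    }
}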
From source file: com.aliyun.openservices.tablestore.hadoop.TableStore.java
License: Apache License

/**
 * Set access-key id/secret into a JobContext.
 */
public static void setCredential(JobContext job, String accessKeyId, String accessKeySecret) {
    Preconditions.checkNotNull(job, "job must be nonnull");
    setCredential(job.getConfiguration(), new Credential(accessKeyId, accessKeySecret, null));
}
From source file: com.aliyun.openservices.tablestore.hadoop.TableStore.java
License: Apache License

/**
 * Set access-key id/secret and security token into a JobContext.
 */
public static void setCredential(JobContext job, String accessKeyId, String accessKeySecret,
        String securityToken) {
    Preconditions.checkNotNull(job, "job must be nonnull");
    setCredential(job.getConfiguration(), new Credential(accessKeyId, accessKeySecret, securityToken));
}
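A caller-side sketch exercising both overloads above; the key id, secret, and security-token values are placeholders:

import com.aliyun.openservices.tablestore.hadoop.TableStore;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class CredentialExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "tablestore-job");
        // Long-lived credentials:
        TableStore.setCredential(job, "ACCESS_KEY_ID", "ACCESS_KEY_SECRET");
        // Or short-lived STS credentials with a security token:
        TableStore.setCredential(job, "ACCESS_KEY_ID", "ACCESS_KEY_SECRET", "SECURITY_TOKEN");
    }
}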