List of usage examples for org.apache.hadoop.mapred.JobConf.get(String)
public String get(String name)
name
property, null
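Before the examples from real projects, here is a minimal sketch of the basic pattern (the property name "my.custom.flag" is hypothetical and does not appear in the sources below). Because get(String) returns null for an unset property, callers either guard the result or use the two-argument overload get(String, String) to supply a default:

// Illustrative sketch only; "my.custom.flag" is a made-up property name.
JobConf conf = new JobConf();

String value = conf.get("my.custom.flag"); // null when the property was never set
if (value != null) {
    System.out.println("flag = " + value);
}

// The two-argument overload avoids the null check by supplying a default.
String withDefault = conf.get("my.custom.flag", "false");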
From source file:BU.MET.CS755.SpeciesIterMapper2.java
public void configure(JobConf job) {
    // Read iteration state that the job driver stored in the configuration.
    iterationNumber = Long.parseLong(job.get("iterationNumber"));
    totalLinks = Long.parseLong(job.get("totalLinks"));
}
From source file:BU.MET.CS755.SpeciesIterReducer2.java
public void configure(JobConf job) {
    // Only the iteration counter is needed on the reduce side.
    iterationNumber = Long.parseLong(job.get("iterationNumber"));
}
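The two configure() methods above can only read "iterationNumber" and "totalLinks" because the job driver stored them before submission. That driver code is not shown in the sources; the following is a hedged sketch of what it presumably looks like (the values are invented for illustration):

// Hypothetical driver-side counterpart; values are illustrative.
long iteration = 3;
long totalLinks = 42;

JobConf job = new JobConf(SpeciesIterMapper2.class);
job.set("iterationNumber", Long.toString(iteration));
job.set("totalLinks", Long.toString(totalLinks));
// configure(JobConf) in the mapper and reducer can now read these back with job.get(...).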
From source file:cascading.flow.hadoop.HadoopFlowStep.java
License:Open Source License
/**
 * Method clean removes any temporary files used by this FlowStep instance. It will log any IOExceptions thrown.
 *
 * @param config of type JobConf
 */
public void clean(JobConf config) {
    String stepStatePath = config.get("cascading.flow.step.path");

    if (stepStatePath != null) {
        try {
            HadoopUtil.removeStateFromDistCache(config, stepStatePath);
        } catch (IOException exception) {
            logWarn("unable to remove step state file: " + stepStatePath, exception);
        }
    }

    if (tempSink != null) {
        try {
            tempSink.deleteResource(config);
        } catch (Exception exception) {
            // sink all exceptions, don't fail app
            logWarn("unable to remove temporary file: " + tempSink, exception);
        }
    }

    if (getSink().isTemporary() && (getFlow().getFlowStats().isSuccessful() || getFlow().getRunID() == null)) {
        try {
            getSink().deleteResource(config);
        } catch (Exception exception) {
            // sink all exceptions, don't fail app
            logWarn("unable to remove temporary file: " + getSink(), exception);
        }
    } else {
        cleanTapMetaData(config, getSink());
    }

    for (Tap tap : getTraps())
        cleanTapMetaData(config, tap);
}
From source file:cascading.hbase.helper.TableInputFormat.java
License:Apache License
public void configure(JobConf job) {
    // Path[] tableNames = FileInputFormat.getInputPaths(job);
    // String colArg = job.get(COLUMN_LIST);
    // String[] colNames = colArg.split(" ");
    // byte[][] m_cols = new byte[colNames.length][];
    // for (int i = 0; i < m_cols.length; i++) {
    //     m_cols[i] = Bytes.toBytes(colNames[i]);
    // }
    // setInputColumns(m_cols);
    // try {
    //     setHTable(new HTable(HBaseConfiguration.create(job), tableNames[0].getName()));
    // } catch (Exception e) {
    //     LOG.error(StringUtils.stringifyException(e));
    // }
    // this.conf = configuration;
    String tableName = job.get(INPUT_TABLE);
    try {
        setHTable(new HTable(new Configuration(job), tableName));
    } catch (Exception e) {
        LOG.error(StringUtils.stringifyException(e));
    }

    Scan scan = null;

    if (job.get(SCAN) != null) {
        try {
            scan = convertStringToScan(job.get(SCAN));
        } catch (IOException e) {
            LOG.error("An error occurred.", e);
        }
    } else {
        try {
            scan = new Scan();

            if (job.get(SCAN_COLUMNS) != null) {
                addColumns(scan, job.get(SCAN_COLUMNS));
            }

            if (job.get(SCAN_COLUMN_FAMILY) != null) {
                scan.addFamily(Bytes.toBytes(job.get(SCAN_COLUMN_FAMILY)));
            }

            if (job.get(SCAN_TIMESTAMP) != null) {
                scan.setTimeStamp(Long.parseLong(job.get(SCAN_TIMESTAMP)));
            }

            if (job.get(SCAN_TIMERANGE_START) != null && job.get(SCAN_TIMERANGE_END) != null) {
                scan.setTimeRange(Long.parseLong(job.get(SCAN_TIMERANGE_START)),
                        Long.parseLong(job.get(SCAN_TIMERANGE_END)));
            }

            if (job.get(SCAN_MAXVERSIONS) != null) {
                scan.setMaxVersions(Integer.parseInt(job.get(SCAN_MAXVERSIONS)));
            }

            if (job.get(SCAN_CACHEDROWS) != null) {
                scan.setCaching(Integer.parseInt(job.get(SCAN_CACHEDROWS)));
            }

            // false by default, full table scans generate too much BC churn
            scan.setCacheBlocks(job.getBoolean(SCAN_CACHEBLOCKS, false));
        } catch (Exception e) {
            LOG.error(StringUtils.stringifyException(e));
        }
    }

    setScan(scan);
}
From source file:cascading.hbase.helper.TableInputFormat.java
License:Apache License
public void validateInput(JobConf job) throws IOException {
    // expecting exactly one path
    Path[] tableNames = FileInputFormat.getInputPaths(job);
    if (tableNames == null || tableNames.length > 1) {
        throw new IOException("expecting one table name");
    }

    // connected to table?
    if (getHTable() == null) {
        throw new IOException("could not connect to table '" + tableNames[0].getName() + "'");
    }

    // expecting at least one column
    String colArg = job.get(COLUMN_LIST);
    if (colArg == null || colArg.length() == 0) {
        throw new IOException("expecting at least one column");
    }
}
From source file:cascading.jdbc.JDBCTap.java
License:Open Source License
@Override
public void sinkInit(JobConf conf) throws IOException {
    if (!isSink())
        return;

    // do not delete if initialized from within a task
    if (isReplace() && conf.get("mapred.task.partition") == null && !deletePath(conf))
        throw new TapException("unable to drop table: " + tableDesc.getTableName());

    if (!makeDirs(conf))
        throw new TapException("unable to create table: " + tableDesc.getTableName());

    if (username == null)
        DBConfiguration.configureDB(conf, driverClassName, connectionUrl);
    else
        DBConfiguration.configureDB(conf, driverClassName, connectionUrl, username, password);

    super.sinkInit(conf);
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
/**
 * should only be called if not in a Flow
 *
 * @param conf
 * @throws IOException
 */
public static void setupJob(JobConf conf) throws IOException {
    Path outputPath = FileOutputFormat.getOutputPath(conf);

    if (outputPath == null)
        return;

    if (getFSSafe(conf, outputPath) == null)
        return;

    if (conf.get("mapred.task.id") == null) // need to stuff a fake id
    {
        String mapper = conf.getBoolean("mapred.task.is.map", true) ? "m" : "r";
        conf.set("mapred.task.id",
                String.format("attempt_%012d_0000_%s_000000_0", (int) Math.rint(System.currentTimeMillis()), mapper));
    }

    makeTempPath(conf);

    if (writeDirectlyToWorkingPath(conf, outputPath)) {
        LOG.info("writing directly to output path: " + outputPath);
        setWorkOutputPath(conf, outputPath);
        return;
    }

    // "mapred.work.output.dir"
    Path taskOutputPath = getTaskOutputPath(conf);
    setWorkOutputPath(conf, taskOutputPath);
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
static synchronized void setupTask(JobConf conf) throws IOException {
    String workpath = conf.get("mapred.work.output.dir");

    if (workpath == null)
        return;

    FileSystem fs = getFSSafe(conf, new Path(workpath));

    if (fs == null)
        return;

    String taskId = conf.get("mapred.task.id");

    LOG.info("setting up task: '" + taskId + "' - " + workpath);

    AtomicInteger integer = pathCounts.get(workpath);

    if (integer == null) {
        integer = new AtomicInteger();
        pathCounts.put(workpath, integer);
    }

    integer.incrementAndGet();
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
public static boolean needsTaskCommit(JobConf conf) throws IOException {
    String workpath = conf.get("mapred.work.output.dir");

    if (workpath == null)
        return false;

    Path taskOutputPath = new Path(workpath);

    if (taskOutputPath != null) {
        FileSystem fs = getFSSafe(conf, taskOutputPath);

        if (fs == null)
            return false;

        if (fs.exists(taskOutputPath))
            return true;
    }

    return false;
}
From source file:cascading.tap.hadoop.Hadoop18TapUtil.java
License:Open Source License
/**
 * copies all files from the taskoutputpath to the outputpath
 *
 * @param conf
 */
public static void commitTask(JobConf conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));

    FileSystem fs = getFSSafe(conf, taskOutputPath);

    if (fs == null)
        return;

    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());

    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id");

    LOG.info("committing task: '" + taskId + "' - " + taskOutputPath);

    if (taskOutputPath != null) {
        if (writeDirectlyToWorkingPath(conf, taskOutputPath))
            return;

        if (fs.exists(taskOutputPath)) {
            Path jobOutputPath = taskOutputPath.getParent().getParent();

            // Move the task outputs to their final place
            moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

            // Delete the temporary task-specific output directory
            if (!fs.delete(taskOutputPath, true))
                LOG.info("failed to delete the temporary output directory of task: '" + taskId + "' - " + taskOutputPath);

            LOG.info("saved output of task '" + taskId + "' to " + jobOutputPath);
        }
    }
}