List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
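Before the library-specific examples below, here is a minimal sketch of the method itself: getConfiguration() returns the live Configuration backing the job, so properties set on it before submission are visible when the configuration is read back. The property key "example.greeting" and the class name are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example-job");
        // Mutate the job's configuration before submission.
        job.getConfiguration().set("example.greeting", "hello");
        // Read it back through the same object.
        System.out.println(job.getConfiguration().get("example.greeting"));
    }
}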
From source file:com.moz.fiji.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * Adds the jars from a directory into the distributed cache of a job.
 *
 * @param job The job to configure.
 * @param jarDirectory A path to a directory of jar files.
 * @throws IOException on I/O error.
 */
public static void addJarsToDistributedCache(Job job, Path jarDirectory) throws IOException {
    if (null == jarDirectory) {
        throw new IllegalArgumentException("Jar directory may not be null");
    }
    addJarsToDistributedCache(job, listJarFilesFromDirectory(job.getConfiguration(), jarDirectory));
}
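A hedged caller sketch for the directory-based overload above; the jar directory path and the wrapper class are hypothetical, and DistributedCacheJars is assumed to be on the classpath as quoted.

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import com.moz.fiji.mapreduce.DistributedCacheJars;

public final class CacheJarsExample {
    /** Adds every jar under a (hypothetical) HDFS lib directory to the job's distributed cache. */
    public static void addLibDir(Job job) throws IOException {
        Path libDir = new Path("hdfs:///apps/myjob/lib"); // hypothetical path
        DistributedCacheJars.addJarsToDistributedCache(job, libDir);
    }
}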
From source file:com.moz.fiji.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * Adds the jar files into the distributed cache of a job.
 *
 * @param job The job to configure.
 * @param jarFiles Collection of jar files to add.
 * @throws IOException on I/O error.
 */
public static void addJarsToDistributedCache(Job job, Collection<Path> jarFiles) throws IOException {
    // Get existing jars named in configuration.
    final List<Path> allJars = Lists.newArrayList(getJarsFromConfiguration(job.getConfiguration()));

    // Add jars from jarDirectory.
    for (Path path : jarFiles) {
        final Path qualifiedPath = path.getFileSystem(job.getConfiguration()).makeQualified(path);
        LOG.debug("Adding jar {}, fully qualified as {}", path, qualifiedPath);
        allJars.add(qualifiedPath);
    }

    // De-duplicate the list of jar files, based on their names:
    final Collection<Path> deDupedJars = deDuplicateFilenames(allJars);
    job.getConfiguration().set(CONF_TMPJARS, StringUtils.join(deDupedJars, ","));
}
From source file:com.moz.fiji.mapreduce.FijiMapReduceJobBuilder.java
License:Apache License
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    // Check that job input was configured.
    if (null == mJobInput) {
        throw new JobConfigurationException("Must specify job input.");
    }

    // Construct the mapper instance.
    if (null == mMapperClass) {
        throw new JobConfigurationException("Must specify a mapper.");
    }
    mMapper = ReflectionUtils.newInstance(mMapperClass, job.getConfiguration());

    // Construct the combiner instance (if specified).
    if (null != mCombinerClass) {
        mCombiner = ReflectionUtils.newInstance(mCombinerClass, job.getConfiguration());
    }

    // Construct the reducer instance (if specified).
    if (null != mReducerClass) {
        mReducer = ReflectionUtils.newInstance(mReducerClass, job.getConfiguration());
    }

    StringBuilder name = new StringBuilder();
    name.append("Fiji transform: ");
    name.append(mMapperClass.getSimpleName());
    if (null != mReducerClass) {
        name.append(" / ");
        name.append(mReducerClass.getSimpleName());
    }
    job.setJobName(name.toString());

    // Configure the MapReduce job.
    super.configureJob(job);
}
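The pattern worth noting above is ReflectionUtils.newInstance(clazz, job.getConfiguration()), which instantiates the class and also injects the job's configuration when the class implements Configurable. A minimal sketch using only plain Hadoop types; the wrapper class is hypothetical.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.ReflectionUtils;

public final class ReflectionExample {
    /** Instantiates a mapper class, passing the job's configuration to it if it is Configurable. */
    public static Mapper<?, ?, ?, ?> newMapper(Job job, Class<? extends Mapper<?, ?, ?, ?>> mapperClass) {
        return ReflectionUtils.newInstance(mapperClass, job.getConfiguration());
    }
}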
From source file:com.moz.fiji.mapreduce.framework.FijiTableInputFormat.java
License:Apache License
/**
 * Configures a Hadoop M/R job to read from a given table.
 *
 * @param job Job to configure.
 * @param tableURI URI of the table to read from.
 * @param dataRequest Data request.
 * @param startRow Minimum row key to process. May be left null to indicate
 *     that scanning should start at the beginning of the table.
 * @param endRow Maximum row key to process. May be left null to indicate that
 *     scanning should continue to the end of the table.
 * @param filter Filter to use for scanning. May be left null.
 * @throws IOException on I/O error.
 */
public static void configureJob(Job job, FijiURI tableURI, FijiDataRequest dataRequest, EntityId startRow,
        EntityId endRow, FijiRowFilter filter) throws IOException {
    Preconditions.checkNotNull(job, "job must not be null");
    Preconditions.checkNotNull(tableURI, "tableURI must not be null");
    Preconditions.checkNotNull(dataRequest, "dataRequest must not be null");

    final Configuration conf = job.getConfiguration();

    // TODO: Check for jars config:
    // GenericTableMapReduceUtil.initTableInput(hbaseTableName, scan, job);

    // Write all the required values to the job's configuration object.
    final String serializedRequest = Base64.encodeBase64String(SerializationUtils.serialize(dataRequest));
    conf.set(FijiConfKeys.FIJI_INPUT_DATA_REQUEST, serializedRequest);
    conf.set(FijiConfKeys.FIJI_INPUT_TABLE_URI, tableURI.toString());
    if (null != startRow) {
        conf.set(FijiConfKeys.FIJI_START_ROW_KEY, Base64.encodeBase64String(startRow.getHBaseRowKey()));
    }
    if (null != endRow) {
        conf.set(FijiConfKeys.FIJI_LIMIT_ROW_KEY, Base64.encodeBase64String(endRow.getHBaseRowKey()));
    }
    if (null != filter) {
        conf.set(FijiConfKeys.FIJI_ROW_FILTER, filter.toJson().toString());
    }
}
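The example above serializes the data request and Base64-encodes it so it survives as a plain configuration string. A hedged sketch of that round trip through job.getConfiguration(), using commons-codec and commons-lang3 (the exact commons-lang variant used by the quoted source is an assumption, and the key name is hypothetical):

import java.io.Serializable;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.hadoop.mapreduce.Job;

public final class ConfSerializationExample {
    private static final String KEY = "example.serialized.object"; // hypothetical key

    /** Stores a Serializable in the job configuration as a Base64 string. */
    public static void store(Job job, Serializable value) {
        job.getConfiguration().set(KEY, Base64.encodeBase64String(SerializationUtils.serialize(value)));
    }

    /** Reads the object back from the same configuration, e.g. on the task side. */
    public static Object load(Job job) {
        return SerializationUtils.deserialize(Base64.decodeBase64(job.getConfiguration().get(KEY)));
    }
}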
From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java
License:Apache License
/**
 * Writes a job into the JobHistoryFijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
    recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
            job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
}
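A hedged sketch of how a caller might use recordJob around job submission; the wrapper class is hypothetical, and 'history' stands in for an already-opened JobHistoryFijiTable instance.

import org.apache.hadoop.mapreduce.Job;
import com.moz.fiji.mapreduce.framework.JobHistoryFijiTable;

public final class RecordJobExample {
    /** Runs a job and records its start/end timestamps in the (assumed open) history table. */
    public static void runAndRecord(Job job, JobHistoryFijiTable history) throws Exception {
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true); // submit and block until the job finishes
        final long endTime = System.currentTimeMillis();
        history.recordJob(job, startTime, endTime);
    }
}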
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the MapReduce mapper for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureMapper(Job job) throws IOException {
    // Set the map class in the job configuration.
    final FijiMapper<?, ?, ?, ?> mapper = getMapper();
    if (null == mapper) {
        throw new JobConfigurationException("Must specify a mapper");
    }
    if (mapper instanceof Configurable) {
        ((Configurable) mapper).setConf(job.getConfiguration());
    }
    job.setMapperClass(((Mapper<?, ?, ?, ?>) mapper).getClass());

    // Set the map output key and map output value types in the job configuration.
    job.setMapOutputKeyClass(mapper.getOutputKeyClass());
    job.setMapOutputValueClass(mapper.getOutputValueClass());

    configureAvro(job, mapper);
    configureHTableInput(job, mapper);
}
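The instanceof Configurable check above is what lets a mapper instance receive job.getConfiguration() before its class is registered on the job. A minimal sketch of a plain Hadoop mapper that opts into that by implementing Configurable; the class name and property key are hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConfigurableMapper extends Mapper<LongWritable, Text, Text, LongWritable>
        implements Configurable {
    private Configuration mConf;
    private String mGreeting;

    @Override
    public void setConf(Configuration conf) {
        mConf = conf;
        mGreeting = conf.get("example.greeting", "hello"); // hypothetical property
    }

    @Override
    public Configuration getConf() {
        return mConf;
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(mGreeting), new LongWritable(1L));
    }
}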
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures HTable input for the job if the mapper is an HTableReader.
 *
 * <p>If the mapper the job is configured to run is reading from an HBase table
 * (HTable), this method will make sure the mapper implements the HTableReader interface
 * and use its methods to configure the table scan specification required for the
 * HTableInputFormat.</p>
 *
 * <p>A mapper that reads its input from an HTable needs to specify the Scan descriptor
 * that describes what subset of rows and column cells should be processed. The mapper
 * communicates this by implementing the methods of the HTableReader interface. This
 * method calls the methods of that interface on the job's mapper and sets Scan
 * descriptor into the job configuration so the HTableInputFormat can read it.</p>
 *
 * @param job The job to configure.
 * @param mapper The Fiji mapper the job is configured to run.
 * @throws IOException If the HTable input cannot be configured.
 */
protected void configureHTableInput(Job job, FijiMapper<?, ?, ?, ?> mapper) throws IOException {
    if (mapper instanceof HTableReader) {
        HTableReader htableReader = (HTableReader) mapper;
        Scan hbaseInputTableScan = htableReader.getInputHTableScan(job.getConfiguration());
        if (null == hbaseInputTableScan) {
            return;
        }
        LOG.debug("Configuring HTable scan: " + hbaseInputTableScan.toString());
        GenericTableMapReduceUtil.initTableScan(hbaseInputTableScan, job);
    }
}
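A hedged sketch of the kind of Scan an HTableReader mapper might build from the configuration it is given. It is written as a standalone helper rather than a claim about the exact HTableReader interface signature; the column family and tuning key are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public final class ScanExample {
    /** Builds a scan over one (hypothetical) column family, tuned for full-table MapReduce scans. */
    public static Scan buildInputScan(Configuration conf) {
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("info"));                      // hypothetical column family
        scan.setCacheBlocks(false);                                 // avoid polluting the block cache
        scan.setCaching(conf.getInt("example.scan.caching", 500));  // hypothetical tuning key
        return scan;
    }
}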
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the MapReduce combiner for the job.
 *
 * @param job The Hadoop MR job.
 */
protected void configureCombiner(Job job) {
    final FijiReducer<?, ?, ?, ?> combiner = getCombiner();
    if (null == combiner) {
        LOG.debug("No combiner provided.");
        return;
    }
    if (combiner instanceof Configurable) {
        ((Configurable) combiner).setConf(job.getConfiguration());
    }
    job.setCombinerClass(((Reducer<?, ?, ?, ?>) combiner).getClass());
}
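For comparison, a sketch of the equivalent wiring on a plain Hadoop job using the stock IntSumReducer as a combiner; this is not part of the Fiji builder, only an illustration of what setCombinerClass ends up doing.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public final class CombinerExample {
    /** Wires the stock IntSumReducer as a combiner; map output types must match its input types. */
    public static void wireCombiner(Job job) {
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setCombinerClass(IntSumReducer.class);
    }
}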
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the MapReduce reducer for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureReducer(Job job) throws IOException {
    final FijiReducer<?, ?, ?, ?> reducer = getReducer();
    if (null == reducer) {
        LOG.info("No reducer provided. This will be a map-only job");
        job.setNumReduceTasks(0);

        // Set the job output key/value classes based on what the map output key/value classes were
        // since this a map-only job.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration());
        if (null != mapOutputKeySchema) {
            AvroJob.setOutputKeySchema(job, mapOutputKeySchema);
        }
        job.setOutputValueClass(job.getMapOutputValueClass());
        Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration());
        if (null != mapOutputValueSchema) {
            AvroJob.setOutputValueSchema(job, mapOutputValueSchema);
        }
        return;
    }
    if (reducer instanceof Configurable) {
        ((Configurable) reducer).setConf(job.getConfiguration());
    }
    job.setReducerClass(reducer.getClass());

    // Set output key class.
    Class<?> outputKeyClass = reducer.getOutputKeyClass();
    job.setOutputKeyClass(outputKeyClass);
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer);
    if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
        if (null == outputKeyWriterSchema) {
            throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroKeyWriter in your FijiReducer?");
        }
        AvroJob.setOutputKeySchema(job, outputKeyWriterSchema);
    } else if (null != outputKeyWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                        + " but the output key class was not AvroKey.");
    }

    // Set output value class.
    Class<?> outputValueClass = reducer.getOutputValueClass();
    job.setOutputValueClass(outputValueClass);
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer);
    if (AvroValue.class.isAssignableFrom(outputValueClass)) {
        if (null == outputValueWriterSchema) {
            throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroValueWriter in your FijiReducer?");
        }
        AvroJob.setOutputValueSchema(job, outputValueWriterSchema);
    } else if (null != outputValueWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                        + " but the output value class was not AvroValue.");
    }
}
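The Avro branch above requires a writer schema whenever the output key or value class is AvroKey/AvroValue. A hedged sketch of the corresponding calls on a plain Hadoop job, with simple primitive schemas standing in for real record schemas:

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.mapreduce.Job;

public final class AvroOutputExample {
    /** Declares Avro-wrapped output types plus the writer schemas the AvroKey/AvroValue wrappers need. */
    public static void wireAvroOutput(Job job) {
        job.setOutputKeyClass(AvroKey.class);
        job.setOutputValueClass(AvroValue.class);
        AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.LONG));
    }
}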
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the key-value stores to be attached to the job.
 *
 * <p>This adds the key-value stores defined by the user and the
 * job components (producer, gatherer, etc) to the job configuration.
 * </p>
 *
 * @param job The job to configure.
 * @throws IOException if there is an error configuring the stores. This
 *     may include, e.g., the case where a store is required but not defined.
 */
protected final void configureStores(Job job) throws IOException {
    KeyValueStoreConfigValidator.get().bindAndValidateRequiredStores(getRequiredStores(), mBoundStores);
    KeyValueStoreConfigSerializer.get().addStoreMapToConfiguration(mBoundStores, job.getConfiguration());
}
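KeyValueStoreConfigSerializer is Fiji-specific, but the underlying move is simply writing a map of bindings into job.getConfiguration() so tasks can rebuild it later. A generic, hedged sketch of that idea; the key prefix and wrapper class are hypothetical and do not reflect the serializer's real layout.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public final class StoreMapExample {
    /** Writes each binding under a (hypothetical) prefix into the job configuration. */
    public static void addMapToConfiguration(Job job, Map<String, String> bindings) {
        Configuration conf = job.getConfiguration();
        for (Map.Entry<String, String> entry : bindings.entrySet()) {
            conf.set("example.store." + entry.getKey(), entry.getValue()); // hypothetical key prefix
        }
    }
}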