List of usage examples for org.apache.hadoop.mapreduce.Job.getConfiguration()
public Configuration getConfiguration()
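Before the library-specific examples below, here is a minimal sketch of the method itself: getConfiguration() returns the live Configuration backing the job, so properties set on it before submission are visible when the configuration is read back. The property key "example.greeting" and the class name are illustrative only.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class GetConfigurationExample {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example-job");
        // Mutate the job's configuration before submission.
        job.getConfiguration().set("example.greeting", "hello");
        // Read it back through the same object.
        System.out.println(job.getConfiguration().get("example.greeting"));
    }
}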
From source file:com.moz.fiji.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * Adds the jars from a directory into the distributed cache of a job.
 *
 * @param job The job to configure.
 * @param jarDirectory A path to a directory of jar files.
 * @throws IOException on I/O error.
 */
public static void addJarsToDistributedCache(Job job, Path jarDirectory) throws IOException {
    if (null == jarDirectory) {
        throw new IllegalArgumentException("Jar directory may not be null");
    }
    addJarsToDistributedCache(job, listJarFilesFromDirectory(job.getConfiguration(), jarDirectory));
}
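A hedged caller sketch for the directory-based overload above; the jar directory path and the wrapper class are hypothetical, and DistributedCacheJars is assumed to be on the classpath as quoted.

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import com.moz.fiji.mapreduce.DistributedCacheJars;

public final class CacheJarsExample {
    /** Adds every jar under a (hypothetical) HDFS lib directory to the job's distributed cache. */
    public static void addLibDir(Job job) throws IOException {
        Path libDir = new Path("hdfs:///apps/myjob/lib"); // hypothetical path
        DistributedCacheJars.addJarsToDistributedCache(job, libDir);
    }
}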
From source file:com.moz.fiji.mapreduce.DistributedCacheJars.java
License:Apache License
/**
 * Adds the jar files into the distributed cache of a job.
 *
 * @param job The job to configure.
 * @param jarFiles Collection of jar files to add.
 * @throws IOException on I/O error.
 */
public static void addJarsToDistributedCache(Job job, Collection<Path> jarFiles) throws IOException {
    // Get existing jars named in configuration.
    final List<Path> allJars = Lists.newArrayList(getJarsFromConfiguration(job.getConfiguration()));

    // Add jars from jarDirectory.
    for (Path path : jarFiles) {
        final Path qualifiedPath = path.getFileSystem(job.getConfiguration()).makeQualified(path);
        LOG.debug("Adding jar {}, fully qualified as {}", path, qualifiedPath);
        allJars.add(qualifiedPath);
    }

    // De-duplicate the list of jar files, based on their names:
    final Collection<Path> deDupedJars = deDuplicateFilenames(allJars);
    job.getConfiguration().set(CONF_TMPJARS, StringUtils.join(deDupedJars, ","));
}
From source file:com.moz.fiji.mapreduce.FijiMapReduceJobBuilder.java
License:Apache License
/** {@inheritDoc} */
@Override
protected void configureJob(Job job) throws IOException {
    // Check that job input was configured.
    if (null == mJobInput) {
        throw new JobConfigurationException("Must specify job input.");
    }

    // Construct the mapper instance.
    if (null == mMapperClass) {
        throw new JobConfigurationException("Must specify a mapper.");
    }
    mMapper = ReflectionUtils.newInstance(mMapperClass, job.getConfiguration());

    // Construct the combiner instance (if specified).
    if (null != mCombinerClass) {
        mCombiner = ReflectionUtils.newInstance(mCombinerClass, job.getConfiguration());
    }

    // Construct the reducer instance (if specified).
    if (null != mReducerClass) {
        mReducer = ReflectionUtils.newInstance(mReducerClass, job.getConfiguration());
    }

    StringBuilder name = new StringBuilder();
    name.append("Fiji transform: ");
    name.append(mMapperClass.getSimpleName());
    if (null != mReducerClass) {
        name.append(" / ");
        name.append(mReducerClass.getSimpleName());
    }
    job.setJobName(name.toString());

    // Configure the MapReduce job.
    super.configureJob(job);
}
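The pattern worth noting above is ReflectionUtils.newInstance(clazz, job.getConfiguration()), which instantiates the class and also injects the job's configuration when the class implements Configurable. A minimal sketch using only plain Hadoop types; the wrapper class is hypothetical.

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.ReflectionUtils;

public final class ReflectionExample {
    /** Instantiates a mapper class, passing the job's configuration to it if it is Configurable. */
    public static Mapper<?, ?, ?, ?> newMapper(Job job, Class<? extends Mapper<?, ?, ?, ?>> mapperClass) {
        return ReflectionUtils.newInstance(mapperClass, job.getConfiguration());
    }
}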
From source file:com.moz.fiji.mapreduce.framework.FijiTableInputFormat.java
License:Apache License
/**
 * Configures a Hadoop M/R job to read from a given table.
 *
 * @param job Job to configure.
 * @param tableURI URI of the table to read from.
 * @param dataRequest Data request.
 * @param startRow Minimum row key to process. May be left null to indicate
 *     that scanning should start at the beginning of the table.
 * @param endRow Maximum row key to process. May be left null to indicate that
 *     scanning should continue to the end of the table.
 * @param filter Filter to use for scanning. May be left null.
 * @throws IOException on I/O error.
 */
public static void configureJob(Job job, FijiURI tableURI, FijiDataRequest dataRequest, EntityId startRow,
        EntityId endRow, FijiRowFilter filter) throws IOException {
    Preconditions.checkNotNull(job, "job must not be null");
    Preconditions.checkNotNull(tableURI, "tableURI must not be null");
    Preconditions.checkNotNull(dataRequest, "dataRequest must not be null");

    final Configuration conf = job.getConfiguration();

    // TODO: Check for jars config:
    // GenericTableMapReduceUtil.initTableInput(hbaseTableName, scan, job);

    // Write all the required values to the job's configuration object.
    final String serializedRequest = Base64.encodeBase64String(SerializationUtils.serialize(dataRequest));
    conf.set(FijiConfKeys.FIJI_INPUT_DATA_REQUEST, serializedRequest);
    conf.set(FijiConfKeys.FIJI_INPUT_TABLE_URI, tableURI.toString());
    if (null != startRow) {
        conf.set(FijiConfKeys.FIJI_START_ROW_KEY, Base64.encodeBase64String(startRow.getHBaseRowKey()));
    }
    if (null != endRow) {
        conf.set(FijiConfKeys.FIJI_LIMIT_ROW_KEY, Base64.encodeBase64String(endRow.getHBaseRowKey()));
    }
    if (null != filter) {
        conf.set(FijiConfKeys.FIJI_ROW_FILTER, filter.toJson().toString());
    }
}
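The example above serializes the data request and Base64-encodes it so it survives as a plain configuration string. A hedged sketch of that round trip through job.getConfiguration(), using commons-codec and commons-lang3 (the exact commons-lang variant used by the quoted source is an assumption, and the key name is hypothetical):

import java.io.Serializable;
import org.apache.commons.codec.binary.Base64;
import org.apache.commons.lang3.SerializationUtils;
import org.apache.hadoop.mapreduce.Job;

public final class ConfSerializationExample {
    private static final String KEY = "example.serialized.object"; // hypothetical key

    /** Stores a Serializable in the job configuration as a Base64 string. */
    public static void store(Job job, Serializable value) {
        job.getConfiguration().set(KEY, Base64.encodeBase64String(SerializationUtils.serialize(value)));
    }

    /** Reads the object back from the same configuration, e.g. on the task side. */
    public static Object load(Job job) {
        return SerializationUtils.deserialize(Base64.decodeBase64(job.getConfiguration().get(KEY)));
    }
}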
From source file:com.moz.fiji.mapreduce.framework.JobHistoryFijiTable.java
License:Apache License
/**
 * Writes a job into the JobHistoryFijiTable.
 *
 * @param job The job to save.
 * @param startTime The time the job began, in milliseconds.
 * @param endTime The time the job ended, in milliseconds.
 * @throws IOException If there is an error writing to the table.
 */
public void recordJob(final Job job, final long startTime, final long endTime) throws IOException {
    recordJob(job.getJobID().toString(), job.getJobName(), startTime, endTime, job.isSuccessful(),
            job.getConfiguration(), getCounters(job), Collections.<String, String>emptyMap());
}
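A hedged sketch of how a caller might use recordJob around job submission; the wrapper class is hypothetical, and 'history' stands in for an already-opened JobHistoryFijiTable instance.

import org.apache.hadoop.mapreduce.Job;
import com.moz.fiji.mapreduce.framework.JobHistoryFijiTable;

public final class RecordJobExample {
    /** Runs a job and records its start/end timestamps in the (assumed open) history table. */
    public static void runAndRecord(Job job, JobHistoryFijiTable history) throws Exception {
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true); // submit and block until the job finishes
        final long endTime = System.currentTimeMillis();
        history.recordJob(job, startTime, endTime);
    }
}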
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the MapReduce mapper for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureMapper(Job job) throws IOException {
    // Set the map class in the job configuration.
    final FijiMapper<?, ?, ?, ?> mapper = getMapper();
    if (null == mapper) {
        throw new JobConfigurationException("Must specify a mapper");
    }
    if (mapper instanceof Configurable) {
        ((Configurable) mapper).setConf(job.getConfiguration());
    }
    job.setMapperClass(((Mapper<?, ?, ?, ?>) mapper).getClass());

    // Set the map output key and map output value types in the job configuration.
    job.setMapOutputKeyClass(mapper.getOutputKeyClass());
    job.setMapOutputValueClass(mapper.getOutputValueClass());

    configureAvro(job, mapper);
    configureHTableInput(job, mapper);
}
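The instanceof Configurable check above is what lets a mapper instance receive job.getConfiguration() before its class is registered on the job. A minimal sketch of a plain Hadoop mapper that opts into that by implementing Configurable; the class name and property key are hypothetical.

import java.io.IOException;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

public class ConfigurableMapper extends Mapper<LongWritable, Text, Text, LongWritable>
        implements Configurable {
    private Configuration mConf;
    private String mGreeting;

    @Override
    public void setConf(Configuration conf) {
        mConf = conf;
        mGreeting = conf.get("example.greeting", "hello"); // hypothetical property
    }

    @Override
    public Configuration getConf() {
        return mConf;
    }

    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        context.write(new Text(mGreeting), new LongWritable(1L));
    }
}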
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures HTable input for the job if the mapper is an HTableReader.
 *
 * <p>If the mapper the job is configured to run is reading from an HBase table
 * (HTable), this method will make sure the mapper implements the HTableReader interface
 * and use its methods to configure the table scan specification required for the
 * HTableInputFormat.</p>
 *
 * <p>A mapper that reads its input from an HTable needs to specify the Scan descriptor
 * that describes what subset of rows and column cells should be processed. The mapper
 * communicates this by implementing the methods of the HTableReader interface. This
 * method calls the methods of that interface on the job's mapper and sets Scan
 * descriptor into the job configuration so the HTableInputFormat can read it.</p>
 *
 * @param job The job to configure.
 * @param mapper The Fiji mapper the job is configured to run.
 * @throws IOException If the HTable input cannot be configured.
 */
protected void configureHTableInput(Job job, FijiMapper<?, ?, ?, ?> mapper) throws IOException {
    if (mapper instanceof HTableReader) {
        HTableReader htableReader = (HTableReader) mapper;
        Scan hbaseInputTableScan = htableReader.getInputHTableScan(job.getConfiguration());
        if (null == hbaseInputTableScan) {
            return;
        }
        LOG.debug("Configuring HTable scan: " + hbaseInputTableScan.toString());
        GenericTableMapReduceUtil.initTableScan(hbaseInputTableScan, job);
    }
}
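A hedged sketch of the kind of Scan an HTableReader mapper might build from the configuration it is given. It is written as a standalone helper rather than a claim about the exact HTableReader interface signature; the column family and tuning key are hypothetical.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public final class ScanExample {
    /** Builds a scan over one (hypothetical) column family, tuned for full-table MapReduce scans. */
    public static Scan buildInputScan(Configuration conf) {
        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("info"));                      // hypothetical column family
        scan.setCacheBlocks(false);                                 // avoid polluting the block cache
        scan.setCaching(conf.getInt("example.scan.caching", 500));  // hypothetical tuning key
        return scan;
    }
}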
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the MapReduce combiner for the job.
 *
 * @param job The Hadoop MR job.
 */
protected void configureCombiner(Job job) {
    final FijiReducer<?, ?, ?, ?> combiner = getCombiner();
    if (null == combiner) {
        LOG.debug("No combiner provided.");
        return;
    }
    if (combiner instanceof Configurable) {
        ((Configurable) combiner).setConf(job.getConfiguration());
    }
    job.setCombinerClass(((Reducer<?, ?, ?, ?>) combiner).getClass());
}
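For comparison, a sketch of the equivalent wiring on a plain Hadoop job using the stock IntSumReducer as a combiner; this is not part of the Fiji builder, only an illustration of what setCombinerClass ends up doing.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.reduce.IntSumReducer;

public final class CombinerExample {
    /** Wires the stock IntSumReducer as a combiner; map output types must match its input types. */
    public static void wireCombiner(Job job) {
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        job.setCombinerClass(IntSumReducer.class);
    }
}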
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the MapReduce reducer for the job.
 *
 * @param job The Hadoop MR job.
 * @throws IOException If there is an error.
 */
protected void configureReducer(Job job) throws IOException {
    final FijiReducer<?, ?, ?, ?> reducer = getReducer();
    if (null == reducer) {
        LOG.info("No reducer provided. This will be a map-only job");
        job.setNumReduceTasks(0);

        // Set the job output key/value classes based on what the map output key/value classes were
        // since this a map-only job.
        job.setOutputKeyClass(job.getMapOutputKeyClass());
        Schema mapOutputKeySchema = AvroJob.getMapOutputKeySchema(job.getConfiguration());
        if (null != mapOutputKeySchema) {
            AvroJob.setOutputKeySchema(job, mapOutputKeySchema);
        }
        job.setOutputValueClass(job.getMapOutputValueClass());
        Schema mapOutputValueSchema = AvroJob.getMapOutputValueSchema(job.getConfiguration());
        if (null != mapOutputValueSchema) {
            AvroJob.setOutputValueSchema(job, mapOutputValueSchema);
        }
        return;
    }
    if (reducer instanceof Configurable) {
        ((Configurable) reducer).setConf(job.getConfiguration());
    }
    job.setReducerClass(reducer.getClass());

    // Set output key class.
    Class<?> outputKeyClass = reducer.getOutputKeyClass();
    job.setOutputKeyClass(outputKeyClass);
    Schema outputKeyWriterSchema = AvroMapReduce.getAvroKeyWriterSchema(reducer);
    if (AvroKey.class.isAssignableFrom(outputKeyClass)) {
        if (null == outputKeyWriterSchema) {
            throw new JobConfigurationException("Using AvroKey output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroKeyWriter in your FijiReducer?");
        }
        AvroJob.setOutputKeySchema(job, outputKeyWriterSchema);
    } else if (null != outputKeyWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroKeyWriterSchema() returned a non-null Schema"
                        + " but the output key class was not AvroKey.");
    }

    // Set output value class.
    Class<?> outputValueClass = reducer.getOutputValueClass();
    job.setOutputValueClass(outputValueClass);
    Schema outputValueWriterSchema = AvroMapReduce.getAvroValueWriterSchema(reducer);
    if (AvroValue.class.isAssignableFrom(outputValueClass)) {
        if (null == outputValueWriterSchema) {
            throw new JobConfigurationException("Using AvroValue output, but a writer schema was not provided. "
                    + "Did you forget to implement AvroValueWriter in your FijiReducer?");
        }
        AvroJob.setOutputValueSchema(job, outputValueWriterSchema);
    } else if (null != outputValueWriterSchema) {
        throw new JobConfigurationException(
                reducer.getClass().getName() + ".getAvroValueWriterSchema() returned a non-null Schema"
                        + " but the output value class was not AvroValue.");
    }
}
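The Avro branch above requires a writer schema whenever the output key or value class is AvroKey/AvroValue. A hedged sketch of the corresponding calls on a plain Hadoop job, with simple primitive schemas standing in for real record schemas:

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroKey;
import org.apache.avro.mapred.AvroValue;
import org.apache.avro.mapreduce.AvroJob;
import org.apache.hadoop.mapreduce.Job;

public final class AvroOutputExample {
    /** Declares Avro-wrapped output types plus the writer schemas the AvroKey/AvroValue wrappers need. */
    public static void wireAvroOutput(Job job) {
        job.setOutputKeyClass(AvroKey.class);
        job.setOutputValueClass(AvroValue.class);
        AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING));
        AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.LONG));
    }
}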
From source file:com.moz.fiji.mapreduce.framework.MapReduceJobBuilder.java
License:Apache License
/**
 * Configures the key-value stores to be attached to the job.
 *
 * <p>This adds the key-value stores defined by the user and the
 * job components (producer, gatherer, etc) to the job configuration.
 * </p>
 *
 * @param job The job to configure.
 * @throws IOException if there is an error configuring the stores. This
 *     may include, e.g., the case where a store is required but not defined.
 */
protected final void configureStores(Job job) throws IOException {
    KeyValueStoreConfigValidator.get().bindAndValidateRequiredStores(getRequiredStores(), mBoundStores);
    KeyValueStoreConfigSerializer.get().addStoreMapToConfiguration(mBoundStores, job.getConfiguration());
}
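KeyValueStoreConfigSerializer is Fiji-specific, but the underlying move is simply writing a map of bindings into job.getConfiguration() so tasks can rebuild it later. A generic, hedged sketch of that idea; the key prefix and wrapper class are hypothetical and do not reflect the serializer's real layout.

import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public final class StoreMapExample {
    /** Writes each binding under a (hypothetical) prefix into the job configuration. */
    public static void addMapToConfiguration(Job job, Map<String, String> bindings) {
        Configuration conf = job.getConfiguration();
        for (Map.Entry<String, String> entry : bindings.entrySet()) {
            conf.set("example.store." + entry.getKey(), entry.getValue()); // hypothetical key prefix
        }
    }
}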