List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration
public Configuration getConfiguration();
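Before the project examples below, a minimal self-contained sketch of the usual pattern: a custom InputFormat pulls its job settings out of the JobContext's Configuration inside getSplits(). The class name SketchInputFormat and the my.example.* property names are hypothetical placeholders, not taken from any of the source files listed here.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class SketchInputFormat extends InputFormat<LongWritable, Text> {
    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException {
        // JobContext exposes the job's Configuration; read settings from it.
        Configuration conf = context.getConfiguration();
        String source = conf.get("my.example.source"); // hypothetical property
        int shards = conf.getInt("my.example.shards", 4); // hypothetical property with default
        if (source == null) {
            throw new IOException("my.example.source not set in job configuration");
        }
        // ... derive one InputSplit per shard from the configured source ...
        return new ArrayList<InputSplit>();
    }

    @Override
    public RecordReader<LongWritable, Text> createRecordReader(InputSplit split, TaskAttemptContext context) {
        throw new UnsupportedOperationException("sketch only");
    }
}

The same call works anywhere a JobContext (or its TaskAttemptContext subtype) is available, for example in OutputFormat.checkOutputSpecs or an OutputCommitter, as the examples below show.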
From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.GFOutputFormat.java
License:Apache License
@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);
}
From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java
License:Open Source License
@Override
public void commitJob(JobContext jobContext) throws IOException {
    LOG.info("Commit job for path {}", outputPath);
    super.commitJob(jobContext);
    writeMetadataFile(jobContext.getConfiguration(), outputPath);
}
From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java
License:Open Source License
public static Class transferConfiguration(JobContext context, JobRunParameterInterface inter) {
    Configuration conf = context.getConfiguration();
    String[] strings = conf.getStrings(MRHBasePluginRunner.EXT_PARAMETERS);
    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String mapParameter = strings[SETTINGS_MAP];
    if (mapParameter != null && !mapParameter.isEmpty()) {
        Map<String, String> settingsMap = (Map<String, String>) ((Object[]) SerializationUtils
                .deserialize(Base64.decodeBase64(mapParameter)))[EXTERNAL_PARAMETERS];
        if (settingsMap != null) {
            Logger.getLogger(FeatureSetCountPlugin.class.getName())
                    .info("Settings map retrieved with " + settingsMap.size() + " entries");
            Constants.setSETTINGS_MAP(settingsMap);
        }
    }
    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String externalParameters = strings[EXTERNAL_PARAMETERS];
    if (externalParameters != null && !externalParameters.isEmpty()) {
        inter.setExt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(externalParameters)));
    }
    final String internalParameters = strings[INTERNAL_PARAMETERS];
    if (internalParameters != null && !internalParameters.isEmpty()) {
        inter.setInt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(internalParameters)));
    }
    final String sourceSets = strings[NUM_AND_SOURCE_FEATURE_SETS];
    if (sourceSets != null && !sourceSets.isEmpty()) {
        List<FeatureSet> sSets = convertBase64StrToFeatureSets(sourceSets);
        inter.setSourceSets(sSets);
    }
    final String destSetParameter = strings[DESTINATION_FEATURE_SET];
    if (destSetParameter != null && !destSetParameter.isEmpty()) {
        inter.setDestSet(SWQEFactory.getSerialization().deserialize(Base64.decodeBase64(destSetParameter),
                FeatureSet.class));
    }
    final String pluginParameter = strings[PLUGIN_CLASS];
    if (pluginParameter != null && !pluginParameter.isEmpty()) {
        Object deserialize = SerializationUtils.deserialize(Base64.decodeBase64(pluginParameter));
        Class plugin = (Class) deserialize;
        return plugin;
    }
    throw new RuntimeException("Could not determine plugin to run");
}
From source file:com.google.appengine.tools.mapreduce.AppEngineJobContextTest.java
License:Apache License
public void testGetJobContextFromRequest() {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest req = createMockMapReduceRequest(jobId);
    replay(req);
    Configuration conf = ConfigurationXmlUtil.getConfigurationFromXml(SIMPLE_CONF_XML);
    persistMRState(jobId, conf);
    JobContext context = new AppEngineJobContext(req);
    assertEquals("/tmp/foo", context.getConfiguration().get("foo.bar"));
    assertEquals(jobId.toString(), context.getJobID().toString());
    verify(req);
}
From source file:com.google.appengine.tools.mapreduce.BlobstoreInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    String blobKey = configuration.get(BLOB_KEYS);
    int shardCount = configuration.getInt(SHARD_COUNT, DEFAULT_SHARD_COUNT);
    long blobSize = blobKeyToSize.apply(blobKey);
    return getSplits(blobKey, blobSize, shardCount);
}
From source file:com.google.appengine.tools.mapreduce.BlobstoreInputFormatTest.java
License:Apache License
/**
 * Tests that the public {@code getSplits} passes the expected arguments
 * from the {@code JobContext} to the package-private one.
 */
public void test_getSplits() throws Exception {
    String blobKey = "blobKey";
    int shardCount = 3;
    long blobSize = 1024;
    IMocksControl control = EasyMock.createControl();
    @SuppressWarnings("unchecked")
    Function<String, Long> blobKeyToSize = control.createMock(Function.class);
    JobContext jobContext = control.createMock(JobContext.class);
    Configuration configuration = control.createMock(Configuration.class);
    EasyMock.expect(jobContext.getConfiguration()).andReturn(configuration).anyTimes();
    EasyMock.expect(configuration.get(BlobstoreInputFormat.BLOB_KEYS)).andReturn(blobKey);
    EasyMock.expect(
            configuration.getInt(BlobstoreInputFormat.SHARD_COUNT, BlobstoreInputFormat.DEFAULT_SHARD_COUNT))
            .andReturn(shardCount);
    // this is what we are testing
    EasyMock.expect(blobKeyToSize.apply(blobKey)).andReturn(blobSize);
    control.replay();
    BlobstoreInputFormat inputFormat = new BlobstoreInputFormat();
    inputFormat.setBlobKeyToSize(blobKeyToSize);
    assertSplits(inputFormat.getSplits(jobContext), blobKey, blobSize, shardCount);
    control.verify();
}
From source file:com.google.appengine.tools.mapreduce.DatastoreInputFormat.java
License:Apache License
/**
 * Generates a set of InputSplits partitioning a particular entity kind in
 * the datastore. The context's configuration must define a value for the
 * {@value #ENTITY_KIND_KEY} attribute, which will be the entity kind
 * partitioned, as well as a value for the {@value #SHARD_COUNT_KEY}
 * attribute, which will be the maximum number of shards to split into.
 */
public List<InputSplit> getSplits(JobContext context) throws IOException {
    String entityKind = context.getConfiguration().get(ENTITY_KIND_KEY);
    if (entityKind == null) {
        throw new IOException("No entity kind specified in job.");
    }
    log.info("Getting input splits for: " + entityKind);
    DatastoreService datastoreService = DatastoreServiceFactory.getDatastoreService();
    Key startKey = getStartKey(entityKind, datastoreService);
    if (startKey == null) {
        return new ArrayList<InputSplit>();
    }
    int shardCount = context.getConfiguration().getInt(SHARD_COUNT_KEY, DEFAULT_SHARD_COUNT);
    int desiredScatterResultCount = shardCount * SCATTER_OVERSAMPLE_FACTOR;
    // NB(frew): If scatter doesn't exist (as in the 1.4.0 dev_appserver)
    // then we'll just end up with one split. This seems reasonable.
    Query scatter = new Query(entityKind).addSort(SCATTER_RESERVED_PROPERTY).setKeysOnly();
    List<Entity> scatterList = datastoreService.prepare(scatter).asList(withLimit(desiredScatterResultCount));
    Collections.sort(scatterList, new Comparator<Entity>() {
        public int compare(Entity e1, Entity e2) {
            return e1.getKey().compareTo(e2.getKey());
        }
    });
    List<Key> splitKeys = new ArrayList<Key>(shardCount);
    // Possibly use a lower oversampling factor if there aren't enough
    // scatter-property-containing entities to fill out the list.
    int usedOversampleFactor = Math.max(1, scatterList.size() / shardCount);
    log.info("Requested " + desiredScatterResultCount + " scatter entities. Got " + scatterList.size()
            + " so using oversample factor " + usedOversampleFactor);
    // We expect the points to be uniformly randomly distributed. So we
    // act like the first point is the start key (which we already know)
    // and omit it. This converges on correct as the number of samples
    // goes to infinity.
    for (int i = 1; i < shardCount; i++) {
        // This can happen if we don't have as many scatter properties as we want.
        if (i * usedOversampleFactor >= scatterList.size()) {
            break;
        }
        splitKeys.add(scatterList.get(i * usedOversampleFactor).getKey());
    }
    return getSplitsFromSplitPoints(startKey, splitKeys);
}
From source file:com.google.appengine.tools.mapreduce.RangeInputFormat.java
License:Apache License
@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    long shardCount = conf.getInt(SHARD_COUNT_KEY, DEFAULT_SHARD_COUNT);
    long rangeStart = getNonNegativeLong(conf, RANGE_START_KEY);
    long rangeEnd = getNonNegativeLong(conf, RANGE_END_KEY);
    if (rangeStart >= rangeEnd) {
        throw new InvalidConfigurationException(
                "Invalid range. Start: " + rangeStart + " >= end: " + rangeEnd);
    }
    double increment = ((double) rangeEnd - rangeStart) / shardCount;
    ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
    for (int i = 0; i < shardCount - 1; i++) {
        splits.add(new RangeInputSplit(rangeStart + Math.round(i * increment),
                rangeStart + Math.round((i + 1) * increment)));
    }
    // Make sure that the final split hits the end.
    splits.add(new RangeInputSplit(rangeStart + Math.round((shardCount - 1) * increment), rangeEnd));
    return splits;
}
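For example, with rangeStart = 0, rangeEnd = 10 and shardCount = 3 (values chosen here purely for illustration), increment is 10/3 ≈ 3.33 and the rounding yields the splits [0, 3), [3, 7) and [7, 10), so the whole range is covered with no gaps or overlaps.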
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);
    FileSystem fs = FileSystem.get(job.getConfiguration());
    String fileExtension = new LzopCodec().getDefaultExtension();
    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();
        if (!file.toString().endsWith(fileExtension)) {
            // get rid of non-lzo files
            iterator.remove();
        } else {
            // read the index file
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }
    return files;
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    // find new start/ends of the filesplit that aligns
    // with the lzo blocks
    List<InputSplit> result = new ArrayList<InputSplit>();
    FileSystem fs = FileSystem.get(job.getConfiguration());
    for (InputSplit genericSplit : splits) {
        // load the index
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }
        if (index.isEmpty()) {
            // empty index, keep as is
            result.add(fileSplit);
            continue;
        }
        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();
        if (start != 0) {
            // find the next block position from the start of the split
            long newStart = index.findNextPosition(start);
            if (newStart == -1 || newStart >= end) {
                // just skip this since it will be handled by another split
                continue;
            }
            start = newStart;
        }
        long newEnd = index.findNextPosition(end);
        if (newEnd != -1) {
            end = newEnd;
        } else {
            // didn't find the next position; we have hit the end of the file
            end = fs.getFileStatus(file).getLen();
        }
        result.add(new FileSplit(file, start, end - start, fileSplit.getLocations()));
    }
    return result;
}