Example usage for org.apache.hadoop.mapreduce JobContext getConfiguration

List of usage examples for org.apache.hadoop.mapreduce JobContext getConfiguration

Introduction

On this page you can find example usage of org.apache.hadoop.mapreduce.JobContext.getConfiguration().

Prototype

public Configuration getConfiguration();

Document

Return the configuration for the job.
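
Before the individual examples, here is a minimal, self-contained sketch of the pattern they all share: a custom InputFormat (or OutputFormat) obtains the job's Configuration from the JobContext it receives and reads its settings from it. The property names my.example.input.path and my.example.shards are placeholders for illustration, not real Hadoop keys.

import java.io.IOException;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

public class ExampleInputFormat extends InputFormat<Object, Object> {

    @Override
    public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
        // JobContext.getConfiguration() returns the Configuration of the running job.
        Configuration conf = context.getConfiguration();
        String inputPath = conf.get("my.example.input.path");   // placeholder key
        int shards = conf.getInt("my.example.shards", 1);        // placeholder key with a default
        if (inputPath == null) {
            throw new IOException("my.example.input.path is not set in the job configuration");
        }
        // A real implementation would build one split per shard here.
        return Collections.emptyList();
    }

    @Override
    public RecordReader<Object, Object> createRecordReader(InputSplit split, TaskAttemptContext context) {
        throw new UnsupportedOperationException("sketch only");
    }
}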

Usage

From source file:com.gemstone.gemfire.cache.hdfs.internal.hoplog.mapreduce.GFOutputFormat.java

License:Apache License

@Override
public void checkOutputSpecs(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    validateConfiguration(conf);
}

From source file:com.github.sadikovi.hadoop.riff.RiffOutputCommitter.java

License:Open Source License

@Override
public void commitJob(JobContext jobContext) throws IOException {
    LOG.info("Commit job for path {}", outputPath);
    super.commitJob(jobContext);
    writeMetadataFile(jobContext.getConfiguration(), outputPath);
}

From source file:com.github.seqware.queryengine.plugins.runners.hbasemr.MRHBasePluginRunner.java

License:Open Source License

public static Class transferConfiguration(JobContext context, JobRunParameterInterface inter) {
    Configuration conf = context.getConfiguration();
    String[] strings = conf.getStrings(MRHBasePluginRunner.EXT_PARAMETERS);
    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String mapParameter = strings[SETTINGS_MAP];
    if (mapParameter != null && !mapParameter.isEmpty()) {
        Map<String, String> settingsMap = (Map<String, String>) ((Object[]) SerializationUtils
                .deserialize(Base64.decodeBase64(mapParameter)))[EXTERNAL_PARAMETERS];
        if (settingsMap != null) {
            Logger.getLogger(FeatureSetCountPlugin.class.getName())
                    .info("Settings map retrieved with " + settingsMap.size() + " entries");
            Constants.setSETTINGS_MAP(settingsMap);
        }
    }

    Logger.getLogger(PluginRunnerMapper.class.getName())
            .info("QEMapper configured with: host: "
                    + Constants.Term.HBASE_PROPERTIES.getTermValue(Map.class).toString() + " namespace: "
                    + Constants.Term.NAMESPACE.getTermValue(String.class));
    final String externalParameters = strings[EXTERNAL_PARAMETERS];
    if (externalParameters != null && !externalParameters.isEmpty()) {
        inter.setExt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(externalParameters)));
    }
    final String internalParameters = strings[INTERNAL_PARAMETERS];
    if (internalParameters != null && !internalParameters.isEmpty()) {
        inter.setInt_parameters(
                (Object[]) SerializationUtils.deserialize(Base64.decodeBase64(internalParameters)));
    }
    final String sourceSets = strings[NUM_AND_SOURCE_FEATURE_SETS];
    if (sourceSets != null && !sourceSets.isEmpty()) {
        List<FeatureSet> sSets = convertBase64StrToFeatureSets(sourceSets);
        inter.setSourceSets(sSets);
    }
    final String destSetParameter = strings[DESTINATION_FEATURE_SET];
    if (destSetParameter != null && !destSetParameter.isEmpty()) {
        inter.setDestSet(SWQEFactory.getSerialization().deserialize(Base64.decodeBase64(destSetParameter),
                FeatureSet.class));
    }
    final String pluginParameter = strings[PLUGIN_CLASS];
    if (pluginParameter != null && !pluginParameter.isEmpty()) {
        Object deserialize = SerializationUtils.deserialize(Base64.decodeBase64(pluginParameter));
        Class plugin = (Class) deserialize;
        return plugin;
    }
    throw new RuntimeException("Could not determine plugin to run");
}

From source file:com.google.appengine.tools.mapreduce.AppEngineJobContextTest.java

License:Apache License

public void testGetJobContextFromRequest() {
    JobID jobId = new JobID("foo", 1);
    HttpServletRequest req = createMockMapReduceRequest(jobId);
    replay(req);

    Configuration conf = ConfigurationXmlUtil.getConfigurationFromXml(SIMPLE_CONF_XML);
    persistMRState(jobId, conf);

    JobContext context = new AppEngineJobContext(req);
    assertEquals("/tmp/foo", context.getConfiguration().get("foo.bar"));
    assertEquals(jobId.toString(), context.getJobID().toString());
    verify(req);
}

From source file:com.google.appengine.tools.mapreduce.BlobstoreInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    String blobKey = configuration.get(BLOB_KEYS);
    int shardCount = configuration.getInt(SHARD_COUNT, DEFAULT_SHARD_COUNT);
    long blobSize = blobKeyToSize.apply(blobKey);
    return getSplits(blobKey, blobSize, shardCount);
}

From source file:com.google.appengine.tools.mapreduce.BlobstoreInputFormatTest.java

License:Apache License

/**
 * Tests that the public {@code getSplits} passes the expected arguments,
 * read from the {@code JobContext}, to the package-private overload.
 */
public void test_getSplits() throws Exception {
    String blobKey = "blobKey";
    int shardCount = 3;
    long blobSize = 1024;
    IMocksControl control = EasyMock.createControl();

    @SuppressWarnings("unchecked")
    Function<String, Long> blobKeyToSize = control.createMock(Function.class);

    JobContext jobContext = control.createMock(JobContext.class);
    Configuration configuration = control.createMock(Configuration.class);
    EasyMock.expect(jobContext.getConfiguration()).andReturn(configuration).anyTimes();
    EasyMock.expect(configuration.get(BlobstoreInputFormat.BLOB_KEYS)).andReturn(blobKey);
    EasyMock.expect(
            configuration.getInt(BlobstoreInputFormat.SHARD_COUNT, BlobstoreInputFormat.DEFAULT_SHARD_COUNT))
            .andReturn(shardCount);
    EasyMock.expect(blobKeyToSize.apply(blobKey)).andReturn(blobSize);
    // this is what we are testing

    control.replay();
    BlobstoreInputFormat inputFormat = new BlobstoreInputFormat();

    inputFormat.setBlobKeyToSize(blobKeyToSize);
    assertSplits(inputFormat.getSplits(jobContext), blobKey, blobSize, shardCount);

    control.verify();
}

From source file:com.google.appengine.tools.mapreduce.DatastoreInputFormat.java

License:Apache License

/**
 * Generates a set of InputSplits partitioning a particular entity kind in
 * the datastore. The context's configuration must define a value for the
 * {@value #ENTITY_KIND_KEY} attribute, which is the entity kind to be
 * partitioned, as well as a value for the {@value #SHARD_COUNT_KEY} attribute,
 * which is the maximum number of shards to split into.
 */
public List<InputSplit> getSplits(JobContext context) throws IOException {
    String entityKind = context.getConfiguration().get(ENTITY_KIND_KEY);
    if (entityKind == null) {
        throw new IOException("No entity kind specified in job.");
    }
    log.info("Getting input splits for: " + entityKind);

    DatastoreService datastoreService = DatastoreServiceFactory.getDatastoreService();
    Key startKey = getStartKey(entityKind, datastoreService);
    if (startKey == null) {
        return new ArrayList<InputSplit>();
    }

    int shardCount = context.getConfiguration().getInt(SHARD_COUNT_KEY, DEFAULT_SHARD_COUNT);
    int desiredScatterResultCount = shardCount * SCATTER_OVERSAMPLE_FACTOR;
    // NB(frew): If scatter doesn't exist (as in the 1.4.0 dev_appserver)
    // then we'll just end up with one split. This seems reasonable.
    Query scatter = new Query(entityKind).addSort(SCATTER_RESERVED_PROPERTY).setKeysOnly();
    List<Entity> scatterList = datastoreService.prepare(scatter).asList(withLimit(desiredScatterResultCount));
    Collections.sort(scatterList, new Comparator<Entity>() {
        public int compare(Entity e1, Entity e2) {
            return e1.getKey().compareTo(e2.getKey());
        }
    });

    List<Key> splitKeys = new ArrayList<Key>(shardCount);
    // Possibly use a lower oversampling factor if there aren't enough scatter
    // property-containing entities to fill out the list.
    int usedOversampleFactor = Math.max(1, scatterList.size() / shardCount);
    log.info("Requested " + desiredScatterResultCount + " scatter entities. Got " + scatterList.size()
            + " so using oversample factor " + usedOversampleFactor);
    // We expect the points to be uniformly randomly distributed. So we
    // act like the first point is the start key (which we already know) and
    // omit it. This converges on correct as the number of samples goes
    // to infinity.
    for (int i = 1; i < shardCount; i++) {
        // This can happen if we don't have as many scatter properties as we want.
        if (i * usedOversampleFactor >= scatterList.size()) {
            break;
        }
        splitKeys.add(scatterList.get(i * usedOversampleFactor).getKey());
    }

    return getSplitsFromSplitPoints(startKey, splitKeys);
}
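
The javadoc above notes that this implementation expects ENTITY_KIND_KEY (required) and SHARD_COUNT_KEY (optional) in the job configuration. As a purely hypothetical sketch, assuming those key constants are publicly accessible on DatastoreInputFormat, the driver side could populate the configuration that getSplits later reads like this:

// Hypothetical setup; the key constants and their string values belong to DatastoreInputFormat.
Configuration conf = new Configuration();
conf.set(DatastoreInputFormat.ENTITY_KIND_KEY, "MyEntityKind"); // entity kind to partition
conf.setInt(DatastoreInputFormat.SHARD_COUNT_KEY, 8);           // maximum number of shards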

From source file:com.google.appengine.tools.mapreduce.RangeInputFormat.java

License:Apache License

@Override
public List<InputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration conf = context.getConfiguration();
    long shardCount = conf.getInt(SHARD_COUNT_KEY, DEFAULT_SHARD_COUNT);
    long rangeStart = getNonNegativeLong(conf, RANGE_START_KEY);
    long rangeEnd = getNonNegativeLong(conf, RANGE_END_KEY);
    if (rangeStart >= rangeEnd) {
        throw new InvalidConfigurationException("Invalid range. Start: " + rangeStart + " >= end: " + rangeEnd);
    }

    double increment = ((double) rangeEnd - rangeStart) / shardCount;
    ArrayList<InputSplit> splits = new ArrayList<InputSplit>();
    for (int i = 0; i < shardCount - 1; i++) {
        splits.add(new RangeInputSplit(rangeStart + Math.round(i * increment),
                rangeStart + Math.round((i + 1) * increment)));
    }

    // Make sure that the final split hits end
    splits.add(new RangeInputSplit(rangeStart + Math.round((shardCount - 1) * increment), rangeEnd));

    return splits;
}
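
Similarly, a hypothetical driver-side sketch of setting the three values this getSplits implementation reads; the key constants are assumed to be publicly accessible on RangeInputFormat, and the concrete values below are illustrative only:

// Hypothetical setup; RangeInputFormat defines the actual key names.
Configuration conf = new Configuration();
conf.setLong(RangeInputFormat.RANGE_START_KEY, 0L);    // start of the range
conf.setLong(RangeInputFormat.RANGE_END_KEY, 10000L);  // end of the range (must be greater than the start)
conf.setInt(RangeInputFormat.SHARD_COUNT_KEY, 4);      // number of splits to produce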

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    String fileExtension = new LzopCodec().getDefaultExtension();

    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();

        if (!file.toString().endsWith(fileExtension)) {
            //get rid of non lzo files
            iterator.remove();
        } else {
            //read the index file
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }

    return files;
}

From source file:com.hadoop.mapreduce.LzoTextInputFormat.java

License:Open Source License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    List<InputSplit> splits = super.getSplits(job);
    // find new start/ends of the filesplit that aligns
    // with the lzo blocks

    List<InputSplit> result = new ArrayList<InputSplit>();
    FileSystem fs = FileSystem.get(job.getConfiguration());

    for (InputSplit genericSplit : splits) {
        // load the index
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        LzoIndex index = indexes.get(file);
        if (index == null) {
            throw new IOException("Index not found for " + file);
        }

        if (index.isEmpty()) {
            // empty index, keep as is
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        if (start != 0) {
            // find the next block position from
            // the start of the split
            long newStart = index.findNextPosition(start);
            if (newStart == -1 || newStart >= end) {
                // just skip this since it will be handled by another split
                continue;
            }
            start = newStart;
        }

        long newEnd = index.findNextPosition(end);
        if (newEnd != -1) {
            end = newEnd;
        } else {
            //didn't find the next position
            //we have hit the end of the file
            end = fs.getFileStatus(file).getLen();
        }

        result.add(new FileSplit(file, start, end - start, fileSplit.getLocations()));
    }

    return result;
}