Example usage for org.apache.hadoop.mapred JobConf get

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf.get drawn from open-source projects.

Prototype

public String get(String name) 

Document

Get the value of the name property, or null if no such property exists.
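
A minimal, self-contained sketch of calling get (the property name example.input.dir below is hypothetical, used only for illustration):

import org.apache.hadoop.mapred.JobConf;

public class JobConfGetExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        conf.set("example.input.dir", "/tmp/input"); // hypothetical property

        String inputDir = conf.get("example.input.dir"); // returns "/tmp/input"
        String missing = conf.get("example.not.set");    // returns null, property was never set

        System.out.println(inputDir + " / " + missing);
    }
}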

Usage

From source file:cascading.tap.Hfs.java

License:Open Source License

protected Path getTempPath(JobConf conf) {
    String tempDir = conf.get(TEMPORARY_DIRECTORY);

    if (tempDir == null)
        tempDir = conf.get("hadoop.tmp.dir");

    return new Path(tempDir);
}

From source file:cascading.tap.S3fs.java

License:Open Source License

@Override
protected URI makeURIScheme(JobConf jobConf) throws IOException {
    return makeURI(jobConf.get(S3FS_ID), jobConf.get(S3FS_SECRET), jobConf.get(S3FS_BUCKET));
}

From source file:cascading.tuple.hadoop.TupleSerialization.java

License:Open Source License

static String getSerializationTokens(JobConf jobConf) {
    return jobConf.get("cascading.serialization.tokens");
}

From source file:cascalog.TupleMemoryInputFormat.java

License:Open Source License

public static Object getObject(JobConf conf, String key) {
    String s = conf.get(key);
    if (s == null)
        return null;
    byte[] val = StringUtils.hexStringToByte(s);
    return deserialize(val);
}

From source file:cn.edu.xmu.dm.mapreduce.Sort.java

License:Apache License

/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 * 
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Sorter");
    job.setJarByClass(Sort.class);

    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}

From source file:cn.spark.Case.MyMultipleOutputFormat.java

License:Apache License

/**
 * Generate the output file name based on a given name and the input file name.
 * If the map input file does not exist (i.e. this is not for a map-only
 * job), the given name is returned unchanged. If the config value for
 * "num.of.trailing.legs.to.use" is not set, or is set to 0 or negative, the
 * given name is returned unchanged. Otherwise, return a file name consisting
 * of the N trailing legs of the input file name where N is the config value
 * for "num.of.trailing.legs.to.use".
 * 
 * @param job
 *            the job config
 * @param name
 *            the output file name
 * @return the output file name based on the given name and the input file name.
 */
protected String getInputFileBasedOutputFileName(JobConf job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // if the map input file does not exist, then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null)
            break;
        midName = parent.getName();
        if (midName.length() == 0)
            break;
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
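
For example, assuming map.input.file is /user/data/2015/part-00000 and mapred.outputformat.numOfTrailingLegs is 2, the loop above keeps the two trailing path legs and the method returns 2015/part-00000 (the passed-in name is ignored in that case).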

From source file:co.cask.cdap.data.runtime.main.ResourcesClassLoaderTest.java

License:Apache License

@SuppressWarnings("AccessStaticViaInstance")
@Test
public void testCustomResourceLoading() throws Exception {
    // Using default classloader
    JobConf jobConf = new JobConf();
    // foo-loader is not defined in default classloader
    Assert.assertNull(jobConf.get("foo-loader"));
    // On first load, TestClass.init should be false
    Assert.assertFalse(TestClass.init);
    TestClass.init = true;

    // Using ResourcesClassLoader with URL /test-conf
    URL url = getClass().getResource("/test-conf/mapred-site.xml");
    ClassLoader previousClassLoader = ClassLoaders.setContextClassLoader(
            new ResourcesClassLoader(new URL[] { getParentUrl(url) }, getClass().getClassLoader()));
    jobConf = new JobConf();
    Assert.assertEquals("bar-loader", jobConf.get("foo-loader"));
    // TestClass is already initialized earlier, hence TestClass.init should be true
    TestClass testClass = (TestClass) Thread.currentThread().getContextClassLoader()
            .loadClass(TestClass.class.getName()).newInstance();
    Assert.assertTrue(testClass.init);
    ClassLoaders.setContextClassLoader(previousClassLoader);

    // Using ResourcesClassLoader with URL /test-app-conf
    url = getClass().getResource("/test-app-conf/mapred-site.xml");
    previousClassLoader = ClassLoaders.setContextClassLoader(
            new ResourcesClassLoader(new URL[] { getParentUrl(url) }, getClass().getClassLoader()));
    jobConf = new JobConf();
    Assert.assertEquals("baz-app-loader", jobConf.get("foo-loader"));
    // TestClass is already initialized earlier, hence TestClass.init should be true
    testClass = (TestClass) Thread.currentThread().getContextClassLoader().loadClass(TestClass.class.getName())
            .newInstance();
    Assert.assertTrue(testClass.init);
    ClassLoaders.setContextClassLoader(previousClassLoader);
}

From source file:co.cask.cdap.hive.stream.HiveStreamInputFormat.java

License:Apache License

private StreamInputSplitFinder<InputSplit> getSplitFinder(JobConf conf) throws IOException {
    // first get the context we are in
    ContextManager.Context context = ContextManager.getContext(conf);

    String streamName = conf.get(Constants.Explore.STREAM_NAME);
    String streamNamespace = conf.get(Constants.Explore.STREAM_NAMESPACE);
    Id.Stream streamId = Id.Stream.from(streamNamespace, streamName);
    StreamConfig streamConfig = context.getStreamConfig(streamId);
    // make sure we get the current generation so we don't read events that occurred before a truncate.
    Location streamPath = StreamUtils.createGenerationLocation(streamConfig.getLocation(),
            StreamUtils.getGeneration(streamConfig));

    StreamInputSplitFinder.Builder builder = StreamInputSplitFinder.builder(streamPath.toURI());

    // Get the Hive table path for the InputSplit created. It is just to satisfy Hive; the InputFormat never uses it.
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(conf));
    final Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    return setupBuilder(conf, streamConfig, builder).build(new StreamInputSplitFactory<InputSplit>() {
        @Override
        public InputSplit createSplit(Path eventPath, Path indexPath, long startTime, long endTime, long start,
                long length, @Nullable String[] locations) {
            return new StreamInputSplit(tablePaths[0], eventPath, indexPath, startTime, endTime, start, length,
                    locations);
        }
    });
}

From source file:co.nubetech.hiho.mapred.MySQLLoadDataMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    try {
        Class.forName("com.mysql.jdbc.Driver").newInstance();

        String connString = job.get(DBConfiguration.URL_PROPERTY);
        String username = job.get(DBConfiguration.USERNAME_PROPERTY);
        String password = job.get(DBConfiguration.PASSWORD_PROPERTY);

        logger.debug("Connection values are " + connString + " " + username + "/" + password);
        connect(connString, username, password);

    } catch (Exception e) {
        e.printStackTrace();
    }
    querySuffix = job.get(HIHOConf.LOAD_QUERY_SUFFIX);
    hasHeaderLine = job.getBoolean(HIHOConf.LOAD_HAS_HEADER, false);
    keyIsTableName = job.getBoolean(HIHOConf.LOAD_KEY_IS_TABLENAME, false);
    disableKeys = job.getBoolean(HIHOConf.LOAD_DISABLE_KEYS, false);
}
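
For context, the DBConfiguration properties read in configure above are normally populated on the job before submission; a minimal sketch using the org.apache.hadoop.mapred.lib.db.DBConfiguration helper, with placeholder connection values chosen only for illustration, might look like this:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.db.DBConfiguration;

public class ConfigureDBExample {
    public static JobConf buildJobConf() {
        JobConf job = new JobConf();
        // Placeholder driver, URL and credentials for illustration only
        DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
                "jdbc:mysql://localhost:3306/testdb", "user", "password");
        return job;
    }
}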

From source file:colossal.pipe.AvroGroupPartitioner.java

License:Apache License

@Override
public void configure(JobConf conf) {
    //Schema schema = Schema.parse(conf.get(ColPhase.MAP_OUT_VALUE_SCHEMA));
    String groupBy = conf.get(ColPhase.GROUP_BY);
    String[] groupFields = groupBy == null ? new String[0] : groupBy.split(",");
    groupNames = new ArrayList<String>(groupFields.length);

    ReflectionKeyExtractor.addFieldnames(groupNames, groupFields);
}