List of usage examples for org.apache.hadoop.mapred.JobConf.get
public String get(String name)

Returns the value of the name property, or null if no such property exists.
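Before the real-project examples below, here is a minimal sketch of the usual lookup-with-fallback pattern around this method (the property name my.example.dir and the /tmp fallback are placeholders chosen for illustration):

public static String getExampleDir(JobConf conf) {
    // get(name) returns null when the property is unset, so guard with a fallback
    String dir = conf.get("my.example.dir");
    if (dir == null) {
        dir = "/tmp"; // placeholder default
    }
    return dir;
}

JobConf also inherits get(String name, String defaultValue) from Configuration, which collapses the null check into a single call.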
From source file: cascading.tap.Hfs.java
License: Open Source License
protected Path getTempPath(JobConf conf) {
    String tempDir = conf.get(TEMPORARY_DIRECTORY);

    // fall back to Hadoop's generic temp dir when the Cascading property is unset
    if (tempDir == null)
        tempDir = conf.get("hadoop.tmp.dir");

    return new Path(tempDir);
}
From source file: cascading.tap.S3fs.java
License: Open Source License
@Override
protected URI makeURIScheme(JobConf jobConf) throws IOException {
    return makeURI(jobConf.get(S3FS_ID), jobConf.get(S3FS_SECRET), jobConf.get(S3FS_BUCKET));
}
From source file: cascading.tuple.hadoop.TupleSerialization.java
License: Open Source License
static String getSerializationTokens(JobConf jobConf) {
    return jobConf.get("cascading.serialization.tokens");
}
From source file: cascalog.TupleMemoryInputFormat.java
License: Open Source License
public static Object getObject(JobConf conf, String key) {
    String s = conf.get(key);
    if (s == null)
        return null;
    // values are stored as hex strings; decode back to bytes and deserialize
    byte[] val = StringUtils.hexStringToByte(s);
    return deserialize(val);
}
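A sketch of the storing side that getObject implies: serialize the object, hex-encode the bytes, and set the string on the JobConf. The method name setObject and the serialize helper are assumptions for illustration, not verified against the cascalog source:

public static void setObject(JobConf conf, String key, Object o) {
    // byteToHexString is the inverse of the hexStringToByte call used above
    byte[] val = serialize(o); // hypothetical counterpart to deserialize
    conf.set(key, StringUtils.byteToHexString(val));
}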
From source file: cn.edu.xmu.dm.mapreduce.Sort.java
License: Apache License
/**
 * The main driver for the sort program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException
 *             When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = new Job(conf, "Sorter");
    job.setJarByClass(Sort.class);

    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("sorter");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                jobConf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumReduceTasks(num_reduces);

    jobConf.setInputFormat(inputFormatClass);
    jobConf.setOutputFormat(outputFormatClass);

    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(jobConf, otherArgs.get(0));
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.<K, V>writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file: cn.spark.Case.MyMultipleOutputFormat.java
License: Apache License
/**
 * Generate the output file name based on a given name and the input file
 * name. If the map input file does not exist (i.e. this is not for a map
 * only job), the given name is returned unchanged. If the config value for
 * "num.of.trailing.legs.to.use" is not set, or is set to 0 or negative, the
 * given name is returned unchanged. Otherwise, return a file name
 * consisting of the N trailing legs of the input file name, where N is the
 * config value for "num.of.trailing.legs.to.use".
 *
 * @param job
 *            the job config
 * @param name
 *            the output file name
 * @return the output file name based on the given name and the input file name.
 */
protected String getInputFileBasedOutputFileName(JobConf job, String name) {
    String infilepath = job.get("map.input.file");
    if (infilepath == null) {
        // if the map input file does not exist, then return the given name
        return name;
    }
    int numOfTrailingLegsToUse = job.getInt("mapred.outputformat.numOfTrailingLegs", 0);
    if (numOfTrailingLegsToUse <= 0) {
        return name;
    }
    Path infile = new Path(infilepath);
    Path parent = infile.getParent();
    String midName = infile.getName();
    Path outPath = new Path(midName);
    for (int i = 1; i < numOfTrailingLegsToUse; i++) {
        if (parent == null)
            break;
        midName = parent.getName();
        if (midName.length() == 0)
            break;
        parent = parent.getParent();
        outPath = new Path(midName, outPath);
    }
    return outPath.toString();
}
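As a worked example of the loop above: with mapred.outputformat.numOfTrailingLegs set to 2 and a (hypothetical) map input file of /data/2015/01/part-0000, the loop prepends one trailing directory leg to the file name, and the method returns 01/part-0000.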
From source file: co.cask.cdap.data.runtime.main.ResourcesClassLoaderTest.java
License: Apache License
@SuppressWarnings("AccessStaticViaInstance")
@Test
public void testCustomResourceLoading() throws Exception {
    // Using default classloader
    JobConf jobConf = new JobConf();
    // foo-loader is not defined in default classloader
    Assert.assertNull(jobConf.get("foo-loader"));
    // On first load, TestClass.init should be false
    Assert.assertFalse(TestClass.init);
    TestClass.init = true;

    // Using ResourcesClassLoader with URL /test-conf
    URL url = getClass().getResource("/test-conf/mapred-site.xml");
    ClassLoader previousClassLoader = ClassLoaders.setContextClassLoader(
            new ResourcesClassLoader(new URL[] { getParentUrl(url) }, getClass().getClassLoader()));
    jobConf = new JobConf();
    Assert.assertEquals("bar-loader", jobConf.get("foo-loader"));
    // TestClass was already initialized earlier, hence TestClass.init should be true
    TestClass testClass = (TestClass) Thread.currentThread().getContextClassLoader()
            .loadClass(TestClass.class.getName()).newInstance();
    Assert.assertTrue(testClass.init);
    ClassLoaders.setContextClassLoader(previousClassLoader);

    // Using ResourcesClassLoader with URL /test-app-conf
    url = getClass().getResource("/test-app-conf/mapred-site.xml");
    previousClassLoader = ClassLoaders.setContextClassLoader(
            new ResourcesClassLoader(new URL[] { getParentUrl(url) }, getClass().getClassLoader()));
    jobConf = new JobConf();
    Assert.assertEquals("baz-app-loader", jobConf.get("foo-loader"));
    // TestClass was already initialized earlier, hence TestClass.init should be true
    testClass = (TestClass) Thread.currentThread().getContextClassLoader().loadClass(TestClass.class.getName())
            .newInstance();
    Assert.assertTrue(testClass.init);
    ClassLoaders.setContextClassLoader(previousClassLoader);
}
From source file: co.cask.cdap.hive.stream.HiveStreamInputFormat.java
License: Apache License
private StreamInputSplitFinder<InputSplit> getSplitFinder(JobConf conf) throws IOException {
    // first get the context we are in
    ContextManager.Context context = ContextManager.getContext(conf);

    String streamName = conf.get(Constants.Explore.STREAM_NAME);
    String streamNamespace = conf.get(Constants.Explore.STREAM_NAMESPACE);
    Id.Stream streamId = Id.Stream.from(streamNamespace, streamName);
    StreamConfig streamConfig = context.getStreamConfig(streamId);
    // make sure we get the current generation so we don't read events that occurred before a truncate.
    Location streamPath = StreamUtils.createGenerationLocation(streamConfig.getLocation(),
            StreamUtils.getGeneration(streamConfig));

    StreamInputSplitFinder.Builder builder = StreamInputSplitFinder.builder(streamPath.toURI());

    // Get the Hive table path for the InputSplit created. It is just to satisfy Hive. The InputFormat never uses it.
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(Job.getInstance(conf));
    final Path[] tablePaths = FileInputFormat.getInputPaths(jobContext);

    return setupBuilder(conf, streamConfig, builder).build(new StreamInputSplitFactory<InputSplit>() {
        @Override
        public InputSplit createSplit(Path eventPath, Path indexPath, long startTime, long endTime, long start,
                long length, @Nullable String[] locations) {
            return new StreamInputSplit(tablePaths[0], eventPath, indexPath, startTime, endTime, start, length,
                    locations);
        }
    });
}
From source file: co.nubetech.hiho.mapred.MySQLLoadDataMapper.java
License: Apache License
@Override
public void configure(JobConf job) {
    try {
        Class.forName("com.mysql.jdbc.Driver").newInstance();
        String connString = job.get(DBConfiguration.URL_PROPERTY);
        String username = job.get(DBConfiguration.USERNAME_PROPERTY);
        String password = job.get(DBConfiguration.PASSWORD_PROPERTY);
        logger.debug("Connection values are " + connString + " " + username + "/" + password);
        connect(connString, username, password);
    } catch (Exception e) {
        e.printStackTrace();
    }
    querySuffix = job.get(HIHOConf.LOAD_QUERY_SUFFIX);
    hasHeaderLine = job.getBoolean(HIHOConf.LOAD_HAS_HEADER, false);
    keyIsTableName = job.getBoolean(HIHOConf.LOAD_KEY_IS_TABLENAME, false);
    disableKeys = job.getBoolean(HIHOConf.LOAD_DISABLE_KEYS, false);
}
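For context, a minimal sketch of how the submitting side typically populates the three DBConfiguration properties read above, using Hadoop's org.apache.hadoop.mapred.lib.db.DBConfiguration helper (the connection URL and credentials are placeholders):

JobConf job = new JobConf();
// sets DBConfiguration.URL_PROPERTY, USERNAME_PROPERTY, and PASSWORD_PROPERTY
DBConfiguration.configureDB(job, "com.mysql.jdbc.Driver",
        "jdbc:mysql://localhost/exampledb", "user", "secret");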
From source file: colossal.pipe.AvroGroupPartitioner.java
License: Apache License
@Override
public void configure(JobConf conf) {
    //Schema schema = Schema.parse(conf.get(ColPhase.MAP_OUT_VALUE_SCHEMA));
    String groupBy = conf.get(ColPhase.GROUP_BY);
    String[] groupFields = groupBy == null ? new String[0] : groupBy.split(",");
    groupNames = new ArrayList<String>(groupFields.length);
    ReflectionKeyExtractor.addFieldnames(groupNames, groupFields);
}