List of usage examples for the JobConf constructor in org.apache.hadoop.mapred.JobConf
public JobConf(boolean loadDefaults)
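Before the collected examples, here is a minimal sketch of what this particular overload controls: when loadDefaults is false, the JobConf skips the default resources (core-default.xml, mapred-default.xml, and their site overrides), so only properties set explicitly are visible. The property keys below are standard Hadoop configuration names; the surrounding class is only for illustration.

import org.apache.hadoop.mapred.JobConf;

public class JobConfLoadDefaultsExample {
    public static void main(String[] args) {
        // Load the standard default/site resources (behaves like new JobConf()).
        JobConf withDefaults = new JobConf(true);

        // Skip the default resources entirely; only explicit settings apply.
        JobConf bare = new JobConf(false);
        bare.set("mapreduce.job.name", "example");

        // A key defined in mapred-default.xml resolves in the first conf
        // but falls back to the caller-supplied default in the second.
        System.out.println(withDefaults.get("mapreduce.task.io.sort.mb", "unset"));
        System.out.println(bare.get("mapreduce.task.io.sort.mb", "unset"));
    }
}

Most of the examples that follow use the other overloads, JobConf(Configuration) and JobConf(Class), but the loadDefaults behaviour above is what they all build on.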
From source file:co.cask.cdap.explore.guice.ExploreRuntimeModule.java
License:Apache License
private static void setupClasspath(File tmpDir) throws IOException {
    // Here we find the transitive dependencies and remove all paths that come from the boot class path -
    // those paths are not needed because the new JVM will have them in its boot class path.
    // It could even be wrong to keep them because in the target container, the boot class path may be
    // different (for example, if Hadoop uses a different Java version than CDAP).
    final Set<String> bootstrapClassPaths = ExploreServiceUtils.getBoostrapClasses();

    ClassAcceptor classAcceptor = new ClassAcceptor() {
        /* Exclude any class contained in the bootstrapClassPaths, Kryo classes, and hive-exec.jar.
         * We need to remove the Kryo dependency in the Explore container. Spark introduced Kryo version 2.21,
         * which would normally be shipped to the Explore container. Yet Hive requires Kryo 2.22,
         * and gets it from the Hive jars - hive-exec.jar to be precise.
         * We also exclude Hive jars, as Hive dependencies are found in job.jar.
         */
        @Override
        public boolean accept(String className, URL classUrl, URL classPathUrl) {
            if (bootstrapClassPaths.contains(classPathUrl.getFile())
                    || className.startsWith("com.esotericsoftware.kryo")
                    || classPathUrl.getFile().contains("hive")) {
                return false;
            }
            return true;
        }
    };

    Set<File> hBaseTableDeps = ExploreServiceUtils.traceDependencies(null, classAcceptor, tmpDir,
            HBaseTableUtilFactory.getHBaseTableUtilClass().getName());

    // Note the order of dependency jars is important so that HBase jars come first in the classpath order.
    // LinkedHashSet maintains insertion order while removing duplicate entries.
    Set<File> orderedDependencies = new LinkedHashSet<>();
    orderedDependencies.addAll(hBaseTableDeps);
    orderedDependencies.addAll(ExploreServiceUtils.traceDependencies(null, classAcceptor, tmpDir,
            RemoteDatasetFramework.class.getName(), DatasetStorageHandler.class.getName(),
            RecordFormats.class.getName()));

    // Note: the class path entries need to be prefixed with "file://" for the jars to work when
    // Hive starts a local map-reduce job.
    ImmutableList.Builder<String> builder = ImmutableList.builder();
    for (File dep : orderedDependencies) {
        builder.add("file://" + dep.getAbsolutePath());
    }
    List<String> orderedDependenciesStr = builder.build();

    // These dependency files need to be copied over to the Spark container.
    System.setProperty(BaseHiveExploreService.SPARK_YARN_DIST_FILES,
            Joiner.on(',').join(Iterables.transform(orderedDependencies, new Function<File, String>() {
                @Override
                public String apply(File input) {
                    return input.getAbsolutePath();
                }
            })));
    LOG.debug("Setting {} to {}", BaseHiveExploreService.SPARK_YARN_DIST_FILES,
            System.getProperty(BaseHiveExploreService.SPARK_YARN_DIST_FILES));

    // These dependency files need to be copied over to the Hive job container.
    System.setProperty(HiveConf.ConfVars.HIVEAUXJARS.toString(), Joiner.on(',').join(orderedDependenciesStr));
    LOG.debug("Setting {} to {}", HiveConf.ConfVars.HIVEAUXJARS.toString(),
            System.getProperty(HiveConf.ConfVars.HIVEAUXJARS.toString()));

    // Add hive-exec.jar to the HADOOP_CLASSPATH, which is used by the local MapReduce job launched by Hive.
    // We need to add this, otherwise when Hive runs a MapRedLocalTask it cannot find the
    // "org.apache.hadoop.hive.serde2.SerDe" class in its classpath.
    List<String> orderedDependenciesWithHiveJar = Lists.newArrayList(orderedDependenciesStr);
    String hiveExecJar = new JobConf(org.apache.hadoop.hive.ql.exec.Task.class).getJar();
    Preconditions.checkNotNull(hiveExecJar, "Couldn't locate hive-exec.jar to be included in HADOOP_CLASSPATH "
            + "for MapReduce jobs launched by Hive");
    orderedDependenciesWithHiveJar.add(hiveExecJar);
    LOG.debug("Added hive-exec.jar {} to HADOOP_CLASSPATH to be included for MapReduce jobs launched by Hive",
            hiveExecJar);

    // TODO: Setup HADOOP_CLASSPATH hack; for more info on why this is needed, see CDAP-9.
    LocalMapreduceClasspathSetter classpathSetter = new LocalMapreduceClasspathSetter(new HiveConf(),
            tmpDir.getAbsolutePath(), orderedDependenciesWithHiveJar);
    for (File jar : hBaseTableDeps) {
        classpathSetter.accept(jar.getAbsolutePath());
    }
    classpathSetter.setupClasspathScript();
}
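The noteworthy JobConf usage above is constructing a JobConf from a class purely to locate the jar containing it: new JobConf(SomeClass.class).getJar() returns the path of the jar hosting that class (hive-exec.jar in this example). A minimal, standalone sketch of that pattern; the class passed in below is just an illustration, not the one used by CDAP:

import org.apache.hadoop.mapred.JobConf;

public class LocateContainingJar {
    public static void main(String[] args) {
        // JobConf(Class) records the jar that contains the given class as the job jar;
        // getJar() then returns its filesystem path, or null if the class was loaded
        // from a plain classes directory rather than a jar (e.g. inside an IDE).
        String jar = new JobConf(org.apache.hadoop.util.Tool.class).getJar();
        System.out.println("Class lives in: " + jar);
    }
}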
From source file:co.nubetech.hiho.job.DBQueryInputJob.java
License:Apache License
public void runJobs(Configuration conf, int jobCounter) throws IOException {

    try {
        checkMandatoryConfs(conf);
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new IOException(e1);
    }

    Job job = new Job(conf);
    for (Entry<String, String> entry : conf) {
        logger.warn("key, value " + entry.getKey() + "=" + entry.getValue());
    }

    // logger.debug("Number of maps " + conf.getInt("mapred.map.tasks", 1));
    // conf.setInt(JobContext.NUM_MAPS, conf.getInt("mapreduce.job.maps", 1));
    // job.getConfiguration().setInt("mapred.map.tasks", 4);
    job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1));
    logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1));

    job.setJobName("Import job");
    job.setJarByClass(DBQueryInputJob.class);

    String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY);
    OutputStrategyEnum os = OutputStrategyEnum.value(strategy);
    if (os == null) {
        throw new IllegalArgumentException("Wrong value of output strategy. Please correct");
    }

    if (os != OutputStrategyEnum.AVRO) {
        switch (os) {
        case DUMP: {
            // job.setMapperClass(DBImportMapper.class);
            break;
        }
        /*
         * case AVRO: {
         *     job.setMapperClass(DBInputAvroMapper.class);
         *     // need avro in cp
         *     // job.setJarByClass(Schema.class);
         *     // need jackson which is needed by avro - ugly!
         *     // job.setJarByClass(ObjectMapper.class);
         *     job.setMapOutputKeyClass(NullWritable.class);
         *     job.setMapOutputValueClass(AvroValue.class);
         *     job.setOutputKeyClass(NullWritable.class);
         *     job.setOutputValueClass(AvroValue.class);
         *     job.setOutputFormatClass(AvroOutputFormat.class);
         *     AvroOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
         *     break;
         * }
         */
        case DELIMITED: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);
            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
        }
        case JSON: {
            // job.setMapperClass(DBImportJsonMapper.class);
            // job.setJarByClass(ObjectMapper.class);
            break;
        }
        default: {
            job.setMapperClass(DBInputDelimMapper.class);
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);
            job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class);
            NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
            break;
        }
        }

        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        job.setNumReduceTasks(0);

        try {
            // job.setJarByClass(Class.forName(conf.get(
            //         org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY)));
            logger.debug("OUTPUT format class is " + job.getOutputFormatClass());
            /*
             * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output =
             *         ReflectionUtils.newInstance(job.getOutputFormatClass(), job.getConfiguration());
             * output.checkOutputSpecs(job);
             */
            logger.debug("Class is " + ReflectionUtils
                    .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
            job.waitForCompletion(false);
            if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) {
                generateHiveScript(conf, job, jobCounter);
                generatePigScript(conf, job);
            }
        } catch (HIHOException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    // Avro is handled differently, thanks to all the incompatibilities in the APIs.
    else {
        String inputQuery = conf.get(DBConfiguration.INPUT_QUERY);
        String inputBoundingQuery = conf.get(DBConfiguration.INPUT_BOUNDING_QUERY);
        logger.debug("About to set the params");
        // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job,
        //         inputQuery, inputBoundingQuery, params);
        logger.debug("Set the params");

        JobConf jobConf = new JobConf(conf);
        try {
            GenericDBWritable queryWritable = getDBWritable(jobConf);
            Schema pair = DBMapper.getPairSchema(queryWritable.getColumns());
            AvroJob.setMapOutputSchema(jobConf, pair);
            GenericRecordAvroOutputFormat.setOutputPath(jobConf,
                    new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));

            co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(jobConf, inputQuery,
                    inputBoundingQuery, params);
            jobConf.setInputFormat(co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.class);
            jobConf.setMapperClass(DBInputAvroMapper.class);
            jobConf.setMapOutputKeyClass(NullWritable.class);
            jobConf.setMapOutputValueClass(AvroValue.class);
            jobConf.setOutputKeyClass(NullWritable.class);
            jobConf.setOutputValueClass(Text.class);
            jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class);
            jobConf.setJarByClass(DBQueryInputJob.class);
            jobConf.setStrings("io.serializations",
                    "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization");
            jobConf.setNumReduceTasks(0);
            /*
             * jobConf.setOutputFormat(org.apache.hadoop.mapred.SequenceFileOutputFormat.class);
             * org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(jobConf,
             *         new Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH)));
             */
            JobClient.runJob(jobConf);
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}
From source file:co.nubetech.hiho.mapreduce.lib.input.TestFileStreamInputFormat.java
License:Apache License
@Test
public void testNumInputs() throws Exception {
    Configuration conf = new Configuration();
    JobConf job = new JobConf(conf);
    MiniDFSCluster dfs = newDFSCluster(job);
    FileSystem fs = dfs.getFileSystem();
    System.out.println("FileSystem " + fs.getUri());

    Path inputDir = new Path("/foo/");
    final int numFiles = 10;
    String fileNameBase = "part-0000";
}
From source file:colossal.pipe.ColPhase.java
License:Apache License
public List<PhaseError> plan(ColPipe distPipeline) {
    List<PhaseError> errors = new ArrayList<PhaseError>();
    conf = new JobConf(distPipeline.getConf());
    for (Map.Entry<String, String> entry : props.entrySet()) {
        conf.set(entry.getKey(), entry.getValue());
    }

    Schema mapin = null;
    Class<?> mapOutClass = null;
    Class<?> mapInClass = null;

    Class<? extends ColMapper> mapperClass = null;
    if (mappers != null && mappers.length > 0) {
        if (mappers.length > 1) {
            errors.add(new PhaseError(
                    "Colossal phase/avro currently only supports one mapper per process: " + name));
        } else {
            mapperClass = mappers[0];
            conf.set(MAPPER, mapperClass.getName());
            Class<?> foundIn = null;
            for (Method m : mapperClass.getMethods()) {
                if ("map".equals(m.getName())) {
                    Class<?>[] paramTypes = m.getParameterTypes();
                    if (paramTypes.length >= 3) {
                        try {
                            // prefer subclass methods to superclass methods
                            if (foundIn == null || foundIn.isAssignableFrom(m.getDeclaringClass())) {
                                if (paramTypes[0] == Object.class) {
                                    if (foundIn == m.getDeclaringClass()) {
                                        // skip the generated "override" of the generic method
                                        continue;
                                    }
                                } else {
                                    // TODO: handle cases beyond Object where output isn't defined
                                    mapInClass = paramTypes[0];
                                    mapin = getSchema(paramTypes[0].newInstance());
                                }
                                mapOutClass = paramTypes[1];
                                foundIn = m.getDeclaringClass();
                            }
                        } catch (Exception e) {
                            errors.add(new PhaseError(e, "Can't create mapper: " + mapperClass));
                        }
                    }
                }
            }
        }
    }

    if (combiners != null && combiners.length > 0) {
        if (combiners.length > 1) {
            errors.add(new PhaseError(
                    "Colossal phase/avro currently only supports one combiner per process: " + name));
        } else {
            conf.set(COMBINER, combiners[0].getName());
            conf.setCombinerClass(ColHadoopCombiner.class);
        }
    }

    Schema reduceout = null;
    Class<?> reduceOutClass = null;
    Class<? extends ColReducer> reducerClass = null;
    if (reducers != null && reducers.length > 0) {
        if (reducers.length != 1) {
            errors.add(new PhaseError(
                    "Colossal phase/avro currently only supports one reducer per process: " + name));
        } else {
            reducerClass = reducers[0];
            conf.set(REDUCER, reducers[0].getName());
            Class<?> foundIn = null;
            for (Method m : reducerClass.getMethods()) {
                if ("reduce".equals(m.getName())) {
                    Class<?>[] paramTypes = m.getParameterTypes();
                    if (paramTypes.length >= 3) {
                        if (foundIn == null || foundIn.isAssignableFrom(m.getDeclaringClass())) {
                            if (foundIn == m.getDeclaringClass() && paramTypes[1] == Object.class) {
                                // skip the generated "override" of the generic method
                                continue;
                            }
                            // prefer subclass methods to superclass methods
                            reduceOutClass = paramTypes[1];
                            foundIn = m.getDeclaringClass();
                        }
                    }
                }
            }
            // XXX validation!
        }
    }

    Object reduceOutProto = null;
    // TODO: handle cases beyond Object where output isn't defined
    if ((reduceOutClass == null || reduceOutClass == Object.class) && mainWrites != null
            && mainWrites.size() > 0) {
        reduceOutProto = mainWrites.get(0).getPrototype();
        reduceOutClass = reduceOutProto.getClass();
    } else {
        try {
            reduceOutProto = reduceOutClass.newInstance();
        } catch (Exception e) {
            errors.add(new PhaseError(e, "Can't create reducer output class: " + reduceOutClass));
        }
    }
    if (reduceOutProto != null)
        reduceout = getSchema(reduceOutProto);
    conf.set(REDUCE_OUT_CLASS, reduceOutClass.getName());

    Schema valueSchema = null;
    if (mainWrites.size() != 1) {
        errors.add(new PhaseError(
                "Colossal phase/avro currently only supports one output per process: " + name));
    } else {
        ColFile output = mainWrites.get(0);
        AvroOutputFormat.setOutputPath(conf, new Path(output.getPath()));

        if (output.getPrototype() != null) {
            valueSchema = getSchema(output.getPrototype());
            if (reduceout != null) {
                assert reduceout.equals(valueSchema); // should make an error not assert this!
            }
        } else {
            if (reduceout == null) {
                errors.add(new PhaseError("No output format defined"));
            }
            valueSchema = reduceout;
        }
        output.setupOutput(conf);
    }
    conf.set(AvroJob.OUTPUT_SCHEMA, valueSchema.toString());

    if (deflateLevel != null)
        AvroOutputFormat.setDeflateLevel(conf, deflateLevel);

    Object proto = null;
    if (mainReads != null && mainReads.size() > 0) {
        Path[] inPaths = new Path[mainReads.size()];
        int i = 0;
        for (ColFile file : mainReads) {
            inPaths[i++] = new Path(file.getPath());
            Object myProto = file.getPrototype();
            if (myProto == null) {
                errors.add(new PhaseError("Files need non-null prototypes " + file));
            } else if (proto != null) {
                if (myProto.getClass() != proto.getClass()) {
                    errors.add(new PhaseError("Inconsistent prototype classes for inputs: " + myProto.getClass()
                            + " vs " + proto.getClass() + " for " + file));
                }
            } else {
                proto = myProto;
            }
        }
        AvroInputFormat.setInputPaths(conf, inPaths);

        if (mapin == null) {
            if (proto == null) {
                errors.add(new PhaseError("Undefined input format"));
            } else {
                mapin = getSchema(proto);
                mapInClass = proto.getClass();
            }
        }
        mainReads.get(0).setupInput(conf);
        if (conf.get("mapred.input.format.class") == null)
            conf.setInputFormat(AvroInputFormat.class);
    }

    Schema mapValueSchema = null;
    try {
        // TODO: handle cases beyond Object where input isn't defined
        if (mapOutClass == null || mapOutClass == Object.class) {
            assert mapperClass == null;
            if (proto != null) {
                mapOutClass = proto.getClass();
                mapValueSchema = getSchema(proto);
            } else {
                // not available - try to get it from the reducer
                if (reducerClass == null) {
                    mapOutClass = reduceOutClass;
                    mapValueSchema = getSchema(reduceOutClass.newInstance());
                } else {
                    // can't get it from reducer input - that's just Iterable
                    String fname = "no input file specified";
                    if (mainReads != null && mainReads.size() > 0)
                        fname = mainReads.get(0).getPath();
                    errors.add(new PhaseError(
                            "No input format specified for identity mapper - specify it on input file "
                                    + fname));
                }
            }
        } else {
            mapValueSchema = getSchema(mapOutClass.newInstance());
        }
        if (mapValueSchema != null)
            conf.set(MAP_OUT_VALUE_SCHEMA, mapValueSchema.toString());
    } catch (Exception e) {
        errors.add(new PhaseError(e, "Can't create instance of map output class: " + mapOutClass));
    }
    conf.set(MAP_OUT_CLASS, mapOutClass.getName());
    conf.set(MAP_IN_CLASS, mapInClass.getName());
    // XXX validation!

    if (proto != null) {
        conf.set(AvroJob.INPUT_SCHEMA, getSchema(proto).toString());
    } else if (mapin != null) {
        conf.set(AvroJob.INPUT_SCHEMA, mapin.toString());
    } else {
        errors.add(new PhaseError("No map input defined"));
    }

    if (groupBy != null || sortBy != null) {
        conf.set(MAP_OUT_KEY_SCHEMA, group(mapValueSchema, groupBy, sortBy).toString());
    }
    if (groupBy != null) {
        conf.set(GROUP_BY, groupBy);
        AvroJob.setOutputMeta(conf, GROUP_BY, groupBy);
    }
    if (sortBy != null) {
        conf.setPartitionerClass(AvroGroupPartitioner.class);
        conf.set(SORT_BY, sortBy);
        AvroJob.setOutputMeta(conf, SORT_BY, sortBy);
    }

    conf.setMapOutputKeyClass(AvroKey.class);
    conf.setMapOutputValueClass(AvroValue.class);
    conf.setOutputKeyComparatorClass(ColKeyComparator.class);

    conf.setMapperClass(ColHadoopMapper.class);
    conf.setReducerClass(ColHadoopReducer.class);

    for (Map.Entry<String, String> entry : textMeta.entrySet())
        AvroJob.setOutputMeta(conf, entry.getKey(), entry.getValue());

    // add ColAvroSerialization to io.serializations
    Collection<String> serializations = conf.getStringCollection("io.serializations");
    if (!serializations.contains(ColAvroSerialization.class.getName())) {
        serializations.add(ColAvroSerialization.class.getName());
        conf.setStrings("io.serializations", serializations.toArray(new String[0]));
    }
    return errors;
}
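One reusable detail from plan() above is how it appends a custom serialization to io.serializations without clobbering the ones already configured: read the existing list with getStringCollection, add the new class name, and write the list back with setStrings. A small sketch of that pattern in isolation; the serialization class name below is only a stand-in for ColAvroSerialization:

import java.util.Collection;
import org.apache.hadoop.mapred.JobConf;

public class AddSerializationExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Stand-in for a project-specific serialization class; any fully qualified name works here.
        String extra = "org.apache.avro.hadoop.io.AvroSerialization";

        Collection<String> serializations = conf.getStringCollection("io.serializations");
        if (!serializations.contains(extra)) {
            serializations.add(extra);
            // setStrings joins the values with commas, preserving the defaults
            // (WritableSerialization etc.) that were already present.
            conf.setStrings("io.serializations", serializations.toArray(new String[0]));
        }
        System.out.println(conf.get("io.serializations"));
    }
}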
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileJob.java
License:Apache License
/**
 * The driver for the MapReduce job.
 *
 * @param conf              configuration
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws java.io.IOException         if something went wrong
 * @throws java.net.URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final Configuration conf, final String inputDirAsString, final String outputDirAsString)
        throws IOException, URISyntaxException, ClassNotFoundException, InterruptedException {

    JobConf job = new JobConf(conf);

    job.setJarByClass(CombineSequenceFileJob.class);
    job.setJobName("seqfilecombiner");

    job.setNumReduceTasks(0);

    job.setMapperClass(IdentityMapper.class);

    job.setInputFormat(CombineSequenceFileInputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.setInputPaths(job, inputDirAsString);
    FileOutputFormat.setOutputPath(job, new Path(outputDirAsString));

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);

    RunningJob jobResult = JobClient.runJob(job);

    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    return jobResult.isSuccessful();
}
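A possible way to invoke this driver; the main class, the input path (which may be a comma-separated list), and the output path are placeholders, not part of the original source:

import org.apache.hadoop.conf.Configuration;

public class CombineSequenceFileJobLauncher {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        CombineSequenceFileJob job = new CombineSequenceFileJob();
        // "/data/in" holds the SequenceFiles to combine; "/data/out" must not exist yet.
        boolean success = job.runJob(conf, "/data/in", "/data/out");
        System.exit(success ? 0 : 1);
    }
}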
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
@Test
public void testOneFile() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile = new Path(dir, "file1.txt");

    writeSequenceFile(inputFile);

    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);

    FileInputFormat.addInputPath(jobConf, inputFile);

    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);

    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();

    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);

    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java
License:Apache License
@Test
public void testTwoFiles() throws IOException, InterruptedException {
    Path dir = new Path(tempFolder.getRoot().getAbsolutePath());

    CombineSequenceFileInputFormat<Text, Text> inputFormat = new CombineSequenceFileInputFormat<Text, Text>();
    Path inputFile1 = new Path(dir, "file1.txt");
    Path inputFile2 = new Path(dir, "file2.txt");

    writeSequenceFile(inputFile1);
    writeSequenceFile(inputFile2);

    Configuration conf = new Configuration();
    JobConf jobConf = new JobConf(conf);

    FileInputFormat.addInputPath(jobConf, inputFile1);
    FileInputFormat.addInputPath(jobConf, inputFile2);

    InputSplit[] splits = inputFormat.getSplits(jobConf, 1);
    assertEquals(1, splits.length);

    CommonCombineRecordReader<Text, Text> rr = (CommonCombineRecordReader<Text, Text>) inputFormat
            .getRecordReader(splits[0], jobConf, new DummyReporter());
    Text k = new Text();
    Text v = new Text();

    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);
    assertEquals(0.5f, rr.getProgress(), 0.1);

    assertTrue(rr.next(k, v));
    assertEquals(key, k);
    assertEquals(value, v);

    assertFalse(rr.next(k, v));
    assertEquals(1.0f, rr.getProgress(), 0.1);
}
From source file:com.alexholmes.hadooputils.sort.Sort.java
License:Apache License
/**
 * The driver for the sort program, which works with command-line arguments.
 *
 * @param args command-line arguments
 * @return 0 if everything went well, non-zero for everything else
 * @throws Exception when there are communication problems with the job tracker
 */
@SuppressWarnings("unchecked")
public int run(final String[] args) throws Exception {

    SortConfig sortConfig = new SortConfig(getConf());

    Integer numMapTasks = null;
    Integer numReduceTasks = null;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;

    Class<? extends CompressionCodec> codecClass = null;
    Class<? extends CompressionCodec> mapCodecClass = null;

    boolean createLzopIndex = false;

    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                numMapTasks = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                numReduceTasks = Integer.parseInt(args[++i]);
            } else if ("-f".equals(args[i]) || "--ignore-case".equals(args[i])) {
                sortConfig.setIgnoreCase(true);
            } else if ("-u".equals(args[i]) || "--unique".equals(args[i])) {
                sortConfig.setUnique(true);
            } else if ("-k".equals(args[i]) || "--key".equals(args[i])) {
                String[] parts = StringUtils.split(args[++i], ",");
                sortConfig.setStartKey(Integer.valueOf(parts[0]));
                if (parts.length > 1) {
                    sortConfig.setEndKey(Integer.valueOf(parts[1]));
                }
            } else if ("-t".equals(args[i]) || "--field-separator".equals(args[i])) {
                sortConfig.setFieldSeparator(args[++i]);
            } else if ("--total-order".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits) {
                    maxSplits = Integer.MAX_VALUE;
                }
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("--map-codec".equals(args[i])) {
                mapCodecClass = (Class<? extends CompressionCodec>) Class.forName(args[++i]);
            } else if ("--codec".equals(args[i])) {
                codecClass = (Class<? extends CompressionCodec>) Class.forName(args[++i]);
            } else if ("--lzop-index".equals(args[i])) {
                createLzopIndex = true;
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }

    if (runJob(new JobConf(sortConfig.getConfig()), numMapTasks, numReduceTasks, sampler, codecClass,
            mapCodecClass, createLzopIndex, otherArgs.get(0), otherArgs.get(1))) {
        return 0;
    }
    return 1;
}
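Since run(String[]) parses the command line and then hands a JobConf built from the SortConfig to runJob, the class is presumably launched through ToolRunner. A hypothetical launcher sketch, assuming Sort implements org.apache.hadoop.util.Tool and that Text key/value types are appropriate; the paths and option values are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.util.ToolRunner;

public class SortLauncher {
    public static void main(String[] args) throws Exception {
        // Roughly equivalent to: hadoop jar <jar> Sort -r 4 --unique /input /output
        String[] sortArgs = {"-r", "4", "--unique", "/input", "/output"};
        int exitCode = ToolRunner.run(new Configuration(), new Sort<Text, Text>(), sortArgs);
        System.exit(exitCode);
    }
}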
From source file:com.alexholmes.hadooputils.sort.SortTest.java
License:Apache License
public void run(TextIOJobBuilder builder) throws Exception {
    run(new JobConf(new SortConfig(builder.getFs().getConf()).getConfig()), builder, 1, 1, null);
}
From source file:com.alexholmes.hadooputils.sort.SortTest.java
License:Apache License
public void run(SortConfig sortConfig, TextIOJobBuilder builder) throws Exception {
    run(new JobConf(sortConfig.getConfig()), builder, 1, 1, null);
}