Java tutorial: AbstractJob, a base Hadoop MapReduce job driver (Flamingo MapReduce)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.opencloudengine.flamingo.mapreduce.core;

import com.google.common.base.Preconditions;
import org.apache.commons.cli2.CommandLine;
import org.apache.commons.cli2.Group;
import org.apache.commons.cli2.Option;
import org.apache.commons.cli2.OptionException;
import org.apache.commons.cli2.builder.ArgumentBuilder;
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
import org.apache.commons.cli2.builder.GroupBuilder;
import org.apache.commons.cli2.commandline.Parser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.util.Tool;
import org.opencloudengine.flamingo.mapreduce.util.CommandLineUtil;
import org.opencloudengine.flamingo.mapreduce.util.DefaultOptionCreator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringWriter;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Base class for Flamingo MapReduce Hadoop Job drivers.
 * A MapReduce driver derived from this class declares the command-line
 * options it needs, parses them, and builds its Map and Reduce tasks
 * against the shared Hadoop Configuration.
 * <p/>
 * Every driver derived from this class accepts the following options:
 * <ul>
 * <li>--tempDir (path): working directory for temporary job files
 * (defaults to <tt>/temp/${user.home}</tt>)</li>
 * <li>--help: prints usage information</li>
 * </ul>
 * <p/>
 * In addition, the following Hadoop properties can be passed to any
 * MapReduce Job:
 * <ul>
 * <li>-Dmapred.job.name=(name): name of the Hadoop MapReduce Job; defaults to the driver class name</li>
 * <li>-Dmapred.output.compress={true,false}: whether to compress output (defaults to true)</li>
 * <li>-Dmapred.input.dir=(path): input path (required)</li>
 * <li>-Dmapred.output.dir=(path): output path (required)</li>
 * </ul>
 * <tt>-D</tt> properties must be given before any other program arguments.
 */
public abstract class AbstractJob extends Configured implements Tool {

    /**
     * SLF4J logging API.
     */
    private static final Logger log = LoggerFactory.getLogger(AbstractJob.class);

    /**
     * MapReduce input option.
     */
    private Option inputOption;

    /**
     * MapReduce output option.
     */
    private Option outputOption;

    /**
     * MapReduce input path, set by {@link #parseArguments(String[])}.
     */
    private Path inputPath;

    /**
     * MapReduce output path, set by {@link #parseArguments(String[])}.
     */
    private Path outputPath;

    /**
     * Temporary path, set by {@link #parseArguments(String[])}.
     * Defaults to the <tt>tempDir</tt> entry of the
     * <tt>flamingo-hadoop-site.xml</tt> file on the CLASSPATH, falling back
     * to <tt>/temp/${user.home}</tt>.
     */
    private Path tempPath;

    /**
     * Key-value map of the arguments passed to the MapReduce Job.
     */
    private Map<String, String> argMap;

    /**
     * Registered command-line options.
     */
    private final List<Option> options;

    /**
     * Default constructor.
     */
    protected AbstractJob() {
        options = new LinkedList<Option>();
        if (getConf() == null) {
            setConf(new Configuration());
            // Load the default Hadoop Configuration bundled with Flamingo MapReduce.
            getConf().addResource(getClass().getResource("/flamingo-mapreduce-site.xml"));
        }
    }
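    /*
     * A hypothetical launch of a driver derived from this class (jar, class,
     * and path names are illustrative only; --input/--output exist only when
     * the driver registers them):
     *
     *   hadoop jar flamingo-mapreduce-examples.jar org.example.MyJob \
     *       -Dmapred.output.compress=false \
     *       --input /data/in --output /data/out --tempDir /tmp/myjob
     */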
    /**
     * Returns the input path parsed by {@link #parseArguments(String[])}.
     * The value is available when the Hadoop MapReduce driver registered the
     * option via {@link #addInputOption()}, or when {@code mapred.input.dir}
     * is set in the Hadoop Configuration.
     *
     * @return the input path
     */
    protected Path getInputPath() {
        return inputPath;
    }

    /**
     * Returns the output path parsed by {@link #parseArguments(String[])}.
     * The value is available when the Hadoop MapReduce driver registered the
     * option via {@link #addOutputOption()}, or when {@code mapred.output.dir}
     * is set in the Hadoop Configuration.
     *
     * @return the output path
     */
    protected Path getOutputPath() {
        return outputPath;
    }

    /**
     * Returns a child path of the output path.
     *
     * @param child name of the child path
     * @return the child Path
     */
    protected Path getOutputPath(String child) {
        return new Path(outputPath, child);
    }

    /**
     * Returns a child path of the temporary path.
     *
     * @param path name of the child path
     * @return the child Path
     */
    protected Path getTempPath(String path) {
        return new Path(getTempPath(), path);
    }

    /**
     * Returns the temporary path.
     * The default value is the {@link Constants#TEMP_DIR} entry of the
     * <tt>flamingo-mapreduce-site.xml</tt> file on the CLASSPATH; it can be
     * overridden for a single run with the <tt>--tempDir</tt> command-line
     * option. A Hadoop MapReduce driver obtains it as follows:
     * <p/>
     * <pre>
     * Path tempDir = getTempPath();
     * </pre>
     *
     * @return the temporary path
     */
    protected Path getTempPath() {
        String defaultTempDir = getConf().get("tempDir");
        if (argMap.containsKey(keyFor(Constants.TEMP_DIR))) {
            defaultTempDir = argMap.get(keyFor(Constants.TEMP_DIR));
        }
        return new Path(defaultTempDir);
    }

    /**
     * Returns a timestamped temporary path. The timestamp pattern is taken
     * from the <tt>tempDir.date.pattern</tt> entry of
     * <tt>flamingo-hadoop-site.xml</tt> and defaults to
     * <tt>yyyyMMdd-HHmmss-SSS</tt>.
     *
     * @return the timestamped temporary path
     */
    protected Path getTimestampTempPath() {
        SimpleDateFormat formatter = new SimpleDateFormat(getConf().get("tempDir.date.pattern"));
        return getTempPath(formatter.format(new Date()));
    }

    /**
     * Returns a prefixed child path of the temporary path.
     *
     * @param prefix prefix directory
     * @param path   name of the child path
     * @return the resulting Path
     */
    protected Path getTempPath(String prefix, String path) {
        Path tempPath = getTempPath();
        Path prefixPath = new Path(tempPath, prefix);
        return new Path(prefixPath, path);
    }

    /**
     * Adds a flag option that takes no argument and is never required.
     *
     * @param name        option name (e.g. <tt>inputPath</tt>)
     * @param shortName   short option name (e.g. <tt>i</tt>)
     * @param description option description
     */
    protected void addFlag(String name, String shortName, String description) {
        options.add(buildOption(name, shortName, description, false, false, null));
    }

    /**
     * Adds an optional option that takes a single argument and has no
     * default value.
     *
     * @param name        option name (e.g. <tt>inputPath</tt>)
     * @param shortName   short option name (e.g. <tt>i</tt>)
     * @param description option description
     */
    protected void addOption(String name, String shortName, String description) {
        options.add(buildOption(name, shortName, description, true, false, null));
    }

    /**
     * Adds an option for the Hadoop MapReduce job. The parsed value is
     * returned by {@link #parseArguments(String[])}.
     *
     * @param name        option name (e.g. <tt>inputPath</tt>)
     * @param shortName   short option name (e.g. <tt>i</tt>)
     * @param description option description
     * @param required    when <tt>true</tt>, parsing fails unless the user
     *                    supplies the option; no default value is applied.
     */
    protected void addOption(String name, String shortName, String description, boolean required) {
        options.add(buildOption(name, shortName, description, true, required, null));
    }

    /**
     * Adds an option for the Hadoop MapReduce job. The parsed value is
     * returned by {@link #parseArguments(String[])}.
     *
     * @param name         option name (e.g. <tt>inputPath</tt>)
     * @param shortName    short option name (e.g. <tt>i</tt>)
     * @param description  option description
     * @param defaultValue value used when the option is omitted; may be null.
     */
    protected void addOption(String name, String shortName, String description, String defaultValue) {
        options.add(buildOption(name, shortName, description, true, false, defaultValue));
    }

    /**
     * Adds an arbitrary option. The parsed value is returned by
     * {@link #parseArguments(String[])}. If the option takes no argument,
     * test for its presence with {@code containsKey} on the map returned by
     * {@code parseArguments}; the key is the option name prefixed with '--'.
     *
     * @param option the option to add
     * @return the added option
     */
    protected Option addOption(Option option) {
        options.add(option);
        return option;
    }
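    /*
     * A minimal sketch of how a subclass typically registers and reads
     * options inside run() ("delimiter" is a hypothetical option name):
     *
     *   addInputOption();
     *   addOption("delimiter", "d", "field delimiter", ",");
     *   Map<String, String> parsedArgs = parseArguments(args);
     *   if (parsedArgs == null) {
     *       return -1;                              // help shown or parse error
     *   }
     *   String delimiter = getOption("delimiter"); // value of --delimiter
     */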
    /**
     * Adds the standard input option, registered as <tt>'-i'</tt>. Its value
     * is returned by {@link #parseArguments(String[])}. The option is not
     * marked <tt>required</tt> because the input path of a Hadoop Job may
     * also be supplied through the Hadoop property <tt>mapred.input.dir</tt>.
     */
    protected void addInputOption() {
        this.inputOption = addOption(DefaultOptionCreator.inputOption().create());
    }

    /**
     * Adds the standard output option, registered as <tt>'-o'</tt>. Its value
     * is returned by {@link #parseArguments(String[])}. The option is not
     * marked <tt>required</tt> because the output path of a Hadoop Job may
     * also be supplied through the Hadoop property <tt>mapred.output.dir</tt>.
     */
    protected void addOutputOption() {
        this.outputOption = addOption(DefaultOptionCreator.outputOption().create());
    }

    /**
     * Builds a single command-line option from the given attributes.
     *
     * @param name         long option name; given on the command line with a '--' prefix
     * @param shortName    short option name; may be <tt>null</tt>
     * @param description  description printed in the help output
     * @param hasArg       <tt>true</tt> if the option takes an argument
     * @param required     <tt>true</tt> if the option must be supplied
     * @param defaultValue default argument value; may be <tt>null</tt>
     * @return the constructed Option
     */
    protected static Option buildOption(String name, String shortName, String description,
                                        boolean hasArg, boolean required, String defaultValue) {
        DefaultOptionBuilder optBuilder = new DefaultOptionBuilder().withLongName(name).withDescription(description)
                .withRequired(required);
        if (shortName != null) {
            optBuilder.withShortName(shortName);
        }
        if (hasArg) {
            ArgumentBuilder argBuilder = new ArgumentBuilder().withName(name).withMinimum(1).withMaximum(1);
            if (defaultValue != null) {
                argBuilder = argBuilder.withDefault(defaultValue);
            }
            optBuilder.withArgument(argBuilder.create());
        }
        return optBuilder.create();
    }

    /**
     * Parses the given command-line arguments.
     * Returns <tt>null</tt> when parsing fails or when <tt>-h</tt> (help)
     * is requested.
     *
     * @param args command-line arguments
     * @return a {@code Map<String,String>} of parsed arguments, or <tt>null</tt>.
     *         Each key is an option name prefixed with '--'; options without
     *         an argument appear in the map with a <tt>null</tt> value.
     */
    public Map<String, String> parseArguments(String[] args) throws Exception {
        Option helpOpt = addOption(DefaultOptionCreator.helpOption());
        addOption("tempDir", null, "temporary directory for intermediate output", false);
        addOption("startPhase", null, "first phase to run", "0");
        addOption("endPhase", null, "last phase to run", String.valueOf(Integer.MAX_VALUE));

        GroupBuilder groupBuilder = new GroupBuilder().withName("Hadoop MapReduce Job options:");
        for (Option opt : options) {
            groupBuilder = groupBuilder.withOption(opt);
        }
        Group group = groupBuilder.create();

        CommandLine cmdLine;
        try {
            Parser parser = new Parser();
            parser.setGroup(group);
            parser.setHelpOption(helpOpt);
            cmdLine = parser.parse(args);
        } catch (OptionException e) {
            log.error(e.getMessage());
            CommandLineUtil.printHelpWithGenericOptions(group, e);
            return null;
        }

        if (cmdLine.hasOption(helpOpt)) {
            CommandLineUtil.printHelpWithGenericOptions(group);
            return null;
        }

        try {
            parseDirectories(cmdLine);
        } catch (IllegalArgumentException e) {
            log.error(e.getMessage());
            CommandLineUtil.printHelpWithGenericOptions(group);
            return null;
        }

        argMap = new TreeMap<String, String>();
        maybePut(argMap, cmdLine, this.options.toArray(new Option[this.options.size()]));
        log.info("Command line arguments: {}", argMap);
        for (String key : argMap.keySet()) {
            log.info("  {} = {}", key, argMap.get(key));
        }
        return argMap;
    }

    /**
     * Returns the argument-map key for an option name: <tt>name</tt> becomes
     * <tt>--name</tt>.
     *
     * @param optionName the option name
     */
    public static String keyFor(String optionName) {
        return "--" + optionName;
    }

    /**
     * Returns the value of the given option.
     *
     * @return the option value, or <tt>null</tt> if the option was not supplied.
     */
    public String getOption(String optionName) {
        return argMap.get(keyFor(optionName));
    }

    /**
     * Checks whether the given option was supplied.
     *
     * @return <tt>true</tt> if the option is present
     */
    public boolean hasOption(String optionName) {
        return argMap.containsKey(keyFor(optionName));
    }
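    /*
     * Key convention sketch: parsing the hypothetical arguments
     *   --input /data/in --delimiter ,
     * yields the map {"--delimiter" -> ",", "--input" -> "/data/in"}.
     * Flag options registered with addFlag() appear with a null value, so
     * test for them with hasOption(...) instead of reading the value.
     */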
    /**
     * Resolves the input, output, and temporary directories from the parsed
     * command line and the Hadoop Configuration. When {@code addInputOption}
     * or {@code addOutputOption} has been called, the corresponding path must
     * be supplied either on the command line or through the Hadoop
     * Configuration; otherwise an {@code IllegalArgumentException} is thrown.
     * After this method returns, {@code inputPath} and {@code outputPath} are
     * <tt>non-null</tt> whenever the matching option was registered. Values
     * given on the command line take precedence over the Hadoop Configuration.
     *
     * @param cmdLine the parsed command line
     * @throws IllegalArgumentException if an input option was registered but
     *                                  neither {@code --input} nor {@code -Dmapred.input.dir} was given,
     *                                  or an output option was registered but
     *                                  neither {@code --output} nor {@code -Dmapred.output.dir} was given
     */
    protected void parseDirectories(CommandLine cmdLine) {
        Configuration conf = getConf();

        if (inputOption != null && cmdLine.hasOption(inputOption)) {
            this.inputPath = new Path(cmdLine.getValue(inputOption).toString());
        }
        if (inputPath == null && conf.get("mapred.input.dir") != null) {
            this.inputPath = new Path(conf.get("mapred.input.dir"));
        }

        if (outputOption != null && cmdLine.hasOption(outputOption)) {
            this.outputPath = new Path(cmdLine.getValue(outputOption).toString());
        }
        if (outputPath == null && conf.get("mapred.output.dir") != null) {
            this.outputPath = new Path(conf.get("mapred.output.dir"));
        }

        // Temporary path: fall back to the tempDir entry of flamingo-hadoop-site.xml on the CLASSPATH.
        if (tempPath == null && conf.get("tempDir") != null) {
            this.tempPath = new Path(conf.get("tempDir"));
        }

        Preconditions.checkArgument(inputOption == null || inputPath != null,
                "No input directory was specified; set one with the --input option or the -Dmapred.input.dir property.");
        Preconditions.checkArgument(outputOption == null || outputPath != null,
                "No output directory was specified; set one with the --output option or the -Dmapred.output.dir property.");
    }

    protected static void maybePut(Map<String, String> args, CommandLine cmdLine, Option... opt) {
        for (Option o : opt) {
            // Record the option if it was given on the command line or carries a
            // value (e.g. a default). Nulls are OK: they mark simple flag options.
            if (cmdLine.hasOption(o) || cmdLine.getValue(o) != null) {
                Object vo = cmdLine.getValue(o);
                String value = vo == null ? null : vo.toString();
                args.put(o.getPreferredName(), value);
            }
        }
    }

    /**
     * Decides whether the current phase of a multi-phase job should run,
     * based on the <tt>--startPhase</tt> and <tt>--endPhase</tt> arguments.
     *
     * @param args         parsed key-value arguments
     * @param currentPhase counter holding the current phase number; incremented by this call
     * @return <tt>true</tt> if the current phase should run
     */
    protected static boolean shouldRunNextPhase(Map<String, String> args, AtomicInteger currentPhase) {
        int phase = currentPhase.getAndIncrement();
        String startPhase = args.get("--startPhase");
        String endPhase = args.get("--endPhase");
        boolean phaseSkipped = (startPhase != null && phase < Integer.parseInt(startPhase))
                || (endPhase != null && phase > Integer.parseInt(endPhase));
        if (phaseSkipped) {
            log.info("Skipping phase {}", phase);
        }
        return !phaseSkipped;
    }
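    /*
     * Multi-phase sketch: with --startPhase/--endPhase a driver can rerun
     * only part of a pipeline (job construction elided; the phases are
     * hypothetical):
     *
     *   AtomicInteger currentPhase = new AtomicInteger();
     *   if (shouldRunNextPhase(parsedArgs, currentPhase)) {
     *       // phase 0: e.g. parse raw logs
     *   }
     *   if (shouldRunNextPhase(parsedArgs, currentPhase)) {
     *       // phase 1: e.g. aggregate the parsed records
     *   }
     */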
    /**
     * Prepares a Hadoop Job that runs both a Mapper and a Reducer.
     *
     * @param inputPath    input path
     * @param outputPath   output path
     * @param inputFormat  input format class
     * @param mapper       Mapper class
     * @param mapperKey    Mapper output key class
     * @param mapperValue  Mapper output value class
     * @param reducer      Reducer class
     * @param reducerKey   Reducer output key class
     * @param reducerValue Reducer output value class
     * @param outputFormat output format class
     * @return the configured Hadoop Job
     * @throws IOException if the Hadoop Job cannot be created
     */
    protected Job prepareJob(Path inputPath, Path outputPath,
                             Class<? extends InputFormat> inputFormat,
                             Class<? extends Mapper> mapper,
                             Class<? extends Writable> mapperKey,
                             Class<? extends Writable> mapperValue,
                             Class<? extends Reducer> reducer,
                             Class<? extends Writable> reducerKey,
                             Class<? extends Writable> reducerValue,
                             Class<? extends OutputFormat> outputFormat) throws IOException {
        Job job = new Job(new Configuration(getConf()));
        Configuration jobConf = job.getConfiguration();

        if (reducer.equals(Reducer.class)) {
            if (mapper.equals(Mapper.class)) {
                throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
            }
            job.setJarByClass(mapper);
        } else {
            job.setJarByClass(reducer);
        }

        job.setInputFormatClass(inputFormat);
        jobConf.set("mapred.input.dir", inputPath.toString());

        job.setMapperClass(mapper);
        job.setMapOutputKeyClass(mapperKey);
        job.setMapOutputValueClass(mapperValue);

        jobConf.setBoolean("mapred.compress.map.output", true);

        job.setReducerClass(reducer);
        job.setOutputKeyClass(reducerKey);
        job.setOutputValueClass(reducerValue);

        job.setJobName(getCustomJobName(job, mapper, reducer));

        job.setOutputFormatClass(outputFormat);
        jobConf.set("mapred.output.dir", outputPath.toString());

        return job;
    }

    /**
     * Prepares a Hadoop Job that runs both a Mapper and a Reducer with a
     * fixed number of reduce tasks.
     *
     * @param inputPath    input path
     * @param outputPath   output path
     * @param inputFormat  input format class
     * @param mapper       Mapper class
     * @param mapperKey    Mapper output key class
     * @param mapperValue  Mapper output value class
     * @param reducer      Reducer class
     * @param reducerKey   Reducer output key class
     * @param reducerValue Reducer output value class
     * @param outputFormat output format class
     * @param reduceTask   number of reduce tasks
     * @return the configured Hadoop Job
     * @throws IOException if the Hadoop Job cannot be created
     */
    protected Job prepareJob(Path inputPath, Path outputPath,
                             Class<? extends InputFormat> inputFormat,
                             Class<? extends Mapper> mapper,
                             Class<? extends Writable> mapperKey,
                             Class<? extends Writable> mapperValue,
                             Class<? extends Reducer> reducer,
                             Class<? extends Writable> reducerKey,
                             Class<? extends Writable> reducerValue,
                             Class<? extends OutputFormat> outputFormat,
                             int reduceTask) throws IOException {
        Job job = new Job(new Configuration(getConf()));
        Configuration jobConf = job.getConfiguration();

        if (reducer.equals(Reducer.class)) {
            if (mapper.equals(Mapper.class)) {
                throw new IllegalStateException("Can't figure out the user class jar file from mapper/reducer");
            }
            job.setJarByClass(mapper);
        } else {
            job.setJarByClass(reducer);
        }

        job.setInputFormatClass(inputFormat);
        jobConf.set("mapred.input.dir", inputPath.toString());

        job.setMapperClass(mapper);
        job.setMapOutputKeyClass(mapperKey);
        job.setMapOutputValueClass(mapperValue);

        jobConf.setBoolean("mapred.compress.map.output", true);

        job.setReducerClass(reducer);
        job.setOutputKeyClass(reducerKey);
        job.setOutputValueClass(reducerValue);

        job.setJobName(getCustomJobName(job, mapper, reducer));
        job.setNumReduceTasks(reduceTask);

        job.setOutputFormatClass(outputFormat);
        jobConf.set("mapred.output.dir", outputPath.toString());

        return job;
    }
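    /*
     * Usage sketch for the variant above, fixing the number of reducers at 4
     * (TokenizerMapper and IntSumReducer are hypothetical classes):
     *
     *   Job job = prepareJob(getInputPath(), getOutputPath(),
     *           TextInputFormat.class, TokenizerMapper.class,
     *           Text.class, IntWritable.class,
     *           IntSumReducer.class, Text.class, IntWritable.class,
     *           TextOutputFormat.class, 4);
     *   job.waitForCompletion(true);
     */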
    /**
     * Prepares a map-only Hadoop Job. Use this variant for MapReduce Jobs
     * that do not need a Reducer.
     *
     * @param inputPath    input path
     * @param outputPath   output path
     * @param inputFormat  input format class
     * @param mapper       Mapper class
     * @param mapperKey    Mapper output key class
     * @param mapperValue  Mapper output value class
     * @param outputFormat output format class
     * @return the configured Hadoop Job
     * @throws IOException if the Hadoop Job cannot be created
     */
    protected Job prepareJob(Path inputPath, Path outputPath,
                             Class<? extends InputFormat> inputFormat,
                             Class<? extends Mapper> mapper,
                             Class<? extends Writable> mapperKey,
                             Class<? extends Writable> mapperValue,
                             Class<? extends OutputFormat> outputFormat) throws IOException {
        Job job = new Job(new Configuration(getConf()));
        Configuration jobConf = job.getConfiguration();

        if (mapper.equals(Mapper.class)) {
            throw new IllegalStateException("Can't figure out the user class jar file from mapper");
        }
        job.setJarByClass(mapper);

        job.setInputFormatClass(inputFormat);
        jobConf.set("mapred.input.dir", inputPath.toString());

        job.setMapperClass(mapper);
        job.setMapOutputKeyClass(mapperKey);
        job.setMapOutputValueClass(mapperValue);

        jobConf.setBoolean("mapred.compress.map.output", true);

        job.setNumReduceTasks(0);

        job.setJobName(getCustomJobName(job, mapper));

        job.setOutputFormatClass(outputFormat);
        jobConf.set("mapred.output.dir", outputPath.toString());

        return job;
    }

    /**
     * Builds a job name for a Hadoop Job with a Mapper and a Reducer,
     * falling back to the driver's simple class name when no custom job
     * name is set.
     *
     * @param job     Hadoop Job context
     * @param mapper  {@link org.apache.hadoop.mapreduce.Mapper} class
     * @param reducer Reducer class
     * @return the job name
     */
    private String getCustomJobName(JobContext job, Class<? extends Mapper> mapper, Class<? extends Reducer> reducer) {
        StringBuilder name = new StringBuilder(100);
        String customJobName = job.getJobName();
        if (customJobName == null || customJobName.trim().length() == 0) {
            name.append(getClass().getSimpleName());
        } else {
            name.append(customJobName);
        }
        return name.toString();
    }

    /**
     * Builds a job name for a map-only Hadoop Job, falling back to the
     * driver's simple class name when no custom job name is set.
     *
     * @param job    Hadoop Job context
     * @param mapper {@link org.apache.hadoop.mapreduce.Mapper} class
     * @return the job name
     */
    private String getCustomJobName(JobContext job, Class<? extends Mapper> mapper) {
        StringBuilder name = new StringBuilder(100);
        String customJobName = job.getJobName();
        if (customJobName == null || customJobName.trim().length() == 0) {
            name.append(getClass().getSimpleName());
        } else {
            name.append(customJobName);
        }
        return name.toString();
    }

    /**
     * Formats a key and value as a command-line argument string. Useful
     * when a Workflow Engine passes a key-value Map on to a MapReduce Job.
     *
     * @param key   argument key for the MapReduce Job
     * @param value value for that key
     * @return the formatted argument string
     */
    public static String getKeyValuePair(String key, String value) {
        return keyFor(key) + " " + value;
    }

    /**
     * Dumps the Hadoop Configuration to standard output.
     *
     * @throws Exception if the Hadoop Configuration cannot be dumped
     */
    public void dump() throws Exception {
        StringWriter writer = new StringWriter();
        // dumpConfiguration is a static utility on Configuration.
        Configuration.dumpConfiguration(this.getConf(), writer);
        System.out.println(writer.toString());
    }
}
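For reference, here is a minimal sketch of a concrete driver built on AbstractJob, as it might appear in its own source file. WordCountJob and its package are hypothetical names, and the TokenizerMapper and IntSumReducer classes are assumed to exist elsewhere; treat this as an illustration of the intended call sequence, not part of the class above.

package org.opencloudengine.flamingo.mapreduce.examples;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.ToolRunner;
import org.opencloudengine.flamingo.mapreduce.core.AbstractJob;

import java.util.Map;

/** Hypothetical word-count driver built on AbstractJob (illustrative sketch). */
public class WordCountJob extends AbstractJob {

    public static void main(String[] args) throws Exception {
        // ToolRunner strips generic Hadoop options (-D...) before run() is called.
        System.exit(ToolRunner.run(new WordCountJob(), args));
    }

    @Override
    public int run(String[] args) throws Exception {
        addInputOption();   // registers --input  (-i)
        addOutputOption();  // registers --output (-o)

        Map<String, String> parsedArgs = parseArguments(args);
        if (parsedArgs == null) {
            return -1;  // --help was requested or parsing failed
        }

        // TokenizerMapper and IntSumReducer are assumed to exist elsewhere.
        Job job = prepareJob(getInputPath(), getOutputPath(),
                TextInputFormat.class, TokenizerMapper.class,
                Text.class, IntWritable.class,
                IntSumReducer.class, Text.class, IntWritable.class,
                TextOutputFormat.class);

        return job.waitForCompletion(true) ? 0 : 1;
    }
}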