Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.eagle.jpm.analyzer.mr.suggestion;

import org.apache.eagle.jpm.analyzer.Processor;
import org.apache.eagle.jpm.analyzer.meta.model.MapReduceAnalyzerEntity;
import org.apache.eagle.jpm.analyzer.publisher.Result;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.util.ArrayList;
import java.util.List;

import static org.apache.hadoop.mapreduce.MRJobConfig.MAP_OUTPUT_COMPRESS;
import static org.apache.hadoop.mapreduce.MRJobConfig.MAP_OUTPUT_COMPRESS_CODEC;
import static org.apache.hadoop.mapreduce.MRJobConfig.NUM_REDUCES;
import static org.apache.hadoop.mapreduce.MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR;

/**
 * Inspects the compression-related settings of the job configuration held by the
 * suggestion context and produces human-readable advice plus a list of suggested
 * settings.
 *
 * <p>Two groups of settings are examined:
 * <ul>
 *   <li>map output compression (only when the job is configured with at least one reducer);</li>
 *   <li>job output compression via {@link FileOutputFormat}.</li>
 * </ul>
 */
public class MapReduceCompressionSettingProcessor implements Processor<MapReduceAnalyzerEntity> {

    /** Source of the job configuration that is analyzed in {@link #process}. */
    private final MapReduceJobSuggestionContext context;

    /**
     * Creates a processor bound to the given suggestion context.
     *
     * @param context context whose job configuration is read on every {@link #process} call
     */
    public MapReduceCompressionSettingProcessor(MapReduceJobSuggestionContext context) {
        this.context = context;
    }

    /**
     * Checks the compression settings of the job configuration and builds a suggestion.
     *
     * @param jobAnalysisEntity the analyzed job entity; not read by this implementation, which
     *                          works only from the job configuration of the context
     * @return a {@code COMPRESS} result at {@code INFO} level carrying the advice text and the
     *         suggested settings, or {@code null} when no advice was produced
     */
    @Override
    public Result.ProcessorResult process(MapReduceAnalyzerEntity jobAnalysisEntity) {
        StringBuilder sb = new StringBuilder();
        List<String> optSettings = new ArrayList<>();

        JobConf jobconf = new JobConf(context.getJobconf());

        // Map output compression is only checked when the job has reducers configured.
        if (jobconf.getLong(NUM_REDUCES, 0) > 0) {
            if (!jobconf.getCompressMapOutput()) {
                optSettings.add(String.format("%s=true", MAP_OUTPUT_COMPRESS));
                sb.append("Please set " + MAP_OUTPUT_COMPRESS + " to true to reduce network IO.\n");
            } else {
                // Compression may be enabled without an explicit codec; default to "" so an
                // unset codec yields the suggestion below instead of a NullPointerException.
                String codecClassName = jobconf.get(MAP_OUTPUT_COMPRESS_CODEC, "");
                if (!(codecClassName.endsWith("LzoCodec") || codecClassName.endsWith("SnappyCodec"))) {
                    optSettings.add(String.format("%s=LzoCodec or SnappyCodec", MAP_OUTPUT_COMPRESS_CODEC));
                    sb.append("Best practice: use LzoCodec or SnappyCodec for " + MAP_OUTPUT_COMPRESS_CODEC)
                        .append("\n");
                }
            }
        }

        if (!jobconf.getBoolean(FileOutputFormat.COMPRESS, false)) {
            optSettings.add(String.format("%s=true", FileOutputFormat.COMPRESS));
            sb.append(
                "Please set " + FileOutputFormat.COMPRESS + " to true to reduce disk usage and network IO.\n");
        } else {
            String codecName = jobconf.get(FileOutputFormat.COMPRESS_CODEC, "");
            String outputFileFormat = jobconf.get(OUTPUT_FORMAT_CLASS_ATTR, "");

            // This case only adds advice text; no entry is added to optSettings.
            if ((codecName.endsWith("GzipCodec")
                || codecName.endsWith("SnappyCodec")
                || codecName.endsWith("DefaultCodec"))
                && outputFileFormat.endsWith("TextOutputFormat")) {
                sb.append("Best practice: don't use Gzip/Snappy/DefaultCodec with TextOutputFormat");
                sb.append(" as this will cause the output files to be unsplittable. ");
                sb.append("Please use LZO instead or ");
                sb.append("use a container file format such as SequenceFileOutputFormat.\n");
            }
        }

        if (sb.length() > 0) {
            return new Result.ProcessorResult(Result.RuleType.COMPRESS, Result.ResultLevel.INFO,
                sb.toString(), optSettings);
        }
        // No advice was produced: callers receive null rather than an empty result.
        return null;
    }
}