// Java tutorial
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package explain;

import java.io.OutputStream;
import java.io.PrintStream;
import java.io.Serializable;
import java.lang.annotation.Annotation;
import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.Map.Entry;

import org.apache.hadoop.hive.ql.exec.ConditionalTask;
import org.apache.hadoop.hive.ql.exec.MapRedTask;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.parse.ParseException;
import org.apache.hadoop.hive.ql.plan.Explain;
import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
import org.apache.hadoop.hive.ql.plan.FilterDesc;
import org.apache.hadoop.hive.ql.plan.GroupByDesc;
import org.apache.hadoop.hive.ql.plan.HashTableSinkDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.MapredWork;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.SelectDesc;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.mapred.JobConf;

/**
 * Prints a textual description of Hive task plans.
 *
 * <p>Plans are walked reflectively: every class or getter carrying the
 * {@link Explain} annotation is printed, with display names optionally
 * overridden by the lookup tables below. While printing, the table aliases and
 * upstream stages feeding the current stage are collected in {@code mrBlock} so
 * that {@link #explain(String, Task, OutputStream, JobConf)} can afterwards
 * print the fragments of the query text that belong to the stage.
 *
 * <p>The lookup tables and the path/stage maps are {@code static} and therefore
 * shared by all instances; nothing in this class synchronizes access to them.
 */
public class ExplainTask {
  private static final long serialVersionUID = 1L;

  /** When non-null, only the task whose id equals this value is printed. */
  private String stageid;
  /** Work of the most recent root task that was a {@link MapRedTask}. */
  private MapredWork mapredwork;
  private JobConf jobconf;

  /** Getter name -> display name overriding the {@link Explain} annotation. */
  private static final Map<String, String> explainWorkName = new HashMap<String, String>();
  /** Descriptor class -> display name overriding the {@link Explain} annotation. */
  private static final Map<Class<?>, String> explainOpName = new HashMap<Class<?>, String>();
  /** Class names of descriptors whose annotated getters are not expanded. */
  private static final Set<String> hideDesc = new HashSet<String>();
  /** Names of annotated getters that are skipped entirely. */
  private static final Set<String> hideWork = new HashSet<String>();
  private static final Set<Class<?>> hideOp = new HashSet<Class<?>>();
  /** File sink directory -> id of the stage that was being explained when it was seen. */
  private static final Map<String, String> path2stage = new HashMap<String, String>();
  /** Stage id -> table aliases collected for that stage. */
  private static final Map<String, Set<String>> stage2input = new HashMap<String, Set<String>>();

  private ParseDriver parser = new ParseDriver();
  /** Query text that {@link #queryBlock} was parsed from, or null if none. */
  private String parseredSQL = null;
  private QueryInfo queryBlock = null;
  /** Inputs (aliases and stages) collected while printing the current plan. */
  private MRBlockInfo mrBlock = null;

  // The tables are static, so they are filled once at class-load time instead
  // of being re-populated by every constructor call.
  // NOTE(review): the "?" characters in the display names look like text that
  // was lost in an encoding conversion - confirm the intended labels.
  static {
    explainWorkName.put("getAliasToWork", "[Map ] ? ");
    explainWorkName.put("getReducer", "[Reduce ] ? ");
    explainWorkName.put("getMapLocalWork", "?");

    explainOpName.put(TableScanDesc.class, "=>:");
    explainOpName.put(JoinDesc.class, "Join?");
    explainOpName.put(MapJoinDesc.class, "MapJoin");
    explainOpName.put(ReduceSinkDesc.class, "");
    explainOpName.put(GroupByDesc.class, " GroupBy");
    explainOpName.put(SelectDesc.class, "");
    explainOpName.put(FileSinkDesc.class, "");
    explainOpName.put(HashTableSinkDesc.class, "HashMapjoin");
    explainOpName.put(FilterDesc.class, "where?");

    hideWork.add("getAliasToFetchWork");

    hideDesc.add(SelectDesc.class.getName());
    hideDesc.add(FileSinkDesc.class.getName());
    hideDesc.add(ReduceSinkDesc.class.getName());
    hideDesc.add(TableScanDesc.class.getName());
    hideDesc.add(GroupByDesc.class.getName());
    hideDesc.add(JoinDesc.class.getName());
    hideDesc.add(HashTableSinkDesc.class.getName());
    hideDesc.add(MapJoinDesc.class.getName());
  }

  public ExplainTask() {
    super();
  }

  /**
   * Prints the stage plans of the given root tasks and closes the stream.
   *
   * @param rootTasks tasks to start the walk from
   * @param outS stream to write to; it is closed before this method returns
   * @return 0 on success, 1 if any exception occurred while printing
   */
  public int explain(ArrayList<Task<? extends Serializable>> rootTasks, OutputStream outS) {
    // outputMap records scanned inputs in mrBlock; without this, the first
    // table scan met by this overload failed with a NullPointerException.
    this.mrBlock = new MRBlockInfo();

    PrintStream out = null;
    try {
      out = new PrintStream(outS);
      // Go over all the tasks and dump out the plans
      outputStagePlans(out, rootTasks, 0);
      return (0);
    } catch (Exception e) {
      // Report the failure the same way the other overload does instead of
      // dropping it silently.
      e.printStackTrace();
      return (1);
    } finally {
      IOUtils.closeStream(out);
    }
  }

  /**
   * Prints the plan of one stage followed by the parts of the query text that
   * feed it, and closes the stream.
   *
   * <p>The query text is read from the {@code hive.query.string} property of
   * {@code jobconf} (newlines replaced by spaces) and re-parsed only when it
   * differs from the text parsed by the previous call.
   *
   * @param stageid id of the stage to print; null prints every stage reached
   * @param rootTask task to start the walk from
   * @param outS stream to write the plan to; it is closed before returning
   * @param jobconf configuration holding the query text; must not be null
   * @return 0 on success, 1 if any exception occurred while printing
   */
  public int explain(String stageid, Task<? extends Serializable> rootTask, OutputStream outS,
      JobConf jobconf) {
    this.stageid = stageid;
    this.jobconf = jobconf;
    this.mrBlock = new MRBlockInfo();

    String sql = jobconf.get("hive.query.string", "").replace("\n", " ");
    if (!sql.equals(parseredSQL)) {
      // if need to refresh
      try {
        queryBlock = parser.getQueryBlock(sql);
        parseredSQL = sql;
      } catch (ParseException e) {
        e.printStackTrace();
        // Do not keep the block of a previously parsed, different query around:
        // its positions would be applied to the wrong text.
        queryBlock = null;
        parseredSQL = null;
      }
    }

    if (rootTask instanceof MapRedTask) {
      mapredwork = ((MapRedTask) rootTask).getWork();
    }

    PrintStream out = null;
    try {
      out = new PrintStream(outS);
      List<Task<? extends Serializable>> rootTasks = new ArrayList<Task<? extends Serializable>>();
      rootTasks.add(rootTask);
      // Go over this task and dump out the plan
      outputStagePlans(out, rootTasks, 0);
      // output the sql this task will execute
      outputSQL(out);
      return (0);
    } catch (Exception e) {
      e.printStackTrace();
      return (1);
    } finally {
      IOUtils.closeStream(out);
    }
  }

  /**
   * Prints the query fragments belonging to the inputs collected in
   * {@code mrBlock}. All output of this method goes to {@code System.out};
   * the {@code out} parameter is currently not written to.
   *
   * @throws IllegalStateException if no parsed query is available
   */
  private void outputSQL(PrintStream out) {
    if (queryBlock == null) {
      throw new IllegalStateException("No parsed query available for stage " + stageid);
    }

    // Positions of the table aliases read directly by this stage.
    List<Position> inputlist = new ArrayList<Position>();
    for (String inputTablealias : mrBlock.inputTable) {
      Position inp = queryBlock.joinmap.get(inputTablealias);
      if (inp != null) {
        inputlist.add(inp);
      }
    }
    // Remember this stage's aliases so later stages can resolve it as an input.
    stage2input.put(this.stageid, mrBlock.inputTable);

    // Positions standing in for the upstream stages read by this stage.
    List<Position> inputStage = new ArrayList<Position>();
    for (String inputStageid : mrBlock.inputStage) {
      ASTPNode parent = queryBlock.findParent(stage2input.get(inputStageid));
      if (parent == null) {
        continue;
      }
      Position stagepos = new Position(parent.startindex, parent.stopindex);
      stagepos.outputPrexfix = "?" + inputStageid + " ";
      stagepos.outputPostfix = stagepos.outputPrexfix;
      inputStage.add(stagepos);

      Position parentpos = queryBlock.joinmap.get(parent.scope);
      if (parentpos != null) {
        inputlist.add(parentpos);
      }
    }

    // NOTE(review): a Position.sort(inputlist) call was commented out here in
    // the original source.
    System.out.println("****meregr sql****");
    Position.mergerOutput(inputlist, inputStage, System.out, queryBlock.sql);
    System.out.println("****debug sql****");
    for (Position s : inputlist) {
      System.out.println(queryBlock.sql.substring(s.startindex, s.stopindex + 1));
    }
    System.out.println("****stageid****");
    for (Position s : inputStage) {
      System.out.println(queryBlock.sql.substring(s.startindex, s.stopindex + 1));
    }
    System.out.println("********");
  }

  /** Returns a string of {@code indent} spaces. */
  private String indentString(int indent) {
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < indent; ++i) {
      sb.append(" ");
    }
    return sb.toString();
  }

  /**
   * Returns the first path of the current map-reduce work whose first alias
   * equals {@code alias}, or the empty string when there is no such path or no
   * map-reduce work has been recorded.
   */
  private String aliasToPath(String alias) {
    if (mapredwork == null) {
      return "";
    }
    Set<Entry<String, ArrayList<String>>> set = mapredwork.getPathToAliases().entrySet();
    for (Entry<String, ArrayList<String>> entry : set) {
      ArrayList<String> aliases = entry.getValue();
      // Only the first alias of a path is compared; skip paths without one.
      if (aliases != null && !aliases.isEmpty() && aliases.get(0).equals(alias)) {
        return entry.getKey();
      }
    }
    return "";
  }

  /** Returns the stage recorded for {@code path}, or null if none was recorded. */
  private String pathToStage(String path) {
    return path2stage.get(path);
  }

  /** Records the directory of a file sink as being written by the current stage. */
  private void addPathToStage(FileSinkDesc fsd) {
    path2stage.put(fsd.getDirName(), stageid);
  }

  /**
   * Returns the entries of {@code mp} sorted by key, or {@code mp} itself when
   * its keys cannot be ordered (the sort raises a ClassCastException).
   */
  private Map<?, ?> sortedByKey(Map<?, ?> mp) {
    try {
      TreeMap<Object, Object> tree = new TreeMap<Object, Object>();
      tree.putAll(mp);
      return tree;
    } catch (ClassCastException keysNotComparable) {
      return mp;
    }
  }

  /**
   * Prints the entries of a map, one per line, preceded by {@code header}.
   * Nothing (not even the header) is printed for an empty map.
   *
   * <p>Entries whose value is a {@link TableScanOperator} are printed with the
   * table-scan display name, and their key is recorded in {@code mrBlock} as
   * either an input stage or an input table.
   */
  private void outputMap(Map<?, ?> mp, String header, PrintStream out, boolean extended,
      int indent) throws Exception {

    boolean first_el = true;
    for (Entry<?, ?> ent : sortedByKey(mp).entrySet()) {
      if (first_el) {
        out.println(header);
      }
      first_el = false;

      Object value = ent.getValue();
      out.print(indentString(indent));
      if (value instanceof TableScanOperator) {
        String rawTableName = ent.getKey().toString();
        String input;
        if (rawTableName.contains("://")) {
          // The key is a path: the input is the stage recorded for that path.
          String stage = pathToStage(rawTableName);
          input = stage + "";
          mrBlock.inputStage.add(stage);
        } else if (rawTableName.contains("$")) {
          // The key is an alias containing '$' (e.g. $INTNAME): resolve it to
          // a path first, then to the stage recorded for that path.
          String stage = pathToStage(aliasToPath(rawTableName));
          input = stage + "";
          mrBlock.inputStage.add(stage);
        } else {
          // The key is a plain table alias.
          input = " " + rawTableName;
          mrBlock.inputTable.add(rawTableName);
        }
        out.printf("%s ", explainOpName.get(TableScanDesc.class) + input);
      } else {
        // Print the key
        out.printf("%s ", ent.getKey().toString());
      }

      // Print the value
      if (isPrintable(value)) {
        out.print(value);
        out.println();
      } else if (value instanceof List || value instanceof Map) {
        out.print(value.toString());
        out.println();
      } else if (value instanceof Serializable) {
        out.println();
        outputPlan((Serializable) value, out, extended, indent + 2);
      } else {
        out.println();
      }
    }
  }

  /**
   * Prints the elements of a list after {@code header}: printable elements
   * inline and comma separated, serializable elements as nested plans.
   * Nothing is printed for an empty list.
   */
  private void outputList(List<?> l, String header, PrintStream out, boolean extended,
      int indent) throws Exception {

    boolean first_el = true;
    boolean nl = false;
    for (Object o : l) {
      if (first_el) {
        out.print(header);
      }

      if (isPrintable(o)) {
        out.print(first_el ? " " : ", ");
        out.print(o);
        nl = true;
      } else if (o instanceof Serializable) {
        if (first_el) {
          out.println();
        }
        outputPlan((Serializable) o, out, extended, indent + 2);
      }

      first_el = false;
    }

    // Terminate the inline list only if something was printed inline.
    if (nl) {
      out.println();
    }
  }

  /**
   * Tells whether a value is printed directly rather than expanded.
   *
   * <p>Values reach this method as {@code Object}, so primitives are always
   * boxed; the wrapper types are therefore listed explicitly.
   */
  private boolean isPrintable(Object val) {
    return val instanceof Boolean || val instanceof String || val instanceof Character
        || val instanceof Integer || val instanceof Long || val instanceof Short
        || val instanceof Byte || val instanceof Float || val instanceof Double;
  }

  /**
   * Prints one piece of work: its display name (if annotated), then either its
   * operator tree or the values of its {@link Explain}-annotated getters.
   */
  private void outputPlan(Serializable work, PrintStream out, boolean extended, int indent)
      throws Exception {
    // Check if work has an explain annotation
    Annotation note = work.getClass().getAnnotation(Explain.class);

    if (note instanceof Explain) {
      Explain xpl_note = (Explain) note;
      if (extended || xpl_note.normalExplain()) {
        out.print(indentString(indent));
        String displayName = xpl_note.displayName();
        // A configured name for this exact class overrides the annotation.
        String configuredName = explainOpName.get(work.getClass());
        if (configuredName != null) {
          displayName = configuredName;
        }
        if (work instanceof FileSinkDesc) {
          // Remember which stage writes this directory.
          addPathToStage((FileSinkDesc) work);
        }
        if (work instanceof TableScanDesc) {
          // The table scan label is printed by outputMap together with its input.
          displayName = "";
        }
        out.println(displayName);
      }
    }

    // If this is an operator then we need to call the plan generation on the
    // conf and then the children
    if (work instanceof Operator) {
      Operator<? extends Serializable> operator = (Operator<? extends Serializable>) work;
      if (operator.getConf() != null) {
        outputPlan(operator.getConf(), out, extended, indent);
      }
      if (operator.getChildOperators() != null) {
        for (Operator<? extends Serializable> op : operator.getChildOperators()) {
          outputPlan(op, out, extended, indent + 2);
        }
      }
      return;
    } else if (hideDesc.contains(work.getClass().getName())) {
      return;
    }

    // We look at all methods that generate values for explain
    Method[] methods = work.getClass().getMethods();
    Arrays.sort(methods, new MethodComparator());

    for (Method m : methods) {
      note = m.getAnnotation(Explain.class);
      if (!(note instanceof Explain)) {
        continue;
      }
      Explain xpl_note = (Explain) note;
      if (!(extended || xpl_note.normalExplain())) {
        continue;
      }
      // Hidden getters are skipped before being invoked.
      if (hideWork.contains(m.getName())) {
        continue;
      }

      Object val = m.invoke(work);
      if (val == null) {
        continue;
      }

      String displayName = xpl_note.displayName();
      String configuredName = explainWorkName.get(m.getName());
      if (configuredName != null) {
        displayName = configuredName;
      }

      int prop_indents = indent + 2;
      String header;
      if (!displayName.equals("")) {
        header = indentString(prop_indents) + displayName + ":";
      } else {
        // A property without a name does not add an indentation level.
        prop_indents = indent;
        header = indentString(prop_indents);
      }

      outputValue(val, header, out, extended, prop_indents);
    }
  }

  /**
   * Prints one getter value according to its runtime type: printable values
   * inline, then maps, lists and any other serializable value as nested
   * output. Values of any other type are not printed.
   */
  private void outputValue(Object val, String header, PrintStream out, boolean extended,
      int indent) throws Exception {
    if (isPrintable(val)) {
      out.printf("%s ", header);
      out.println(val);
    } else if (val instanceof Map) {
      // Go through the map and print out the stuff
      outputMap((Map<?, ?>) val, header, out, extended, indent + 2);
    } else if (val instanceof List) {
      outputList((List<?>) val, header, out, extended, indent + 2);
    } else if (val instanceof Serializable) {
      out.println(header);
      outputPlan((Serializable) val, out, extended, indent + 2);
    }
  }

  /**
   * Prints a task (when it passes the {@code stageid} filter) and then walks
   * its conditional sub-tasks and children. Each task is visited once per
   * {@code displayedSet}.
   */
  private void outputPlan(Task<? extends Serializable> task, PrintStream out, boolean extended,
      HashSet<Task<? extends Serializable>> displayedSet, int indent) throws Exception {

    if (!displayedSet.add(task)) {
      return;
    }

    // Without a stage filter every task is printed; otherwise only the match.
    if (stageid == null || stageid.equals(task.getId())) {
      out.print(indentString(indent));
      out.printf("Stage: %s\n", task.getId());
      // Start by getting the work part of the task and call the output plan for
      // the work
      outputPlan(task.getWork(), out, extended, indent + 2);
      out.println();
    }

    if (task instanceof ConditionalTask && ((ConditionalTask) task).getListTasks() != null) {
      for (Task<? extends Serializable> con : ((ConditionalTask) task).getListTasks()) {
        outputPlan(con, out, extended, displayedSet, indent);
      }
    }
    if (task.getChildTasks() != null) {
      for (Task<? extends Serializable> child : task.getChildTasks()) {
        outputPlan(child, out, extended, displayedSet, indent);
      }
    }
  }

  /**
   * Prints a "STAGE DEPENDENCIES:" section for the given root tasks.
   *
   * <p>Tasks already printed by an earlier call on this instance are not
   * printed again, because the set of visited tasks is kept in a field.
   */
  public void outputDependencies(PrintStream out, List<Task<? extends Serializable>> rootTasks,
      int indent) throws Exception {
    out.print(indentString(indent));
    out.println("STAGE DEPENDENCIES:");
    for (Task<? extends Serializable> rootTask : rootTasks) {
      outputDependencies(rootTask, out, indent + 2, true);
    }
  }

  /** Tasks whose dependency line has already been printed by this instance. */
  private final Set<Task<? extends Serializable>> dependeciesTaskSet =
      new HashSet<Task<? extends Serializable>>();

  /**
   * Prints one dependency line for {@code task} (parents, backup stage and
   * conditional sub-tasks), then recurses into sub-tasks and children.
   *
   * @param rootTskCandidate whether a task without parents is reported as a
   *        root stage
   */
  private void outputDependencies(Task<? extends Serializable> task, PrintStream out,
      int indent, boolean rootTskCandidate) throws Exception {

    if (!dependeciesTaskSet.add(task)) {
      return;
    }

    boolean first = true;
    out.print(indentString(indent));
    out.printf("%s", task.getId());
    if ((task.getParentTasks() == null || task.getParentTasks().isEmpty())) {
      if (rootTskCandidate) {
        out.print(" is a root stage");
      }
    } else {
      out.print(" depends on stages: ");
      first = true;
      for (Task<? extends Serializable> parent : task.getParentTasks()) {
        if (!first) {
          out.print(", ");
        }
        first = false;
        out.print(parent.getId());
      }
    }

    Task<? extends Serializable> cuurBackupTask = task.getBackupTask();
    if (cuurBackupTask != null) {
      out.print(" has a backup stage: ");
      // NOTE(review): when parents were listed above, 'first' is already false
      // and a ", " is printed before the backup stage id. Kept as in the
      // original output format - confirm whether that separator is intended.
      if (!first) {
        out.print(", ");
      }
      first = false;
      out.print(cuurBackupTask.getId());
    }

    if (task instanceof ConditionalTask && ((ConditionalTask) task).getListTasks() != null) {
      out.print(" , consists of ");
      first = true;
      for (Task<? extends Serializable> con : ((ConditionalTask) task).getListTasks()) {
        if (!first) {
          out.print(", ");
        }
        first = false;
        out.print(con.getId());
      }
    }

    out.println();

    if (task instanceof ConditionalTask && ((ConditionalTask) task).getListTasks() != null) {
      for (Task<? extends Serializable> con : ((ConditionalTask) task).getListTasks()) {
        outputDependencies(con, out, indent, false);
      }
    }

    if (task.getChildTasks() != null) {
      for (Task<? extends Serializable> child : task.getChildTasks()) {
        outputDependencies(child, out, indent, true);
      }
    }
  }

  /**
   * Prints a "STAGE PLANS:" section containing the (non-extended) plan of
   * every task reachable from the given root tasks, each task at most once.
   */
  public void outputStagePlans(PrintStream out, List<Task<? extends Serializable>> rootTasks,
      int indent) throws Exception {
    out.print(indentString(indent));
    out.println("STAGE PLANS:");
    HashSet<Task<? extends Serializable>> displayedSet =
        new HashSet<Task<? extends Serializable>>();
    for (Task<? extends Serializable> rootTask : rootTasks) {
      outputPlan(rootTask, out, false, displayedSet, indent + 2);
    }
  }

  /**
   * MethodComparator.
   *
   * <p>Orders {@link Method} objects by name. Both arguments must be
   * {@link Method} instances, otherwise a ClassCastException is thrown.
   */
  public static class MethodComparator implements Comparator<Object> {
    @Override
    public int compare(Object o1, Object o2) {
      Method m1 = (Method) o1;
      Method m2 = (Method) o2;
      return m1.getName().compareTo(m2.getName());
    }
  }
}