// Java tutorial
/** * Copyright 2010-2012 TransPac Software, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.scaleunlimited.cascading; import java.util.Collection; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.hadoop.mapred.JobConf; import cascading.flow.hadoop.HadoopFlowStep; import cascading.flow.planner.BaseFlowStep; import cascading.flow.planner.NamingFlowStep; import cascading.operation.Operation; import cascading.pipe.Group; import cascading.stats.FlowStepStats; public class StepUtils { private static final Pattern DEFAULT_OPERATION_NAME_PATTERN = Pattern.compile("(.+)\\[.+\\]"); public static long safeGetCounter(FlowStepStats stepStats, Enum<?> counter) { try { return stepStats.getCounterValue(counter); } catch (NullPointerException e) { // Catch case of job having ended, so stepStats.getRunningJob() returns // null, but Cascading doesn't check for this and tries to get the counter, // resulting in a NPE. 
return 0; } } @SuppressWarnings({ "unchecked", "rawtypes" }) public static void nameFlowStep(BaseFlowStep step) { List<Group> groups = step.getGroups(); String stepName = ""; if (groups.size() == 0) { Collection<Operation> operations = step.getAllOperations(); for (Operation operation : operations) { String operationName = operation.toString(); Matcher defaultNameMatcher = DEFAULT_OPERATION_NAME_PATTERN.matcher(operationName); if (defaultNameMatcher.matches()) { operationName = defaultNameMatcher.group(1); } stepName = stepName + operationName + "+"; } if (operations.size() > 0) { stepName = stepName.substring(0, stepName.length() - 1); } } else { // Get the name of the last group. We should only have one group unless // we're running in Cascading local mode (or maybe HashJoin on map side???) // FUTURE - try to pick the "best" group name? // or combine first/last group names? stepName = groups.get(groups.size() - 1).getName(); } // We want the full step name to be <our better step name> (step #/total steps) <optional tap identifier> // The extra stuff should already exist for the step, so just append it. String curStepName = step.getName(); if (curStepName != null) { stepName = String.format("%s %s", stepName, curStepName); } // setName exists, but it's protected. So we use our special class that's in the // same package, to work around this. NamingFlowStep.setName(step, stepName); // But wait, the JobConf (for Hadoop jobs) already has the job name set in its // config, so we need to update that as well. Here we want to set the job name // to be <flow name>/<step name> if (step instanceof HadoopFlowStep) { HadoopFlowStep hfs = (HadoopFlowStep) step; JobConf conf = hfs.getConfig(); String jobName = String.format("%s/%s", step.getFlowName(), stepName); conf.setJobName(jobName); } } }