Java tutorial
/* * Copyright 2009-2013 Scale Unlimited * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * */package bixo.config; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.File; import java.util.Date; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import cascading.flow.FlowConnector; import cascading.flow.FlowProcess; import cascading.flow.FlowProcess.NullFlowProcess; import cascading.flow.hadoop.HadoopFlowProcess; import cascading.flow.local.LocalFlowProcess; import cascading.scheme.Scheme; import cascading.tap.SinkMode; import cascading.tap.Tap; import cascading.tuple.Fields; import cascading.tuple.Tuple; import com.scaleunlimited.cascading.BasePath; import com.scaleunlimited.cascading.BasePlatform; import com.scaleunlimited.cascading.LoggingFlowProcess; import com.scaleunlimited.cascading.hadoop.HadoopPlatform; import com.scaleunlimited.cascading.local.LocalPlatform; @SuppressWarnings({ "rawtypes", "serial" }) public class BixoPlatform extends BasePlatform { // TODO Replace these with LocalPlatform.PLATFORM_TYPE and // HadoopPlatform.PLATFORM_TYPE? public enum Platform { Local, Hadoop } private static final long CASCADING_LOCAL_JOB_POLLING_INTERVAL = 100; private static final long LOCAL_HADOOP_JOB_POLLING_INTERVAL = 100; private BasePlatform _platform; private JobConf _hadoopJobConf = null; public BixoPlatform(Class applicationJarClass, Platform platform) throws Exception { super(applicationJarClass); if (platform == Platform.Local) { _platform = new LocalPlatform(applicationJarClass); setJobPollingInterval(CASCADING_LOCAL_JOB_POLLING_INTERVAL); } else { configureHadoopPlatform(applicationJarClass, new JobConf()); } } public BixoPlatform(Class applicationJarClass, Configuration conf) throws Exception { this(applicationJarClass, new JobConf(conf)); } public BixoPlatform(Class applicationJarClass, JobConf conf) throws Exception { super(applicationJarClass); configureHadoopPlatform(applicationJarClass, conf); } private void configureHadoopPlatform(Class applicationJarClass, JobConf jobConf) throws Exception { _hadoopJobConf = jobConf; HadoopPlatform hp = new HadoopPlatform(applicationJarClass, _hadoopJobConf); _platform = hp; // Special configuration for Hadoop. if (isLocal()) { setNumReduceTasks(1); setJobPollingInterval(LOCAL_HADOOP_JOB_POLLING_INTERVAL); } else { setNumReduceTasks(BasePlatform.CLUSTER_REDUCER_COUNT); } hp.setMapSpeculativeExecution(false); hp.setReduceSpeculativeExecution(false); } @Override public String getPlatformType() { return _platform.getPlatformType(); } public static Tuple clone(Tuple t, FlowProcess flowProcess) { if (flowProcess instanceof LoggingFlowProcess) { // Need to unwrap to get at actual flow process. flowProcess = ((LoggingFlowProcess) flowProcess).getDelegate(); } if (flowProcess instanceof HadoopFlowProcess) { // Hadoop will do a deep copy implicitly (via serialization), so we don't // have to do any copying ourselves, and thus the tuple isn't modified. return t; } else if (flowProcess instanceof NullFlowProcess) { // I'm guessing we don't need to copy Tuples when running a CascadingTestCase. return t; } else if (flowProcess instanceof LocalFlowProcess) { // TODO We don't really need to copy the Tuple if every value // inside is immutable. // Cascading local mode just does a shallow copy of the tuple, so we need // to clone the tuple, and then deep-clone any tuple fields that aren't scalars. // Currently we only handle Tuple & Date as an embedded object, or anything // that's Writable. Tuple result = t; for (int i = 0; i < result.size(); i++) { Object value = result.getObject(i); if ((value == null) || (value instanceof Boolean) || (value instanceof String) || (value instanceof Double) || (value instanceof Float) || (value instanceof Integer) || (value instanceof Long) || (value instanceof Short)) { // All set, Cascading will do shallow copy } else { // we need to clone the Tuple if (result == t) { result = new Tuple(t); } result.set(i, cloneValue(value, i)); } } return result; } else { throw new IllegalArgumentException( "Unknown class of flow process: " + flowProcess.getClass().getName()); } } private static Object cloneValue(Object value, int index) { if (value instanceof Tuple) { return new Tuple((Tuple) value); } else if (value instanceof Date) { return new Date(((Date) value).getTime()); } else if (value instanceof Writable) { // FUTURE - add threadlocal variables that we reuse here, versus re-allocating constantly. try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); ((Writable) value).write(new DataOutputStream(baos)); Object result = value.getClass().newInstance(); ((Writable) result).readFields(new DataInputStream(new ByteArrayInputStream(baos.toByteArray()))); return result; } catch (Exception e) { throw new RuntimeException("Error doing deep copy of Writable", e); } } else { // FUTURE - add support for Serializable types throw new RuntimeException( String.format("Field value of type %s at position %d can't be cloned, unknown type", value.getClass().getName(), index)); } } @Override public File getDefaultLogDir() { return _platform.getDefaultLogDir(); } @Override public BasePath getTempDir() throws Exception { return _platform.getTempDir(); } @Override public boolean isLocal() { return _platform.isLocal(); } @Override public boolean isTextSchemeCompressable() { return _platform.isTextSchemeCompressable(); } @Override public Scheme makeBinaryScheme(Fields fields) throws Exception { return _platform.makeBinaryScheme(fields); } @Override public FlowConnector makeFlowConnector() throws Exception { return _platform.makeFlowConnector(); } @Override public FlowProcess makeFlowProcess() throws Exception { return _platform.makeFlowProcess(); } @Override public BasePath makePath(String path) throws Exception { return _platform.makePath(path); } @Override public BasePath makePath(BasePath parent, String subdir) throws Exception { return _platform.makePath(parent, subdir); } @Override public Tap makeTap(Scheme scheme, BasePath path, SinkMode mode) throws Exception { return _platform.makeTap(scheme, path, mode); } @Override public Tap makeTemplateTap(Tap tap, String pattern, Fields fields) throws Exception { return _platform.makeTemplateTap(tap, pattern, fields); } @Override public Scheme makeTextScheme() throws Exception { return _platform.makeTextScheme(); } @Override public Scheme makeTextScheme(boolean isEnableCompression) throws Exception { return _platform.makeTextScheme(isEnableCompression); } @Override public boolean rename(BasePath srcPath, BasePath destPath) throws Exception { return _platform.rename(srcPath, destPath); } @Override public void resetNumReduceTasks() throws Exception { _platform.resetNumReduceTasks(); } @Override public void setFlowPriority(FlowPriority flowPriority) throws Exception { _platform.setFlowPriority(flowPriority); } @Override public void setNumReduceTasks(int numReduceTasks) throws Exception { _platform.setNumReduceTasks(numReduceTasks); } @Override public int getNumReduceTasks() throws Exception { return _platform.getNumReduceTasks(); } @Override public String shareLocalDir(String localDirName) { return _platform.shareLocalDir(localDirName); } @Override public String copySharedDirToLocal(FlowProcess flowProcess, String sharedDirName) { return _platform.copySharedDirToLocal(flowProcess, sharedDirName); } public String getProperty(String name) { return _platform.getProperty(name); } public void setProperty(String name, int value) { _platform.setProperty(name, value); } public void setProperty(String name, String value) { _platform.setProperty(name, value); } }