// HadoopVersionSpecificCode implementation targeting CDH 5.2 on YARN.
/* Copyright (c) 2015 by ScaleOut Software, Inc.

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
*/
package com.scaleoutsoftware.soss.hserver.hadoop;

import com.scaleoutsoftware.soss.hserver.InvocationParameters;
import com.scaleoutsoftware.soss.hserver.MapOutputAccumulator;
import com.scaleoutsoftware.soss.hserver.interop.KeyValueProducer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.mapreduce.task.ReduceContextImpl;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.yarn.util.ResourceCalculatorProcessTree;

import java.io.IOException;
import java.lang.reflect.Constructor;
import java.net.URI;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeoutException;

/**
 * Version-specific glue code for running hServer jobs against the CDH 5.2 (YARN)
 * Hadoop distribution. It constructs task/job contexts through the concrete
 * {@code *Impl} classes of this Hadoop version, and wraps the stock mapper/reducer
 * contexts so that key/value traffic flows through hServer's own
 * {@link KeyValueProducer} / {@link MapOutputAccumulator} transport instead of
 * Hadoop's shuffle.
 */
public class HadoopVersionSpecificCode_CDH5_2_YARN extends HadoopVersionSpecificCode {

    /** Identifies which bundled Hadoop library variant this class corresponds to. */
    @Override
    public String getHadoopLibraryString() {
        return "cdh5.2.1-yarn";
    }

    /**
     * Builds a task attempt id (attempt number 0) for the given job/partition.
     *
     * @param jobID           the job this attempt belongs to
    * @param isMapper        true for a map task, false for a reduce task
     * @param hadoopPartition the task (partition) number
     */
    @Override
    public TaskAttemptID createTaskAttemptId(JobID jobID, boolean isMapper, int hadoopPartition) {
        return new TaskAttemptID(new TaskID(jobID, isMapper, hadoopPartition), 0);
    }

    /** Creates a "mapreduce"-API task attempt context backed by this version's impl class. */
    @Override
    public TaskAttemptContext createTaskAttemptContext(Configuration configuration, TaskAttemptID id) {
        return new TaskAttemptContextImpl(configuration, id);
    }

    /** Creates a "mapred" (old-API) job context backed by this version's impl class. */
    @Override
    public org.apache.hadoop.mapred.JobContext createJobContext(JobConf configuration, JobID jobID) {
        return new org.apache.hadoop.mapred.JobContextImpl(configuration, jobID);
    }

    /**
     * Builds a reducer context whose key/values iteration is fed by the hServer
     * {@code transport}, and whose output is redirected to {@code consumer} when one
     * is supplied (combiner case) or to {@code output} otherwise.
     */
    @Override
    public <KEYIN, VALUEIN, KEYOUT, VALUEOUT> Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context getReducerContext(
            Configuration configuration,
            TaskAttemptID id,
            OutputCommitter outputCommitter,
            RecordWriter<KEYOUT, VALUEOUT> output,
            KeyValueProducer<KEYIN, Iterable<VALUEIN>> transport,
            MapOutputAccumulator<KEYOUT, VALUEOUT> consumer) throws IOException, InterruptedException {
        return (new WrappingReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()).getReducerContext(configuration, id, outputCommitter, output, transport, consumer);
    }

    /**
     * Helper reducer subclass that exists only so a {@code Reducer.Context}
     * (a non-static inner class) can be instantiated outside a real reduce task.
     */
    static class WrappingReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> {

        public WrappingReducer() {
        }

        @SuppressWarnings("unchecked")
        public org.apache.hadoop.mapreduce.Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context getReducerContext(
                Configuration configuration,
                TaskAttemptID id,
                OutputCommitter outputCommitter,
                RecordWriter<KEYOUT, VALUEOUT> output,
                KeyValueProducer<KEYIN, Iterable<VALUEIN>> transport,
                MapOutputAccumulator<KEYOUT, VALUEOUT> consumer
        ) throws IOException, InterruptedException {
            return new WrappingContext(configuration, id, outputCommitter, output, transport, consumer);
        }

        /**
         * Reducer context that sources keys/values from the hServer transport and
         * (optionally) sinks output into a combine accumulator, while delegating all
         * job-metadata queries to a stock {@link ReduceContextImpl}.
         */
        public class WrappingContext extends org.apache.hadoop.mapreduce.Reducer.Context {
            // Stock context used purely as a delegate for configuration/metadata getters.
            ReduceContextImpl impl;
            // Source of keys and grouped values; may be null (then nextKey() reports no data).
            KeyValueProducer<KEYIN, Iterable<VALUEIN>> transport;
            // When non-null, write() routes results here instead of the record writer.
            MapOutputAccumulator<KEYOUT, VALUEOUT> mapOutputAccumulatorCallback;

            @SuppressWarnings("unchecked")
            WrappingContext(Configuration configuration,
                            TaskAttemptID id,
                            OutputCommitter outputCommitter,
                            RecordWriter<KEYOUT, VALUEOUT> output,
                            KeyValueProducer<KEYIN, Iterable<VALUEIN>> transport,
                            MapOutputAccumulator<KEYOUT, VALUEOUT> consumer) throws IOException, InterruptedException {
                //Override the actual key and value class with Writables, to ensure that constructor
                //will not throw exception if SerializationFactory does not support that class.
                //Any actual serialization/deserialization is performed by DataTransport, so this
                //factory is never used.
                super();
                impl = new ReduceContextImpl(configuration, id, new DummyRawIterator(), null, null, output, outputCommitter, new TaskAttemptContextImpl.DummyReporter(), null, Writable.class, Writable.class);
                this.transport = transport;
                this.mapOutputAccumulatorCallback = consumer;
            }

            /** Advances the transport to the next key; a transport timeout surfaces as IOException. */
            @Override
            public boolean nextKey() throws IOException, InterruptedException {
                try {
                    if (transport != null) {
                        return transport.readNext();
                    } else {
                        return false;
                    }
                } catch (TimeoutException e) {
                    throw new IOException("Read operation timed out.", e);
                }
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                return impl.nextKeyValue();
            }

            /** Current key comes from the transport, not from the delegate context. */
            @Override
            public KEYIN getCurrentKey() {
                return transport != null ? transport.getKey() : null;
            }

            @Override
            public Object getCurrentValue() {
                return impl.getCurrentValue();
            }

            /**
             * Emits a result: to the combine accumulator when one was supplied,
             * otherwise through the delegate's record writer.
             */
            @Override
            public void write(Object key, Object value) throws IOException, InterruptedException {
                if (mapOutputAccumulatorCallback != null) {
                    mapOutputAccumulatorCallback.saveCombineResult((KEYOUT) key, (VALUEOUT) value);
                } else {
                    impl.write(key, value);
                }
            }

            @Override
            public OutputCommitter getOutputCommitter() {
                return impl.getOutputCommitter();
            }

            // CDH-specific hook: user classpath entries win over the framework's.
            @Override
            public boolean userClassesTakesPrecedence() {
                return true;
            }

            @Override
            public TaskAttemptID getTaskAttemptID() {
                return impl.getTaskAttemptID();
            }

            @Override
            public String getStatus() {
                return impl.getStatus();
            }

            @Override
            public Counter getCounter(Enum<?> counterName) {
                return impl.getCounter(counterName);
            }

            @Override
            public Counter getCounter(String groupName, String counterName) {
                return impl.getCounter(groupName, counterName);
            }

            /** Deliberate no-op: there is no task tracker to report progress to. */
            @Override
            public void progress() {
            }

            @Override
            public void setStatus(String status) {
                impl.setStatus(status);
            }

            @Override
            public Configuration getConfiguration() {
                return impl.getConfiguration();
            }

            @Override
            public JobID getJobID() {
                return impl.getJobID();
            }

            @Override
            public int getNumReduceTasks() {
                return impl.getNumReduceTasks();
            }

            @Override
            public Path getWorkingDirectory() throws IOException {
                return impl.getWorkingDirectory();
            }

            @Override
            public Class<?> getOutputKeyClass() {
                return impl.getOutputKeyClass();
            }

            @Override
            public Class<?> getOutputValueClass() {
                return impl.getOutputValueClass();
            }

            @Override
            public Class<?> getMapOutputKeyClass() {
                return impl.getMapOutputKeyClass();
            }

            @Override
            public Class<?> getMapOutputValueClass() {
                return impl.getMapOutputValueClass();
            }

            @Override
            public String getJobName() {
                return impl.getJobName();
            }

            @Override
            public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
                return impl.getInputFormatClass();
            }

            @Override
            public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException {
                return impl.getMapperClass();
            }

            @Override
            public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException {
                return impl.getCombinerClass();
            }

            @Override
            public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException {
                return impl.getReducerClass();
            }

            @Override
            public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException {
                return impl.getOutputFormatClass();
            }

            @Override
            public Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException {
                return impl.getPartitionerClass();
            }

            @Override
            public RawComparator<?> getSortComparator() {
                return impl.getSortComparator();
            }

            @Override
            public String getJar() {
                return impl.getJar();
            }

            @Override
            public RawComparator<?> getGroupingComparator() {
                return impl.getGroupingComparator();
            }

            @Override
            public boolean getJobSetupCleanupNeeded() {
                return impl.getJobSetupCleanupNeeded();
            }

            @Override
            public boolean getSymlink() {
                return impl.getSymlink();
            }

            @Override
            public Path[] getArchiveClassPaths() {
                return impl.getArchiveClassPaths();
            }

            @Override
            public URI[] getCacheArchives() throws IOException {
                return impl.getCacheArchives();
            }

            @Override
            public URI[] getCacheFiles() throws IOException {
                return impl.getCacheFiles();
            }

            @Override
            public Path[] getLocalCacheArchives() throws IOException {
                return impl.getLocalCacheArchives();
            }

            @Override
            public Path[] getLocalCacheFiles() throws IOException {
                return impl.getLocalCacheFiles();
            }

            @Override
            public Path[] getFileClassPaths() {
                return impl.getFileClassPaths();
            }

            @Override
            public String[] getArchiveTimestamps() {
                return impl.getArchiveTimestamps();
            }

            @Override
            public String[] getFileTimestamps() {
                return impl.getFileTimestamps();
            }

            @Override
            public int getMaxMapAttempts() {
                return impl.getMaxMapAttempts();
            }

            @Override
            public int getMaxReduceAttempts() {
                return impl.getMaxReduceAttempts();
            }

            @Override
            public boolean getProfileEnabled() {
                return impl.getProfileEnabled();
            }

            @Override
            public String getProfileParams() {
                return impl.getProfileParams();
            }

            @Override
            public String getUser() {
                return impl.getUser();
            }

            @Override
            public Credentials getCredentials() {
                return impl.getCredentials();
            }

            /** Grouped values for the current key come from the transport, not the delegate. */
            @Override
            @SuppressWarnings("unchecked")
            public Iterable getValues() throws IOException, InterruptedException {
                return transport != null ? transport.getValue() : null;
            }

            @Override
            public RawComparator<?> getCombinerKeyGroupingComparator() {
                return impl.getCombinerKeyGroupingComparator();
            }

            @Override
            public float getProgress() {
                return impl.getProgress();
            }

            @Override
            public boolean getTaskCleanupNeeded() {
                return impl.getTaskCleanupNeeded();
            }

            @Override
            public Configuration.IntegerRanges getProfileTaskRange(boolean b) {
                return impl.getProfileTaskRange(b);
            }

            // Not part of the Context interface; lets hServer re-stamp the job id.
            public void setJobID(JobID jobId) {
                impl.setJobID(jobId);
            }
        }
    }

    /**
     * Builds a mapper context around the given reader/writer, suitable for invoking
     * a user-defined mapper outside a real Hadoop task.
     */
    @Override
    public <INKEY, INVALUE, OUTKEY, OUTVALUE> Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context getMapperContext(
            Configuration configuration, TaskAttemptID taskid, RecordReader reader, RecordWriter writer) throws IOException, InterruptedException {
        return new MapperContextHolder(configuration, taskid, reader, writer).getContext();
    }

    /**
     * This class overrides mapper, to provide dummy context for user-defined mapper invocation.
     */
    static class MapperContextHolder extends org.apache.hadoop.mapreduce.Mapper {
        private HServerContext context;

        /**
         * Mapper context that delegates everything to a stock {@link MapContextImpl}
         * built over the supplied reader and writer.
         */
        @SuppressWarnings("unchecked")
        public class HServerContext extends Context {
            MapContextImpl impl;

            public HServerContext(Configuration configuration, TaskAttemptID taskid, RecordReader reader, RecordWriter writer) throws IOException, InterruptedException {
                impl = new MapContextImpl(configuration, taskid, reader, writer, null, new TaskAttemptContextImpl.DummyReporter(), null);
            }

            // Not part of the Context interface; lets hServer re-stamp the job id.
            public void setJobID(JobID jobId) {
                impl.setJobID(jobId);
            }

            @Override
            public InputSplit getInputSplit() {
                return impl.getInputSplit();
            }

            @Override
            public Object getCurrentKey() throws IOException, InterruptedException {
                return impl.getCurrentKey();
            }

            // CDH-specific hook: user classpath entries win over the framework's.
            @Override
            public boolean userClassesTakesPrecedence() {
                return true;
            }

            @Override
            public Object getCurrentValue() throws IOException, InterruptedException {
                return impl.getCurrentValue();
            }

            @Override
            public boolean nextKeyValue() throws IOException, InterruptedException {
                return impl.nextKeyValue();
            }

            @Override
            public void write(Object key, Object value) throws IOException, InterruptedException {
                impl.write(key, value);
            }

            @Override
            public OutputCommitter getOutputCommitter() {
                return impl.getOutputCommitter();
            }

            @Override
            public TaskAttemptID getTaskAttemptID() {
                return impl.getTaskAttemptID();
            }

            @Override
            public String getStatus() {
                return impl.getStatus();
            }

            @Override
            public Counter getCounter(Enum<?> counterName) {
                return impl.getCounter(counterName);
            }

            @Override
            public Counter getCounter(String groupName, String counterName) {
                return impl.getCounter(groupName, counterName);
            }

            @Override
            public void progress() {
                impl.progress();
            }

            @Override
            public void setStatus(String status) {
                impl.setStatus(status);
            }

            @Override
            public Configuration getConfiguration() {
                return impl.getConfiguration();
            }

            @Override
            public JobID getJobID() {
                return impl.getJobID();
            }

            @Override
            public int getNumReduceTasks() {
                return impl.getNumReduceTasks();
            }

            @Override
            public Path getWorkingDirectory() throws IOException {
                return impl.getWorkingDirectory();
            }

            @Override
            public Class<?> getOutputKeyClass() {
                return impl.getOutputKeyClass();
            }

            @Override
            public Class<?> getOutputValueClass() {
                return impl.getOutputValueClass();
            }

            @Override
            public Class<?> getMapOutputKeyClass() {
                return impl.getMapOutputKeyClass();
            }

            @Override
            public Class<?> getMapOutputValueClass() {
                return impl.getMapOutputValueClass();
            }

            @Override
            public String getJobName() {
                return impl.getJobName();
            }

            @Override
            public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException {
                return impl.getInputFormatClass();
            }

            @Override
            public Class<? extends Mapper<?, ?, ?, ?>> getMapperClass() throws ClassNotFoundException {
                return impl.getMapperClass();
            }

            @Override
            public Class<? extends Reducer<?, ?, ?, ?>> getCombinerClass() throws ClassNotFoundException {
                return impl.getCombinerClass();
            }

            @Override
            public Class<? extends Reducer<?, ?, ?, ?>> getReducerClass() throws ClassNotFoundException {
                return impl.getReducerClass();
            }

            @Override
            public Class<? extends OutputFormat<?, ?>> getOutputFormatClass() throws ClassNotFoundException {
                return impl.getOutputFormatClass();
            }

            @Override
            public Class<? extends Partitioner<?, ?>> getPartitionerClass() throws ClassNotFoundException {
                return impl.getPartitionerClass();
            }

            @Override
            public RawComparator<?> getSortComparator() {
                return impl.getSortComparator();
            }

            @Override
            public String getJar() {
                return impl.getJar();
            }

            @Override
            public RawComparator<?> getGroupingComparator() {
                return impl.getGroupingComparator();
            }

            @Override
            public boolean getJobSetupCleanupNeeded() {
                return impl.getJobSetupCleanupNeeded();
            }

            @Override
            public boolean getSymlink() {
                return impl.getSymlink();
            }

            @Override
            public Path[] getArchiveClassPaths() {
                return impl.getArchiveClassPaths();
            }

            @Override
            public URI[] getCacheArchives() throws IOException {
                return impl.getCacheArchives();
            }

            @Override
            public URI[] getCacheFiles() throws IOException {
                return impl.getCacheFiles();
            }

            @Override
            public Path[] getLocalCacheArchives() throws IOException {
                return impl.getLocalCacheArchives();
            }

            @Override
            public Path[] getLocalCacheFiles() throws IOException {
                return impl.getLocalCacheFiles();
            }

            @Override
            public Path[] getFileClassPaths() {
                return impl.getFileClassPaths();
            }

            @Override
            public String[] getArchiveTimestamps() {
                return impl.getArchiveTimestamps();
            }

            @Override
            public String[] getFileTimestamps() {
                return impl.getFileTimestamps();
            }

            @Override
            public int getMaxMapAttempts() {
                return impl.getMaxMapAttempts();
            }

            @Override
            public int getMaxReduceAttempts() {
                return impl.getMaxReduceAttempts();
            }

            @Override
            public boolean getProfileEnabled() {
                return impl.getProfileEnabled();
            }

            @Override
            public String getProfileParams() {
                return impl.getProfileParams();
            }

            @Override
            public String getUser() {
                return impl.getUser();
            }

            @Override
            public Credentials getCredentials() {
                return impl.getCredentials();
            }

            @Override
            public RawComparator<?> getCombinerKeyGroupingComparator() {
                return impl.getCombinerKeyGroupingComparator();
            }

            @Override
            public float getProgress() {
                return impl.getProgress();
            }

            @Override
            public boolean getTaskCleanupNeeded() {
                return impl.getTaskCleanupNeeded();
            }

            @Override
            public Configuration.IntegerRanges getProfileTaskRange(boolean b) {
                return impl.getProfileTaskRange(b);
            }
        }

        public MapperContextHolder(Configuration configuration, TaskAttemptID taskid, RecordReader reader, RecordWriter writer) throws IOException, InterruptedException {
            context = new HServerContext(configuration, taskid, reader, writer);
        }

        public HServerContext getContext() {
            return context;
        }
    }

    //----------------------------MAPRED------------------

    /**
     * Creates an old-API ("mapred") task attempt context. The CDH 5.2 constructor is
     * package-private, so it is invoked through reflection.
     *
     * @throws RuntimeException wrapping any reflective failure
     */
    @Override
    public org.apache.hadoop.mapred.TaskAttemptContext createTaskAttemptContextMapred(JobConf jobConf, org.apache.hadoop.mapred.TaskAttemptID taskId) {
        try {
            Constructor<org.apache.hadoop.mapred.TaskAttemptContextImpl> contextConstructor = org.apache.hadoop.mapred.TaskAttemptContextImpl.class
                    .getDeclaredConstructor(JobConf.class, org.apache.hadoop.mapred.TaskAttemptID.class);
            contextConstructor.setAccessible(true);
            return contextConstructor.newInstance(jobConf, taskId);
        } catch (Exception e) {
            throw new RuntimeException("Cannot instantiate TaskAttemptContext.", e);
        }
    }

    //------------------INITIALIZE AND TEARDOWN

    // One distributed-cache manager per in-flight job, torn down in onJobDone().
    private static ConcurrentHashMap<JobID, DistributedCacheManager> distributedCaches = new ConcurrentHashMap<JobID, DistributedCacheManager>();

    /**
     * Per-job setup: stubs out the resource calculator (no real YARN containers here)
     * and materializes the job's distributed cache.
     */
    @Override
    public void onJobInitialize(InvocationParameters parameters) throws IOException {
        //Take this chance to stub out ResourceCalculatorProcessTree
        parameters.getHadoopInvocationParameters().getConfiguration().setClass(
                MRConfig.RESOURCE_CALCULATOR_PROCESS_TREE,
                DummyResourceCalculatorProcessTree.class, ResourceCalculatorProcessTree.class);
        //Initialize the distributed cache
        DistributedCacheManager cacheManager = new DistributedCacheManager();
        cacheManager.setup(parameters.getHadoopInvocationParameters().getConfiguration());
        distributedCaches.put(parameters.getHadoopInvocationParameters().getJobID(), cacheManager);
        super.onJobInitialize(parameters);
    }

    /** Per-job teardown: closes and discards the job's distributed-cache manager, if any. */
    @Override
    public void onJobDone(InvocationParameters parameters) throws IOException {
        DistributedCacheManager cacheManager = distributedCaches.get(parameters.getJobId());
        if (cacheManager != null) {
            cacheManager.close();
            distributedCaches.remove(parameters.getJobId());
        }
        super.onJobDone(parameters);
    }
}