Java tutorial
/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.tdbloader4; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; import java.util.HashMap; import java.util.Map; import java.util.zip.GZIPOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.jena.tdbloader4.io.LongQuadWritable; import org.openjena.atlas.event.Event; import org.openjena.atlas.event.EventManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class FourthReducer extends Reducer<LongQuadWritable, NullWritable, NullWritable, NullWritable> { private static final Logger log = LoggerFactory.getLogger(FourthReducer.class); private Map<String, OutputStream> outputs; private FileSystem fs; private Path outLocal; private Path outRemote; private TaskAttemptID taskAttemptID; private Counters counters; @Override public void setup(Context context) { this.taskAttemptID = context.getTaskAttemptID(); outputs = new HashMap<String, OutputStream>(); String outputRootDirectory = context.getConfiguration().get(Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR, Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR_DEFAULT); try { fs = FileSystem.get(context.getConfiguration()); outRemote = FileOutputFormat.getWorkOutputPath(context); outLocal = new Path(outputRootDirectory, context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID); new File(outLocal.toString()).mkdir(); // TODO: does this make sense? fs.setReplication(outLocal, (short) 2); fs.startLocalOutput(outRemote, outLocal); } catch (Exception e) { throw new TDBLoader4Exception(e); } counters = new Counters(context); } @Override public void reduce(LongQuadWritable key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException { log.debug("< ({}, {})", key, values.iterator().next()); String filename = key.getIndexName(); OutputStream out = getOutputStream(filename); if (out != null) { out.write(Utils.toHex(key.get(0))); out.write(' '); out.write(Utils.toHex(key.get(1))); out.write(' '); out.write(Utils.toHex(key.get(2))); if (key.get(3) != -1l) { out.write(' '); out.write(Utils.toHex(key.get(3))); } out.write('\n'); } context.progress(); EventManager.send(counters, new Event(Constants.eventRecord, null)); log.debug("> {}:{}", filename, key); } private OutputStream getOutputStream(String filename) throws IOException { OutputStream output = null; if (!outputs.containsKey(filename)) { output = new GZIPOutputStream( new FileOutputStream(outLocal.toString() + "/" + filename + "_" + taskAttemptID + ".gz")); outputs.put(filename, output); } return outputs.get(filename); } @Override public void cleanup(Context context) throws IOException { for (String filename : outputs.keySet()) { outputs.get(filename).close(); } fs.completeLocalOutput(outRemote, outLocal); counters.close(); } }