org.apache.jena.tdbloader4.FourthReducer.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.jena.tdbloader4.FourthReducer.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.tdbloader4;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.zip.GZIPOutputStream;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.jena.tdbloader4.io.LongQuadWritable;
import org.openjena.atlas.event.Event;
import org.openjena.atlas.event.EventManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reducer that writes each incoming {@link LongQuadWritable} key as one text line into a
 * gzip-compressed file in a task-local directory. One file is kept open per index name
 * (as returned by {@code key.getIndexName()}) for the lifetime of the task; the local output
 * is handed back to the {@link FileSystem} via {@code completeLocalOutput} in
 * {@link #cleanup(Context)}. Nothing is emitted through the {@code Context}.
 */
public class FourthReducer extends Reducer<LongQuadWritable, NullWritable, NullWritable, NullWritable> {

    private static final Logger log = LoggerFactory.getLogger(FourthReducer.class);

    /** Open output streams, keyed by index name. */
    private Map<String, OutputStream> outputs;
    private FileSystem fs;
    /** Task-local directory the gzip files are written into. */
    private Path outLocal;
    /** Work output path of this task attempt, as reported by {@link FileOutputFormat}. */
    private Path outRemote;
    private TaskAttemptID taskAttemptID;
    private Counters counters;

    /**
     * Prepares the task-local output directory and registers it with the file system.
     *
     * @param context the reducer context supplying configuration, job and task attempt ids
     * @throws TDBLoader4Exception wrapping any failure while preparing the output locations
     */
    @Override
    public void setup(Context context) {
        this.taskAttemptID = context.getTaskAttemptID();
        outputs = new HashMap<String, OutputStream>();
        String outputRootDirectory = context.getConfiguration().get(Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR,
                Constants.OPTION_FOURTH_LOCAL_OUTPUT_DIR_DEFAULT);
        try {
            fs = FileSystem.get(context.getConfiguration());
            outRemote = FileOutputFormat.getWorkOutputPath(context);
            outLocal = new Path(outputRootDirectory,
                    context.getJobName() + "_" + context.getJobID() + "_" + taskAttemptID);
            // mkdirs() rather than mkdir(): the configured root directory may not exist yet.
            // Fail here with a clear message instead of later when the first file is opened.
            File localDirectory = new File(outLocal.toString());
            if (!localDirectory.mkdirs() && !localDirectory.isDirectory()) {
                throw new IOException("Unable to create local output directory " + localDirectory);
            }
            // TODO: does this make sense?
            fs.setReplication(outLocal, (short) 2);
            fs.startLocalOutput(outRemote, outLocal);
        } catch (Exception e) {
            throw new TDBLoader4Exception(e);
        }
        counters = new Counters(context);
    }

    /**
     * Appends one line for {@code key} to the file of the key's index. The line holds
     * {@code Utils.toHex} of components 0, 1 and 2 separated by single spaces, followed by
     * component 3 unless it equals {@code -1}, and is terminated by a newline.
     *
     * @param key     the quad to write; also determines the target file via its index name
     * @param values  the grouped values; only the first one is read, for debug logging
     * @param context the reducer context, used to report progress
     * @throws IOException          if the output file cannot be opened or written
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    public void reduce(LongQuadWritable key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        log.debug("< ({}, {})", key, values.iterator().next());

        String filename = key.getIndexName();
        // getOutputStream never returns null: it either finds or creates the stream.
        OutputStream out = getOutputStream(filename);
        out.write(Utils.toHex(key.get(0)));
        out.write(' ');
        out.write(Utils.toHex(key.get(1)));
        out.write(' ');
        out.write(Utils.toHex(key.get(2)));
        // -1 in the fourth slot means there is no fourth component to write
        // (presumably a triple rather than a quad -- confirm against LongQuadWritable).
        if (key.get(3) != -1L) {
            out.write(' ');
            out.write(Utils.toHex(key.get(3)));
        }
        out.write('\n');

        context.progress();
        EventManager.send(counters, new Event(Constants.eventRecord, null));
        log.debug("> {}:{}", filename, key);
    }

    /**
     * Returns the stream for {@code filename}, opening
     * {@code <outLocal>/<filename>_<taskAttemptID>.gz} on first use.
     *
     * @param filename the index name used as map key and file name prefix
     * @return the cached or newly created gzip stream, never {@code null}
     * @throws IOException if the file or the gzip stream cannot be created
     */
    private OutputStream getOutputStream(String filename) throws IOException {
        OutputStream output = outputs.get(filename);
        if (output == null) {
            FileOutputStream file = new FileOutputStream(
                    outLocal.toString() + "/" + filename + "_" + taskAttemptID + ".gz");
            try {
                output = new GZIPOutputStream(file);
            } catch (IOException e) {
                // The gzip constructor failed; release the file handle before propagating.
                try {
                    file.close();
                } catch (IOException closeFailure) {
                    log.warn("Failed to close " + filename + " after gzip stream creation failed", closeFailure);
                }
                throw e;
            }
            outputs.put(filename, output);
        }
        return output;
    }

    /**
     * Closes every open output stream, completes the local output on the file system and
     * closes the counters. All streams and the counters are closed even if one of the streams
     * fails to close; in that case the first failure is rethrown and the local output is not
     * completed.
     *
     * @param context the reducer context (unused)
     * @throws IOException if closing a stream or completing the local output fails
     */
    @Override
    public void cleanup(Context context) throws IOException {
        try {
            IOException firstFailure = closeOutputs();
            if (firstFailure != null) {
                throw firstFailure;
            }
            fs.completeLocalOutput(outRemote, outLocal);
        } finally {
            counters.close();
        }
    }

    /**
     * Closes all cached streams and empties the cache.
     *
     * @return the first {@link IOException} raised while closing, or {@code null} if none;
     *         later failures are logged
     */
    private IOException closeOutputs() {
        IOException firstFailure = null;
        for (Map.Entry<String, OutputStream> entry : outputs.entrySet()) {
            try {
                entry.getValue().close();
            } catch (IOException e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    log.warn("Failed to close output for index " + entry.getKey(), e);
                }
            }
        }
        outputs.clear();
        return firstFailure;
    }

}