com.linkedin.cubert.io.text.PigTextOutputFormatWrapper.java Source code

Java tutorial

Introduction

Here is the source code for com.linkedin.cubert.io.text.PigTextOutputFormatWrapper.java

Source

/* (c) 2014 LinkedIn Corp. All rights reserved.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package com.linkedin.cubert.io.text;

import java.io.IOException;

import org.apache.commons.lang.StringEscapeUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.pig.backend.hadoop.executionengine.mapReduceLayer.PigTextOutputFormat;
import org.apache.pig.data.Tuple;

import com.linkedin.cubert.plan.physical.CubertStrings;

/***
 * 
 * The wrapper around pig text output format. Provides the ability to set the appropriate
 * field separator while writing the text file. We need this wrapper because
 * PigTextOutputFormat doesn't provide empty constructor, which means it's objects cannot
 * be created via reflection. This wrapper solves both problems.
 * 
 * @author Krishna Puttaswamy
 * 
 * */

public class PigTextOutputFormatWrapper<K, V> extends PigTextOutputFormat {
    public static byte defaultDelimiter = '\t';

    public PigTextOutputFormatWrapper() {
        super(defaultDelimiter);
    }

    public PigTextOutputFormatWrapper(byte delimiter) {
        super(delimiter);
    }

    @Override
    public RecordWriter<WritableComparable, Tuple> getRecordWriter(TaskAttemptContext context)
            throws IOException, InterruptedException {
        Configuration conf = context.getConfiguration();

        PigTextOutputFormat outputFormat;
        if (conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR) == null) {
            outputFormat = new PigTextOutputFormat(defaultDelimiter);
        } else {
            String str = conf.get(CubertStrings.TEXT_OUTPUT_SEPARATOR);
            str = StringEscapeUtils.unescapeJava(str);
            byte[] bytes = str.getBytes("UTF-8");

            if (bytes.length > 1)
                throw new RuntimeException(String.format("Invalid separator in text output format %s", str));

            outputFormat = new PigTextOutputFormat(bytes[0]);
        }

        return outputFormat.getRecordWriter(context);
    }
}