kafka.etl.KafkaETLUtils.java Source code

Java tutorial

Introduction

Here is the source code for kafka.etl.KafkaETLUtils.java

Source

/*
 * Copyright 2010 LinkedIn
 * 
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package kafka.etl;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.BytesWritable;

/**
 * Static helpers used by the Kafka ETL Hadoop jobs: listing and reading files
 * on a {@link FileSystem}, passing {@link Props} through a job
 * {@link Configuration}, locating the jar that contains a class, and copying
 * the valid bytes out of a {@link BytesWritable}.
 */
public class KafkaETLUtils {

    /**
     * Accepts every path whose last component starts with neither "_" nor ".".
     */
    public static PathFilter PATH_FILTER = new PathFilter() {
        @Override
        public boolean accept(Path path) {
            String name = path.getName();
            return !name.startsWith("_") && !name.startsWith(".");
        }
    };

    /**
     * Returns the last child of {@code path} (children filtered through
     * {@link #PATH_FILTER} and sorted by the natural ordering of
     * {@link FileStatus}).
     *
     * @param path directory to list
     * @param fs   file system that owns {@code path}
     * @return the path of the last child, or {@code path} itself when the
     *         listing is empty
     * @throws IOException if the listing fails
     */
    public static Path getLastPath(Path path, FileSystem fs) throws IOException {

        FileStatus[] statuses = fs.listStatus(path, PATH_FILTER);

        // Defensive: a null listing (reported by some older Hadoop releases
        // for a missing path) is treated like an empty one instead of
        // failing with a NullPointerException.
        if (statuses == null || statuses.length == 0) {
            return path;
        }

        Arrays.sort(statuses);
        return statuses[statuses.length - 1].getPath();
    }

    /**
     * Returns the text after the last {@link Path#SEPARATOR} in the URI form
     * of {@code path}.
     *
     * @param path path to inspect
     * @return the last separator-delimited component
     * @throws IOException if the path has no component at all
     */
    public static String getFileName(Path path) throws IOException {
        String fullname = path.toUri().toString();
        String[] parts = fullname.split(Path.SEPARATOR);
        if (parts.length < 1) {
            throw new IOException("Invalid path " + fullname);
        }
        return parts[parts.length - 1];
    }

    /**
     * Convenience overload of {@link #readText(FileSystem, Path)} taking the
     * file name as a string.
     *
     * @param fs        file system to read from
     * @param inputFile name of the file to read
     * @return the trimmed, non-empty lines of the file
     * @throws FileNotFoundException if the file does not exist
     * @throws IOException           if reading fails
     */
    public static List<String> readText(FileSystem fs, String inputFile) throws IOException, FileNotFoundException {
        Path path = new Path(inputFile);
        return readText(fs, path);
    }

    /**
     * Reads a text file as UTF-8 and returns its lines, each trimmed, with
     * blank lines dropped.
     *
     * @param fs   file system to read from
     * @param path file to read
     * @return the trimmed, non-empty lines in file order
     * @throws FileNotFoundException if {@code path} does not exist
     * @throws IOException           if reading fails
     */
    public static List<String> readText(FileSystem fs, Path path) throws IOException, FileNotFoundException {
        if (!fs.exists(path)) {
            throw new FileNotFoundException("File " + path + " doesn't exist!");
        }

        InputStream raw = fs.open(path);
        try {
            // Explicit charset: the platform default may differ between
            // the machines that write and read the file.
            BufferedReader in = new BufferedReader(new InputStreamReader(raw, "UTF-8"));
            List<String> buf = new ArrayList<String>();
            String line;
            while ((line = in.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.length() > 0) {
                    buf.add(trimmed);
                }
            }
            return buf;
        } finally {
            // Closing the underlying stream also covers a failed readLine().
            raw.close();
        }
    }

    /**
     * Writes {@code content} followed by a line separator to a local
     * temporary file (UTF-8) and then moves that file to {@code outPath} on
     * {@code fs}.
     *
     * @param fs      destination file system
     * @param outPath destination path
     * @param content text to write
     * @throws IOException if the temporary file cannot be created or written,
     *                     or if the move fails
     */
    public static void writeText(FileSystem fs, Path outPath, String content) throws IOException {
        // createTempFile guarantees a unique name, unlike a bare timestamp
        // which collides when two writers start in the same millisecond.
        File localFile = File.createTempFile("KafkaETL_tmp_", "");
        try {
            PrintWriter writer = new PrintWriter(
                    new OutputStreamWriter(new FileOutputStream(localFile), "UTF-8"));
            try {
                writer.println(content);
            } finally {
                writer.close();
            }
            // PrintWriter never throws on I/O failure; it only records it.
            if (writer.checkError()) {
                throw new IOException("Failed to write temporary file " + localFile);
            }

            Path src = new Path(localFile.getAbsolutePath());
            fs.moveFromLocalFile(src, outPath);
        } finally {
            // No-op after a successful move; removes the leftover otherwise.
            localFile.delete();
        }
    }

    /**
     * Rebuilds the {@link Props} stored in {@code conf} under the key
     * {@code kafka.etl.props}.
     *
     * @param conf job configuration to read from
     * @return the properties parsed from the stored string
     * @throws UndefinedPropertyException if the key is not set
     */
    public static Props getPropsFromJob(Configuration conf) {
        String propsString = conf.get("kafka.etl.props");
        if (propsString == null) {
            throw new UndefinedPropertyException(
                    "The required property kafka.etl.props was not found in the Configuration.");
        }
        try {
            ByteArrayInputStream input = new ByteArrayInputStream(propsString.getBytes("UTF-8"));
            Properties properties = new Properties();
            properties.load(input);
            return new Props(properties);
        } catch (IOException e) {
            // In-memory stream: an IOException is not expected here.
            throw new RuntimeException("This is not possible!", e);
        }
    }

    /**
     * Serializes {@code props} and stores the result in {@code conf} under
     * the key {@code kafka.etl.props}.
     *
     * @param conf  job configuration to write to
     * @param props properties to store
     */
    public static void setPropsInJob(Configuration conf, Props props) {
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        try {
            props.store(output);
            conf.set("kafka.etl.props", new String(output.toByteArray(), "UTF-8"));
        } catch (IOException e) {
            // In-memory stream: an IOException is not expected here.
            throw new RuntimeException("This is not possible!", e);
        }
    }

    /**
     * Loads {@link Props} from {@code file}, resolving the file system from
     * the path with a default {@link Configuration}.
     *
     * @param file location of the properties file
     * @return the loaded properties wrapped in a second {@link Props}, or an
     *         empty {@link Props} when the file does not exist
     * @throws IOException if the file system cannot be reached or the file
     *                     cannot be read
     */
    public static Props readProps(String file) throws IOException {
        Path path = new Path(file);
        FileSystem fs = path.getFileSystem(new Configuration());
        if (!fs.exists(path)) {
            return new Props();
        }

        InputStream input = fs.open(path);
        try {
            // wrap it up in another layer so that the user can override
            // properties
            Props p = new Props(input);
            return new Props(p);
        } finally {
            input.close();
        }
    }

    /**
     * Finds the jar that contains the class file of {@code my_class}.
     *
     * @param my_class class to look up
     * @param loader   class loader used to locate the class file
     * @return the jar path, or {@code null} if the class is not loaded from a jar
     */
    public static String findContainingJar(Class<?> my_class, ClassLoader loader) {
        String class_file = my_class.getName().replace('.', '/') + ".class";
        return findContainingJar(class_file, loader);
    }

    /**
     * Finds the first jar on {@code loader}'s resource path that contains
     * {@code fileName}.
     *
     * @param fileName resource name, e.g. {@code com/example/Foo.class}
     * @param loader   class loader used to locate the resource
     * @return the decoded file-system path of the jar, or {@code null} if no
     *         {@code jar:} URL is found for the resource
     * @throws RuntimeException wrapping the {@link IOException} if the
     *                          resource lookup fails
     */
    public static String findContainingJar(String fileName, ClassLoader loader) {
        try {
            Enumeration<URL> itr = loader.getResources(fileName);
            while (itr.hasMoreElements()) {
                URL url = itr.nextElement();
                if (!"jar".equals(url.getProtocol())) {
                    continue;
                }
                String toReturn = url.getPath();
                if (toReturn.startsWith("file:")) {
                    toReturn = toReturn.substring("file:".length());
                }
                // URLDecoder implements form decoding, which turns '+' into
                // a space. A '+' in a file path is literal, so protect it
                // before decoding.
                toReturn = toReturn.replace("+", "%2B");
                toReturn = URLDecoder.decode(toReturn, "UTF-8");
                // Drop the "!/entry" suffix that follows the jar location.
                return toReturn.replaceAll("!.*$", "");
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return null;
    }

    /**
     * Returns exactly the valid bytes held by {@code val}.
     *
     * @param val writable to read
     * @return the backing array itself when it is fully used, otherwise a
     *         copy truncated to {@code val.getLength()} bytes
     */
    public static byte[] getBytes(BytesWritable val) {

        byte[] buffer = val.getBytes();

        /* FIXME: remove the following part once the below JIRA is fixed
         * https://issues.apache.org/jira/browse/HADOOP-6298
         */
        int len = val.getLength();
        if (len < buffer.length) {
            // The backing array may be longer than the valid data.
            return Arrays.copyOf(buffer, len);
        }

        return buffer;
    }

}