nur.aini.hadoop.CopyMergeRegexToLocal.java Source code

Java tutorial

Introduction

Here is the source code for nur.aini.hadoop.CopyMergeRegexToLocal.java

Source

package nur.aini.hadoop;
/**
 * Class that merges several files in one directory whose names match a given pattern
 * (the pattern is passed to FileSystem.globStatus, so it is a glob rather than a full regex).
 * @author Nur Aini Rakhmawati
 * @since 13 February 2011
 * @license GPL
 */

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.io.IOUtils;

/**
 * Concatenates every HDFS file matching a glob pattern into a single file on
 * the local file system.
 *
 * <p>The HDFS handle is held in a static field that is initialised by the
 * first instance constructed; later instances reuse that handle whatever
 * {@code hadoopPath} they are given. The lazy initialisation is not
 * synchronised.
 */
public class CopyMergeRegexToLocal {

    /** Fallback Hadoop installation directory used by {@link #main(String[])}. */
    private static final String DEFAULT_HADOOP_PATH = "/home/iin/hadoop-20.2/";
    /** Fallback source glob used by {@link #main(String[])}. */
    private static final String DEFAULT_SOURCE_GLOB = "dirtxt/*.txt";
    /** Fallback local destination file used by {@link #main(String[])}. */
    private static final String DEFAULT_DESTINATION = "/home/iin/kumpulantxt";

    private static FileSystem hdfs = null;
    private final Configuration conf = new Configuration();

    /**
     * Loads the Hadoop site configuration and, if no HDFS handle exists yet,
     * opens one.
     *
     * <p>A failure to open the file system is reported on standard error and
     * leaves the handle unset; {@link #run(String, String)} then refuses to
     * work instead of failing with a {@code NullPointerException}.
     *
     * @param hadoopPath Hadoop installation directory; {@code conf/core-site.xml}
     *                   and {@code conf/hdfs-site.xml} are read from beneath it
     */
    public CopyMergeRegexToLocal(String hadoopPath) {
        // Load the site files for every instance, not only the first one, so
        // that this instance's conf (used for the local file system and the
        // copy buffer) is never left at bare defaults.
        conf.addResource(new Path(hadoopPath + "/conf/core-site.xml"));
        conf.addResource(new Path(hadoopPath + "/conf/hdfs-site.xml"));

        if (hdfs == null) {
            try {
                hdfs = FileSystem.get(conf);
            } catch (IOException ex) {
                System.err.print(ex.getMessage());
            }
        }
    }

    /**
     * Copies the content of every file matching {@code srcf} into the local
     * file {@code dst}, in the order returned by the glob expansion.
     *
     * <p>I/O errors are reported on standard error and abort the merge; both
     * the current input stream and the output stream are closed in every case.
     *
     * @param srcf glob pattern, resolved as {@code "./" + srcf} against the
     *             HDFS handle
     * @param dst  path of the local file to create
     */
    public void run(String srcf, String dst) {

        if (hdfs == null) {
            // The constructor already reported why the file system is missing.
            System.err.println("HDFS file system is not initialised; nothing copied");
            return;
        }

        final Path srcPath = new Path("./" + srcf);
        final Path desPath = new Path(dst);
        OutputStream out = null;
        try {
            Path[] srcs = FileUtil.stat2Paths(hdfs.globStatus(srcPath), srcPath);
            out = FileSystem.getLocal(conf).create(desPath);
            for (Path src : srcs) {
                System.out.println(src);
                InputStream in = hdfs.open(src);
                try {
                    // 'false': keep the streams open; they are closed explicitly below.
                    IOUtils.copyBytes(in, out, conf, false);
                } finally {
                    IOUtils.closeStream(in);
                }
            }
            // Close on the success path inside the try so that a failing
            // flush/close is still reported by the catch below.
            out.close();
            out = null;
        } catch (IOException ex) {
            System.err.print(ex.getMessage());
        } finally {
            // Only non-null when an error interrupted the merge.
            IOUtils.closeStream(out);
        }
    }

    /**
     * Command-line entry point.
     *
     * <p>With exactly three arguments they are taken as the Hadoop installation
     * directory, the source glob and the local destination file. With any other
     * argument count the built-in default locations are used.
     *
     * @param args optional {@code hadoopPath srcGlob dstFile}
     */
    public static void main(String[] args) {
        final boolean custom = args != null && args.length == 3;
        final String hadoopPath = custom ? args[0] : DEFAULT_HADOOP_PATH;
        final String source = custom ? args[1] : DEFAULT_SOURCE_GLOB;
        final String destination = custom ? args[2] : DEFAULT_DESTINATION;

        CopyMergeRegexToLocal cp = new CopyMergeRegexToLocal(hadoopPath);
        cp.run(source, destination);
    }

}