org.apache.pig.piggybank.test.storage.TestMultiStorageCompression.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.pig.piggybank.test.storage.TestMultiStorageCompression.java

Source

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the
 * NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is
 * distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and limitations under the License.
 */

package org.apache.pig.piggybank.test.storage;

import static org.apache.pig.ExecType.LOCAL;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import junit.framework.TestCase;

import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.compress.BZip2Codec;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionInputStream;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.pig.PigServer;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.impl.logicalLayer.FrontendException;
import org.apache.pig.test.Util;

public class TestMultiStorageCompression extends TestCase {

    private static String patternString = "(\\d+)!+(\\w+)~+(\\w+)";
    public static ArrayList<String[]> data = new ArrayList<String[]>();
    static {
        data.add(new String[] { "f1,a,123" });
        data.add(new String[] { "f2,b,234" });
        data.add(new String[] { "f3,c,345" });
        data.add(new String[] { "f4,d,567" });
    }

    public void testMultiStorageShouldSupportBz2() throws Exception {

        String type = "bz2";
        List<String> filesToDelete = new ArrayList<String>();

        String tmpDir = System.getProperty("java.io.tmpdir");
        String outputPath = tmpDir + File.separator + "output001." + type;

        filesToDelete.add(outputPath);

        try {
            runQuery(outputPath, type);
            verifyResults(type, filesToDelete, outputPath);
        } finally {
            cleanUpDirs(filesToDelete);
        }
    }

    public void testMultiStorageShouldSupportGz() throws Exception {

        String type = "gz";
        List<String> filesToDelete = new ArrayList<String>();

        String tmpDir = System.getProperty("java.io.tmpdir");
        String outputPath = tmpDir + File.separator + "output001." + type;

        filesToDelete.add(outputPath);

        try {
            runQuery(outputPath, type);
            verifyResults(type, filesToDelete, outputPath);
        } finally {
            cleanUpDirs(filesToDelete);
        }
    }

    private void cleanUpDirs(List<String> filesToDelete) {
        // Delete files recursively
        Collections.reverse(filesToDelete);
        for (String string : filesToDelete)
            new File(string).delete();
    }

    private void verifyResults(String type, List<String> filesToDelete, String outputPath)
            throws IOException, FileNotFoundException {
        // Verify the output
        File outputDir = new File(outputPath);
        List<String> indexFolders = Arrays.asList(outputDir.list());

        // Assert whether all keys are present
        assertTrue(indexFolders.contains("f1." + type));
        assertTrue(indexFolders.contains("f2." + type));
        assertTrue(indexFolders.contains("f3." + type));
        assertTrue(indexFolders.contains("f4." + type));

        // Sort so that assertions are easy
        Collections.sort(indexFolders);

        for (int i = 0; i < indexFolders.size(); i++) {

            String indexFolder = indexFolders.get(i);
            if (indexFolder.startsWith("._SUCCESS") || indexFolder.startsWith("_SUCCESS"))
                continue;
            String topFolder = outputPath + File.separator + indexFolder;
            File indexFolderFile = new File(topFolder);
            filesToDelete.add(topFolder);
            String[] list = indexFolderFile.list();
            for (String outputFile : list) {

                String file = topFolder + File.separator + outputFile;
                filesToDelete.add(file);

                // Skip off any file starting with .
                if (outputFile.startsWith("."))
                    continue;

                // Try to read the records using the codec
                CompressionCodec codec = null;

                // Use the codec according to the test case
                if (type.equals("bz2")) {
                    codec = new BZip2Codec();
                } else if (type.equals("gz")) {
                    codec = new GzipCodec();
                }
                if (codec instanceof Configurable) {
                    ((Configurable) codec).setConf(new Configuration());
                }

                CompressionInputStream createInputStream = codec.createInputStream(new FileInputStream(file));
                int b;
                StringBuffer sb = new StringBuffer();
                while ((b = createInputStream.read()) != -1) {
                    sb.append((char) b);
                }
                createInputStream.close();

                // Assert for the number of fields and keys.
                String[] fields = sb.toString().split("\\t");
                assertEquals(3, fields.length);
                String id = indexFolder.substring(1, 2);
                assertEquals("f" + id, fields[0]);

            }

        }
    }

    private void runQuery(String outputPath, String compressionType)
            throws Exception, ExecException, IOException, FrontendException {

        // create a data file
        String filename = TestHelper.createTempFile(data, "");
        PigServer pig = new PigServer(LOCAL);
        filename = filename.replace("\\", "\\\\");
        patternString = patternString.replace("\\", "\\\\");
        String query = "A = LOAD '" + Util.encodeEscape(filename) + "' USING PigStorage(',') as (a,b,c);";

        String query2 = "STORE A INTO '" + Util.encodeEscape(outputPath)
                + "' USING org.apache.pig.piggybank.storage.MultiStorage" + "('" + Util.encodeEscape(outputPath)
                + "','0', '" + compressionType + "', '\\t');";

        // Run Pig
        pig.setBatchOn();
        pig.registerQuery(query);
        pig.registerQuery(query2);

        pig.executeBatch();
    }

}