BlurInputFormatTest.java — source code of org.apache.blur.mapreduce.lib.BlurInputFormatTest,
a JUnit test class from the Apache Blur project.

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.blur.mapreduce.lib;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.UUID;

import org.apache.blur.MiniCluster;
import org.apache.blur.store.buffer.BufferStore;
import org.apache.blur.thirdparty.thrift_0_9_0.TException;
import org.apache.blur.thrift.BlurClient;
import org.apache.blur.thrift.generated.Blur.Iface;
import org.apache.blur.thrift.generated.BlurException;
import org.apache.blur.thrift.generated.Column;
import org.apache.blur.thrift.generated.ColumnDefinition;
import org.apache.blur.thrift.generated.Record;
import org.apache.blur.thrift.generated.RecordMutation;
import org.apache.blur.thrift.generated.RecordMutationType;
import org.apache.blur.thrift.generated.RowMutation;
import org.apache.blur.thrift.generated.TableDescriptor;
import org.apache.blur.utils.BlurConstants;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

public class BlurInputFormatTest {

    private static Configuration conf = new Configuration();
    private static MiniCluster miniCluster;

    @BeforeClass
    public static void setupTest() throws Exception {
        setupJavaHome();
        File file = new File("./target/tmp/BlurInputFormatTest_tmp");
        String pathStr = file.getAbsoluteFile().toURI().toString();
        System.setProperty("test.build.data", pathStr + "/data");
        System.setProperty("hadoop.log.dir", pathStr + "/hadoop_log");
        miniCluster = new MiniCluster();
        miniCluster.startBlurCluster(pathStr + "/blur", 2, 2);
        miniCluster.startMrMiniCluster();
        conf = miniCluster.getMRConfiguration();

        BufferStore.initNewBuffer(128, 128 * 128);
    }

    public static void setupJavaHome() {
        String str = System.getenv("JAVA_HOME");
        if (str == null) {
            String property = System.getProperty("java.home");
            if (property != null) {
                throw new RuntimeException("JAVA_HOME not set should probably be [" + property + "].");
            }
            throw new RuntimeException("JAVA_HOME not set.");
        }
    }

    @AfterClass
    public static void teardown() throws IOException {
        if (miniCluster != null) {
            miniCluster.stopMrMiniCluster();
        }
        rm(new File("build"));
    }

    private static void rm(File file) {
        if (!file.exists()) {
            return;
        }
        if (file.isDirectory()) {
            for (File f : file.listFiles()) {
                rm(f);
            }
        }
        file.delete();
    }

    @Test
    public void testBlurInputFormatFastDisabledNoFileCache()
            throws IOException, BlurException, TException, ClassNotFoundException, InterruptedException {
        String tableName = "testBlurInputFormatFastDisabledNoFileCache";
        runTest(tableName, true, null);
    }

    @Test
    public void testBlurInputFormatFastEnabledNoFileCache()
            throws IOException, BlurException, TException, ClassNotFoundException, InterruptedException {
        String tableName = "testBlurInputFormatFastEnabledNoFileCache";
        runTest(tableName, false, null);
    }

    @Test
    public void testBlurInputFormatFastDisabledFileCache()
            throws IOException, BlurException, TException, ClassNotFoundException, InterruptedException {
        String tableName = "testBlurInputFormatFastDisabledFileCache";
        Path fileCache = new Path(miniCluster.getFileSystemUri() + "/filecache");
        runTest(tableName, true, fileCache);
    }

    @Test
    public void testBlurInputFormatFastEnabledFileCache()
            throws IOException, BlurException, TException, ClassNotFoundException, InterruptedException {
        String tableName = "testBlurInputFormatFastEnabledFileCache";
        Path fileCache = new Path(miniCluster.getFileSystemUri() + "/filecache");
        runTest(tableName, false, fileCache);
    }

    private void runTest(String tableName, boolean disableFast, Path fileCache)
            throws IOException, BlurException, TException, InterruptedException, ClassNotFoundException {
        FileSystem fileSystem = miniCluster.getFileSystem();
        Path root = new Path(fileSystem.getUri() + "/");

        creatTable(tableName, new Path(root, "tables"), disableFast);
        loadTable(tableName, 100, 100);

        Iface client = getClient();

        TableDescriptor tableDescriptor = client.describe(tableName);

        Job job = Job.getInstance(conf, "Read Data");
        job.setJarByClass(BlurInputFormatTest.class);
        job.setMapperClass(TestMapper.class);
        job.setInputFormatClass(BlurInputFormat.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TableBlurRecord.class);

        Path output = new Path(new Path(root, "output"), tableName);

        String snapshot = UUID.randomUUID().toString();
        client.createSnapshot(tableName, snapshot);

        if (fileCache != null) {
            BlurInputFormat.setLocalCachePath(job, fileCache);
        }

        BlurInputFormat.setMaxNumberOfMaps(job, 1);
        BlurInputFormat.setZooKeeperConnectionStr(job, miniCluster.getZkConnectionString());
        BlurInputFormat.addTable(job, tableDescriptor, snapshot);
        FileOutputFormat.setOutputPath(job, output);

        try {
            assertTrue(job.waitForCompletion(true));
            Counters counters = job.getCounters();
            assertMapTask(1, counters);

        } finally {
            client.removeSnapshot(tableName, snapshot);
        }

        final Map<Text, TableBlurRecord> results = new TreeMap<Text, TableBlurRecord>();
        walkOutput(output, conf, new ResultReader() {
            @Override
            public void read(Text rowId, TableBlurRecord tableBlurRecord) {
                results.put(new Text(rowId), new TableBlurRecord(tableBlurRecord));
            }
        });
        int rowId = 100;
        for (Entry<Text, TableBlurRecord> e : results.entrySet()) {
            Text r = e.getKey();
            assertEquals(new Text("row-" + rowId), r);
            BlurRecord blurRecord = new BlurRecord();
            blurRecord.setRowId("row-" + rowId);
            blurRecord.setRecordId("record-" + rowId);
            blurRecord.setFamily("fam0");
            blurRecord.addColumn("col0", "value-" + rowId);
            TableBlurRecord tableBlurRecord = new TableBlurRecord(new Text(tableName), blurRecord);
            assertEquals(tableBlurRecord, e.getValue());

            rowId++;
        }
        assertEquals(200, rowId);
    }

    private void assertMapTask(int i, Counters counters) {
        for (CounterGroup counterGroup : counters) {
            String name = counterGroup.getName();
            boolean jobCounterGroup = false;
            if (name.equals("org.apache.hadoop.mapreduce.JobCounter")) {
                jobCounterGroup = true;
            } else if (name.equals("org.apache.hadoop.mapred.JobInProgress$Counter")) {
                jobCounterGroup = true;
            }
            if (jobCounterGroup) {
                for (Counter counter : counterGroup) {
                    if (counter.getName().equals("TOTAL_LAUNCHED_MAPS")) {
                        assertEquals(1, counter.getValue());
                        return;
                    }
                }
            }
        }
        fail();
    }

    public interface ResultReader {

        void read(Text rowId, TableBlurRecord tableBlurRecord);

    }

    private void walkOutput(Path output, Configuration conf, ResultReader resultReader) throws IOException {
        FileSystem fileSystem = output.getFileSystem(conf);
        FileStatus fileStatus = fileSystem.getFileStatus(output);
        if (fileStatus.isDir()) {
            FileStatus[] listStatus = fileSystem.listStatus(output, new PathFilter() {
                @Override
                public boolean accept(Path path) {
                    return !path.getName().startsWith("_");
                }
            });
            for (FileStatus fs : listStatus) {
                walkOutput(fs.getPath(), conf, resultReader);
            }
        } else {
            Reader reader = new SequenceFile.Reader(fileSystem, output, conf);
            Text rowId = new Text();
            TableBlurRecord tableBlurRecord = new TableBlurRecord();
            while (reader.next(rowId, tableBlurRecord)) {
                resultReader.read(rowId, tableBlurRecord);
            }
            reader.close();
        }
    }

    private Iface getClient() {
        return BlurClient.getClientFromZooKeeperConnectionStr(miniCluster.getZkConnectionString());
    }

    private void loadTable(String tableName, int startId, int numb) throws BlurException, TException {
        Iface client = getClient();
        List<RowMutation> batch = new ArrayList<RowMutation>();
        for (int i = 0; i < numb; i++) {
            int id = startId + i;
            RowMutation rowMutation = new RowMutation();
            rowMutation.setTable(tableName);
            rowMutation.setRowId("row-" + Integer.toString(id));
            Record record = new Record();
            record.setFamily("fam0");
            record.setRecordId("record-" + id);
            record.addToColumns(new Column("col0", "value-" + id));
            rowMutation.addToRecordMutations(new RecordMutation(RecordMutationType.REPLACE_ENTIRE_RECORD, record));
            batch.add(rowMutation);
        }
        client.mutateBatch(batch);
    }

    private void creatTable(String tableName, Path tables, boolean fastDisable) throws BlurException, TException {
        Path tablePath = new Path(tables, tableName);
        Iface client = getClient();
        TableDescriptor tableDescriptor = new TableDescriptor();
        tableDescriptor.setTableUri(tablePath.toString());
        tableDescriptor.setName(tableName);
        tableDescriptor.setShardCount(2);
        tableDescriptor.putToTableProperties(BlurConstants.BLUR_TABLE_DISABLE_FAST_DIR,
                Boolean.toString(fastDisable));
        client.createTable(tableDescriptor);

        ColumnDefinition colDef = new ColumnDefinition();
        colDef.setFamily("fam0");
        colDef.setColumnName("col0");
        colDef.setFieldType("string");
        client.addColumnDefinition(tableName, colDef);
    }

    public static class TestMapper extends Mapper<Text, TableBlurRecord, Text, TableBlurRecord> {
        @Override
        protected void map(Text key, TableBlurRecord value, Context context)
                throws IOException, InterruptedException {
            context.write(key, value);
        }
    }

}