org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java Source code

Introduction

Here is the source code for org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java, a TestNG suite from Apache Mnemonic. It writes a series of durable memory chunks through the Hadoop MapReduce MneOutputFormat, then reads them back two ways: record by record through MneInputFormat, and in a single batch pass through MneDurableInputSession. A CRC32 checksum, record count, and total size recorded on the write side are verified on both read paths.

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mnemonic.mapreduce;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.mnemonic.DurableChunk;
import org.apache.mnemonic.DurableType;
import org.apache.mnemonic.Utils;
import org.apache.mnemonic.hadoop.MneConfigHelper;
import org.apache.mnemonic.hadoop.MneDurableInputSession;
import org.apache.mnemonic.hadoop.MneDurableInputValue;
import org.apache.mnemonic.hadoop.MneDurableOutputSession;
import org.apache.mnemonic.hadoop.MneDurableOutputValue;
import org.apache.mnemonic.hadoop.mapreduce.MneInputFormat;
import org.apache.mnemonic.hadoop.mapreduce.MneOutputFormat;
import org.apache.mnemonic.sessions.SessionIterator;
import org.testng.Assert;
import org.testng.AssertJUnit;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import sun.misc.Unsafe;

@SuppressWarnings("restriction")
public class MneMapreduceChunkDataTest {

    private static final String DEFAULT_BASE_WORK_DIR = "target" + File.separator + "test" + File.separator + "tmp";
    private static final String DEFAULT_WORK_DIR = DEFAULT_BASE_WORK_DIR + File.separator + "chunk-data";
    private static final String SERVICE_NAME = "pmalloc";
    private static final long SLOT_KEY_ID = 5L;
    private Path m_workdir;
    private JobConf m_conf;
    private FileSystem m_fs;
    private Random m_rand;
    private TaskAttemptID m_taid;
    private TaskAttemptContext m_tacontext;
    private long m_reccnt = 5000L;
    private volatile long m_checksum;
    private volatile long m_totalsize = 0L;
    private Unsafe unsafe;

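    /*
     * Prepare a local working directory, a task-attempt context, and the
     * Mnemonic configuration shared by all tests: the "pmalloc" memory
     * service, slot key 5, durable type CHUNK on both the input and output
     * sides, and a 4 GB memory pool for the writer.
     */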
    @BeforeClass
    public void setUp() throws Exception {
        m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
        m_conf = new JobConf();
        m_rand = Utils.createRandom();
        unsafe = Utils.getUnsafe();

        try {
            m_fs = FileSystem.getLocal(m_conf).getRaw();
            m_fs.delete(m_workdir, true);
            m_fs.mkdirs(m_workdir);
        } catch (IOException e) {
            throw new IllegalStateException("bad fs init", e);
        }

        m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
        m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

        MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
        MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

        MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
        MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
        MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
                new DurableType[] { DurableType.CHUNK });
        MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX,
                new Class<?>[] {});
        MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
        MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
        MneConfigHelper.setMemPoolSize(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
                1024L * 1024 * 1024 * 4); // 4 GB pool for the output session
        MneConfigHelper.setDurableTypes(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
                new DurableType[] { DurableType.CHUNK });
        MneConfigHelper.setEntityFactoryProxies(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
                new Class<?>[] {});
    }

    @AfterClass
    public void tearDown() {
        // Nothing to release here; setUp() recreates the working directory.
    }

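    /*
     * Allocate a durable chunk of random size (1 MB to 2 MB), fill it with
     * random bytes through Unsafe, fold each byte into the running checksum,
     * and track the total payload size for later verification.
     */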
    protected DurableChunk<?> genupdDurableChunk(MneDurableOutputSession<DurableChunk<?>> s, Checksum cs) {
        DurableChunk<?> ret = null;
        int sz = m_rand.nextInt(1024 * 1024) + 1024 * 1024;
        ret = s.newDurableObjectRecord(sz);
        byte b;
        if (null != ret) {
            for (int i = 0; i < ret.getSize(); ++i) {
                b = (byte) m_rand.nextInt(255);
                unsafe.putByte(ret.get() + i, b);
                cs.update(b);
            }
            m_totalsize += sz;
        }
        return ret;
    }

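    /*
     * Write m_reccnt randomly filled chunks through MneOutputFormat,
     * recording the CRC32 checksum and total size for the read-side tests.
     */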
    @Test(enabled = true)
    public void testWriteChunkData() throws Exception {
        NullWritable nada = NullWritable.get();
        MneDurableOutputSession<DurableChunk<?>> sess = new MneDurableOutputSession<DurableChunk<?>>(m_tacontext,
                null, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
        MneDurableOutputValue<DurableChunk<?>> mdvalue = new MneDurableOutputValue<DurableChunk<?>>(sess);
        OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
                new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
        RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer = outputFormat
                .getRecordWriter(m_tacontext);
        DurableChunk<?> dchunk = null;
        Checksum cs = new CRC32();
        cs.reset();
        for (int i = 0; i < m_reccnt; ++i) {
            dchunk = genupdDurableChunk(sess, cs);
            Assert.assertNotNull(dchunk);
            writer.write(nada, mdvalue.of(dchunk));
        }
        m_checksum = cs.getValue();
        writer.close(m_tacontext);
        sess.close();
    }

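    /*
     * Read every part file back through MneInputFormat, one FileSplit per
     * file, and verify that record count, total size, and checksum match
     * what the writer produced.
     */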
    @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
    public void testReadChunkData() throws Exception {
        List<String> partfns = new ArrayList<String>();
        long reccnt = 0L;
        long tsize = 0L;
        Checksum cs = new CRC32();
        cs.reset();
        File folder = new File(m_workdir.toString());
        File[] listfiles = folder.listFiles();
        for (int idx = 0; idx < listfiles.length; ++idx) {
            if (listfiles[idx].isFile()
                    && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
                    && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
                partfns.add(listfiles[idx].getName());
            }
        }
        Collections.sort(partfns); // deterministic file order so the checksum is reproducible
        for (int idx = 0; idx < partfns.size(); ++idx) {
            System.out.println(String.format("Verifying : %s", partfns.get(idx)));
            FileSplit split = new FileSplit(new Path(m_workdir, partfns.get(idx)), 0, 0L, new String[0]);
            InputFormat<NullWritable, MneDurableInputValue<DurableChunk<?>>> inputFormat =
                    new MneInputFormat<MneDurableInputValue<DurableChunk<?>>, DurableChunk<?>>();
            RecordReader<NullWritable, MneDurableInputValue<DurableChunk<?>>> reader = inputFormat
                    .createRecordReader(split, m_tacontext);
            MneDurableInputValue<DurableChunk<?>> dchkval = null;
            while (reader.nextKeyValue()) {
                dchkval = reader.getCurrentValue();
                byte b;
                for (int j = 0; j < dchkval.getValue().getSize(); ++j) {
                    b = unsafe.getByte(dchkval.getValue().get() + j);
                    cs.update(b);
                }
                tsize += dchkval.getValue().getSize();
                ++reccnt;
            }
            reader.close();
        }
        AssertJUnit.assertEquals(m_reccnt, reccnt);
        AssertJUnit.assertEquals(m_totalsize, tsize);
        AssertJUnit.assertEquals(m_checksum, cs.getValue());
        System.out.println(String.format("The checksum of chunk is %d", m_checksum));
    }

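    /*
     * Read the same part files in a single pass through
     * MneDurableInputSession, bypassing the InputFormat machinery, and
     * verify the same invariants as the record-by-record test.
     */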
    @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
    public void testBatchReadChunkDataUsingInputSession() throws Exception {
        List<String> partfns = new ArrayList<String>();
        long reccnt = 0L;
        long tsize = 0L;
        Checksum cs = new CRC32();
        cs.reset();
        File folder = new File(m_workdir.toString());
        File[] listfiles = folder.listFiles();
        for (int idx = 0; idx < listfiles.length; ++idx) {
            if (listfiles[idx].isFile()
                    && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
                    && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
                partfns.add(listfiles[idx].getName());
            }
        }
        Collections.sort(partfns); // deterministic file order so the checksum is reproducible
        List<Path> paths = new ArrayList<Path>();
        for (String fns : partfns) {
            paths.add(new Path(m_workdir, fns));
            System.out.println(String.format("[Batch Mode] Added : %s", fns));
        }
        MneDurableInputSession<DurableChunk<?>> session = new MneDurableInputSession<DurableChunk<?>>(m_tacontext,
                null, paths.toArray(new Path[0]), MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
        SessionIterator<DurableChunk<?>, ?> iter = session.iterator();
        DurableChunk<?> val = null;
        while (iter.hasNext()) {
            val = iter.next();
            byte b;
            for (int j = 0; j < val.getSize(); ++j) {
                b = unsafe.getByte(val.get() + j);
                cs.update(b);
            }
            tsize += val.getSize();
            ++reccnt;
        }
        AssertJUnit.assertEquals(m_reccnt, reccnt);
        AssertJUnit.assertEquals(m_totalsize, tsize);
        AssertJUnit.assertEquals(m_checksum, cs.getValue());
        System.out.println(String.format("The checksum of chunk is %d [Batch Mode]", m_checksum));
    }
}
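
For context: the test drives MneOutputFormat directly through a hand-built TaskAttemptContext. In a regular MapReduce job, the same MneConfigHelper settings are applied to the job configuration before submission. The driver below is a minimal sketch, not part of the Mnemonic sources: the class name MneChunkJobDriver, the argument handling, and the omitted mapper wiring are assumptions for illustration, while the MneConfigHelper calls and values mirror setUp() above (MneConfigHelper accepts a Hadoop Configuration; the test passes a JobConf, which is a subclass).

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.mnemonic.DurableType;
import org.apache.mnemonic.hadoop.MneConfigHelper;
import org.apache.mnemonic.hadoop.MneDurableOutputValue;
import org.apache.mnemonic.hadoop.mapreduce.MneOutputFormat;

/** Hypothetical driver sketch; mirrors the output-side settings of the test. */
public class MneChunkJobDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        String outDir = args[0]; // destination directory for the part files (illustrative)

        // Same output-side settings the test applies in setUp().
        MneConfigHelper.setDir(conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, outDir);
        MneConfigHelper.setBaseOutputName(conf, null, "chunk-data");
        MneConfigHelper.setMemServiceName(conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, "pmalloc");
        MneConfigHelper.setSlotKeyId(conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 5L);
        MneConfigHelper.setMemPoolSize(conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
                1024L * 1024 * 1024 * 4); // 4 GB pool
        MneConfigHelper.setDurableTypes(conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
                new DurableType[] { DurableType.CHUNK });
        MneConfigHelper.setEntityFactoryProxies(conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX,
                new Class<?>[] {});

        Job job = Job.getInstance(conf, "mnemonic-chunk-writer");
        job.setJarByClass(MneChunkJobDriver.class);
        job.setOutputFormatClass(MneOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(MneDurableOutputValue.class);
        // An input format and a mapper emitting MneDurableOutputValue records
        // would be configured here; they are omitted from this sketch.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Assuming the standard Maven Surefire/TestNG wiring used in the Mnemonic source tree, the test class itself runs with mvn test from its containing module; the write test must run first, which the dependsOnMethods annotations already enforce.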