Java tutorial: writing and reading DurableChunk data with Apache Mnemonic's Hadoop MapReduce integration
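The complete listing below is the Apache Mnemonic test class MneMapreduceChunkDataTest. It exercises the round trip for DurableType.CHUNK data through the Hadoop MapReduce integration: a TestNG setUp method prepares a local FileSystem working directory, a TaskAttemptContext, and the input/output MneConfigHelper settings; testWriteChunkData streams randomly filled DurableChunk records through MneOutputFormat while accumulating a CRC32 checksum; testReadChunkData reads each generated part file back through MneInputFormat; and testBatchReadChunkDataUsingInputSession reads all part files in a single pass with MneDurableInputSession. Both read paths verify the record count, total byte size, and checksum recorded on the write path.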
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mnemonic.mapreduce;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Random;
import java.util.zip.CRC32;
import java.util.zip.Checksum;

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;
import org.apache.mnemonic.DurableChunk;
import org.apache.mnemonic.DurableType;
import org.apache.mnemonic.Utils;
import org.apache.mnemonic.hadoop.MneConfigHelper;
import org.apache.mnemonic.hadoop.MneDurableInputSession;
import org.apache.mnemonic.hadoop.MneDurableInputValue;
import org.apache.mnemonic.hadoop.MneDurableOutputSession;
import org.apache.mnemonic.hadoop.MneDurableOutputValue;
import org.apache.mnemonic.hadoop.mapreduce.MneInputFormat;
import org.apache.mnemonic.hadoop.mapreduce.MneOutputFormat;
import org.apache.mnemonic.sessions.SessionIterator;
import org.testng.Assert;
import org.testng.AssertJUnit;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;

import sun.misc.Unsafe;

@SuppressWarnings("restriction")
public class MneMapreduceChunkDataTest {

  private static final String DEFAULT_BASE_WORK_DIR =
      "target" + File.separator + "test" + File.separator + "tmp";
  private static final String DEFAULT_WORK_DIR =
      DEFAULT_BASE_WORK_DIR + File.separator + "chunk-data";
  private static final String SERVICE_NAME = "pmalloc";
  private static final long SLOT_KEY_ID = 5L;

  private Path m_workdir;
  private JobConf m_conf;
  private FileSystem m_fs;
  private Random m_rand;
  private TaskAttemptID m_taid;
  private TaskAttemptContext m_tacontext;
  private long m_reccnt = 5000L;
  private volatile long m_checksum;
  private volatile long m_totalsize = 0L;
  private Unsafe unsafe;

  @BeforeClass
  public void setUp() throws Exception {
    m_workdir = new Path(System.getProperty("test.tmp.dir", DEFAULT_WORK_DIR));
    m_conf = new JobConf();
    m_rand = Utils.createRandom();
    unsafe = Utils.getUnsafe();

    try {
      m_fs = FileSystem.getLocal(m_conf).getRaw();
      m_fs.delete(m_workdir, true);
      m_fs.mkdirs(m_workdir);
    } catch (IOException e) {
      throw new IllegalStateException("bad fs init", e);
    }

    m_taid = new TaskAttemptID("jt", 0, TaskType.MAP, 0, 0);
    m_tacontext = new TaskAttemptContextImpl(m_conf, m_taid);

    MneConfigHelper.setDir(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, m_workdir.toString());
    MneConfigHelper.setBaseOutputName(m_conf, null, "chunk-data");

    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new DurableType[] { DurableType.CHUNK });
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX, new Class<?>[] {});
    MneConfigHelper.setMemServiceName(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SERVICE_NAME);
    MneConfigHelper.setSlotKeyId(m_conf, MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, SLOT_KEY_ID);
    MneConfigHelper.setMemPoolSize(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, 1024L * 1024 * 1024 * 4);
    MneConfigHelper.setDurableTypes(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new DurableType[] { DurableType.CHUNK });
    MneConfigHelper.setEntityFactoryProxies(m_conf,
        MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX, new Class<?>[] {});
  }

  @AfterClass
  public void tearDown() {
  }

  protected DurableChunk<?> genupdDurableChunk(
      MneDurableOutputSession<DurableChunk<?>> s, Checksum cs) {
    DurableChunk<?> ret = null;
    int sz = m_rand.nextInt(1024 * 1024) + 1024 * 1024;
    ret = s.newDurableObjectRecord(sz);
    byte b;
    if (null != ret) {
      for (int i = 0; i < ret.getSize(); ++i) {
        b = (byte) m_rand.nextInt(255);
        unsafe.putByte(ret.get() + i, b);
        cs.update(b);
      }
      m_totalsize += sz;
    }
    return ret;
  }

  @Test(enabled = true)
  public void testWriteChunkData() throws Exception {
    NullWritable nada = NullWritable.get();
    MneDurableOutputSession<DurableChunk<?>> sess =
        new MneDurableOutputSession<DurableChunk<?>>(m_tacontext, null,
            MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
    MneDurableOutputValue<DurableChunk<?>> mdvalue =
        new MneDurableOutputValue<DurableChunk<?>>(sess);
    OutputFormat<NullWritable, MneDurableOutputValue<DurableChunk<?>>> outputFormat =
        new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>();
    RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
        outputFormat.getRecordWriter(m_tacontext);
    DurableChunk<?> dchunk = null;
    Checksum cs = new CRC32();
    cs.reset();
    for (int i = 0; i < m_reccnt; ++i) {
      dchunk = genupdDurableChunk(sess, cs);
      Assert.assertNotNull(dchunk);
      writer.write(nada, mdvalue.of(dchunk));
    }
    m_checksum = cs.getValue();
    writer.close(m_tacontext);
    sess.close();
  }

  @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
  public void testReadChunkData() throws Exception {
    List<String> partfns = new ArrayList<String>();
    long reccnt = 0L;
    long tsize = 0L;
    Checksum cs = new CRC32();
    cs.reset();
    File folder = new File(m_workdir.toString());
    File[] listfiles = folder.listFiles();
    for (int idx = 0; idx < listfiles.length; ++idx) {
      if (listfiles[idx].isFile()
          && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
          && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
        partfns.add(listfiles[idx].getName());
      }
    }
    Collections.sort(partfns); // keep the order for checksum
    for (int idx = 0; idx < partfns.size(); ++idx) {
      System.out.println(String.format("Verifying : %s", partfns.get(idx)));
      FileSplit split = new FileSplit(
          new Path(m_workdir, partfns.get(idx)), 0, 0L, new String[0]);
      InputFormat<NullWritable, MneDurableInputValue<DurableChunk<?>>> inputFormat =
          new MneInputFormat<MneDurableInputValue<DurableChunk<?>>, DurableChunk<?>>();
      RecordReader<NullWritable, MneDurableInputValue<DurableChunk<?>>> reader =
          inputFormat.createRecordReader(split, m_tacontext);
      MneDurableInputValue<DurableChunk<?>> dchkval = null;
      while (reader.nextKeyValue()) {
        dchkval = reader.getCurrentValue();
        byte b;
        for (int j = 0; j < dchkval.getValue().getSize(); ++j) {
          b = unsafe.getByte(dchkval.getValue().get() + j);
          cs.update(b);
        }
        tsize += dchkval.getValue().getSize();
        ++reccnt;
      }
      reader.close();
    }
    AssertJUnit.assertEquals(m_reccnt, reccnt);
    AssertJUnit.assertEquals(m_totalsize, tsize);
    AssertJUnit.assertEquals(m_checksum, cs.getValue());
    System.out.println(String.format("The checksum of chunk is %d", m_checksum));
  }

  @Test(enabled = true, dependsOnMethods = { "testWriteChunkData" })
  public void testBatchReadChunkDataUsingInputSession() throws Exception {
    List<String> partfns = new ArrayList<String>();
    long reccnt = 0L;
    long tsize = 0L;
    Checksum cs = new CRC32();
    cs.reset();
    File folder = new File(m_workdir.toString());
    File[] listfiles = folder.listFiles();
    for (int idx = 0; idx < listfiles.length; ++idx) {
      if (listfiles[idx].isFile()
          && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null))
          && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) {
        partfns.add(listfiles[idx].getName());
      }
    }
    Collections.sort(partfns); // keep the order for checksum
    List<Path> paths = new ArrayList<Path>();
    for (String fns : partfns) {
      paths.add(new Path(m_workdir, fns));
      System.out.println(String.format("[Batch Mode] Added : %s", fns));
    }
    MneDurableInputSession<DurableChunk<?>> m_session =
        new MneDurableInputSession<DurableChunk<?>>(m_tacontext, null,
            paths.toArray(new Path[0]), MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
    SessionIterator<DurableChunk<?>, ?> m_iter = m_session.iterator();
    DurableChunk<?> val = null;
    while (m_iter.hasNext()) {
      val = m_iter.next();
      byte b;
      for (int j = 0; j < val.getSize(); ++j) {
        b = unsafe.getByte(val.get() + j);
        cs.update(b);
      }
      tsize += val.getSize();
      ++reccnt;
    }
    AssertJUnit.assertEquals(m_reccnt, reccnt);
    AssertJUnit.assertEquals(m_totalsize, tsize);
    AssertJUnit.assertEquals(m_checksum, cs.getValue());
    System.out.println(String.format("The checksum of chunk is %d [Batch Mode]", m_checksum));
  }
}
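Stripped of the TestNG scaffolding and checksum bookkeeping, the write-then-read pattern the test exercises reduces to a few calls, summarized in the sketch below. It is a minimal sketch, not part of Apache Mnemonic: it assumes the TaskAttemptContext passed in already carries the same MneConfigHelper output/input settings shown in setUp() above, and the class name, method names, and the fixed fill byte are illustrative placeholders. All of the Mnemonic and Hadoop calls it uses appear in the test listing.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.mnemonic.DurableChunk;
import org.apache.mnemonic.Utils;
import org.apache.mnemonic.hadoop.MneConfigHelper;
import org.apache.mnemonic.hadoop.MneDurableInputSession;
import org.apache.mnemonic.hadoop.MneDurableOutputSession;
import org.apache.mnemonic.hadoop.MneDurableOutputValue;
import org.apache.mnemonic.hadoop.mapreduce.MneOutputFormat;
import org.apache.mnemonic.sessions.SessionIterator;

import sun.misc.Unsafe;

// Condensed sketch of the chunk round trip; `ctx` must be configured as in setUp() above.
@SuppressWarnings("restriction")
class ChunkRoundTripSketch {

  // Write `count` chunks of `size` bytes, each filled with a fixed byte pattern.
  static void writeChunks(TaskAttemptContext ctx, long count, int size) throws Exception {
    MneDurableOutputSession<DurableChunk<?>> sess =
        new MneDurableOutputSession<DurableChunk<?>>(ctx, null,
            MneConfigHelper.DEFAULT_OUTPUT_CONFIG_PREFIX);
    MneDurableOutputValue<DurableChunk<?>> value =
        new MneDurableOutputValue<DurableChunk<?>>(sess);
    RecordWriter<NullWritable, MneDurableOutputValue<DurableChunk<?>>> writer =
        new MneOutputFormat<MneDurableOutputValue<DurableChunk<?>>>().getRecordWriter(ctx);
    Unsafe unsafe = Utils.getUnsafe();
    for (long i = 0; i < count; ++i) {
      DurableChunk<?> chunk = sess.newDurableObjectRecord(size); // allocate a durable chunk
      for (long j = 0; j < chunk.getSize(); ++j) {
        unsafe.putByte(chunk.get() + j, (byte) 0x5A);            // chunk.get() is the base address
      }
      writer.write(NullWritable.get(), value.of(chunk));         // emit the chunk as a record
    }
    writer.close(ctx);
    sess.close();
  }

  // Read every chunk from the given part files in one batch session; return total bytes seen.
  static long readChunks(TaskAttemptContext ctx, Path[] partFiles) throws Exception {
    MneDurableInputSession<DurableChunk<?>> sess =
        new MneDurableInputSession<DurableChunk<?>>(ctx, null,
            partFiles, MneConfigHelper.DEFAULT_INPUT_CONFIG_PREFIX);
    SessionIterator<DurableChunk<?>, ?> iter = sess.iterator();
    long total = 0L;
    while (iter.hasNext()) {
      total += iter.next().getSize();
    }
    return total;
  }
}

The read side uses the batch MneDurableInputSession path from the third test, which iterates over all part files through one session instead of constructing a RecordReader per FileSplit as the second test does.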