Java tutorial
/* * Copyright (c) 2007-2010 Concurrent, Inc. All Rights Reserved. * * Project and contact information: http://www.cascading.org/ * * This file is part of the Cascading project. * * Cascading is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Cascading is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Cascading. If not, see <http://www.gnu.org/licenses/>. */ package cascading.tuple.hadoop; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import cascading.CascadingTestCase; import cascading.tuple.Tuple; import cascading.tuple.TupleInputStream; import cascading.tuple.TupleOutputStream; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.serializer.WritableSerialization; import org.apache.hadoop.mapred.JobConf; /** * */ public class HadoopSerializationTest extends CascadingTestCase { String outputPath = "build/test/output/tuples/serialization"; public HadoopSerializationTest() { super("serialization tuple test"); } public void testInputOutputSerialization() throws IOException { long time = System.currentTimeMillis(); JobConf jobConf = new JobConf(); jobConf.set("io.serializations", TestSerialization.class.getName() + "," + WritableSerialization.class.getName()); // disable/replace WritableSerialization class jobConf.set("cascading.serialization.tokens", "1000=" + BooleanWritable.class.getName() + ",10001=" + Text.class.getName()); // not using Text, just testing parsing TupleSerialization tupleSerialization = new TupleSerialization(jobConf); File file = new File(outputPath); file.mkdirs(); file = new File(file, "/test.bytes"); TupleOutputStream output = new TupleOutputStream(new FileOutputStream(file, false), tupleSerialization.getElementWriter()); for (int i = 0; i < 501; i++) // 501 is arbitrary { String aString = "string number " + i; double random = Math.random(); output.writeTuple(new Tuple(i, aString, random, new TestText(aString), new Tuple("inner tuple", new BytesWritable("some string".getBytes())), new BytesWritable(Integer.toString(i).getBytes("UTF-8")), new BooleanWritable(false))); } output.close(); assertEquals("wrong size", 89967L, file.length()); // just makes sure the file size doesnt change from expected TupleInputStream input = new TupleInputStream(new FileInputStream(file), tupleSerialization.getElementReader()); int k = -1; for (int i = 0; i < 501; i++) { Tuple tuple = input.readTuple(); int value = tuple.getInteger(0); assertTrue("wrong diff", value - k == 1); assertTrue("wrong type", tuple.get(3) instanceof TestText); assertTrue("wrong type", tuple.get(4) instanceof Tuple); assertTrue("wrong type", tuple.get(5) instanceof BytesWritable); byte[] bytes = ((BytesWritable) tuple.get(5)).getBytes(); String string = new String(bytes, 0, bytes.length > 1 ? bytes.length - 1 : bytes.length, "UTF-8"); assertEquals("wrong value", Integer.parseInt(string), i); assertTrue("wrong type", tuple.get(6) instanceof BooleanWritable); k = value; } input.close(); System.out.println("time = " + (System.currentTimeMillis() - time)); } }