cascading.avro.CascadingToAvroTest.java Source code

Java tutorial

Introduction

Here is the complete source code for cascading.avro.CascadingToAvroTest.java, a JUnit test class that exercises the CascadingToAvro conversion helpers.

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package cascading.avro;

import static org.junit.Assert.*;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.*;

import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.Schema.Parser;
import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericArray;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericData.Fixed;
import org.apache.avro.generic.GenericData.Record;
import org.apache.hadoop.io.BytesWritable;
import org.hamcrest.core.IsNull;
import org.junit.Before;
import org.junit.Test;

import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;

/**
 * Unit tests for the static conversion helpers of {@link CascadingToAvro}:
 * {@code toAvro}, {@code toAvroFixed}, {@code parseTupleEntry},
 * {@code generateAvroSchemaFromElement} and
 * {@code generateAvroSchemaFromTupleEntry}.
 *
 * <p>
 * Every test runs against a fresh fixture built in {@link #setUp()}: the
 * record schema parsed from the {@code test5.avsc} resource and a
 * {@link TupleEntry} holding one sample value per schema field.
 */
public class CascadingToAvroTest {

    /** Schema resource, resolved relative to this class's package. */
    private static final String SCHEMA_RESOURCE = "test5.avsc";

    /** Number of values stored in the fixture tuple (positions 0 to 10). */
    private static final int FIELD_COUNT = 11;

    TupleEntry tupleEntry = null;
    Schema schema = null;

    /**
     * Returns a fresh copy of the payload stored in the {@code aBytes} field.
     * A new array is created on every call because {@link BytesWritable} wraps
     * the array it is given and later {@code set(...)} calls overwrite it.
     */
    private static byte[] sampleBytes() {
        return new byte[] { 0, 1, 2, 3, 0, 0, 0 };
    }

    /**
     * Returns a fresh copy of the 16-byte payload stored in the {@code aFixed}
     * field.
     */
    private static byte[] sampleFixedBytes() {
        return new byte[] { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
    }

    /**
     * Loads the test schema and builds the tuple entry shared by the tests.
     *
     * <p>
     * The tuple's field names are taken from the schema in declaration order,
     * and the values are set positionally: boolean, int, long, float, double,
     * string, bytes, fixed, list, map and finally an int for the last field.
     *
     * @throws Exception if the schema resource cannot be read or parsed
     */
    @Before
    public void setUp() throws Exception {
        InputStream schemaStream = getClass().getResourceAsStream(SCHEMA_RESOURCE);
        // Fail with a readable message instead of an obscure parser error.
        assertNotNull("Schema resource " + SCHEMA_RESOURCE + " was not found on the test classpath", schemaStream);
        try {
            schema = new Schema.Parser().parse(schemaStream);
        } finally {
            // Close explicitly; do not rely on the parser to release the stream.
            schemaStream.close();
        }

        Fields fields = new Fields();
        for (Field avroField : schema.getFields()) {
            fields = fields.append(new Fields(avroField.name()));
        }

        Tuple tuple = Tuple.size(FIELD_COUNT);
        tuple.set(0, false);
        tuple.set(1, 10);
        tuple.set(2, 5L);
        tuple.set(3, 0.6f);
        tuple.set(4, 1.01);
        tuple.set(5, "This is my string");
        tuple.set(6, new BytesWritable(sampleBytes()));
        tuple.set(7, new BytesWritable(sampleFixedBytes()));

        List<Integer> array = new ArrayList<Integer>();
        array.add(0);
        array.add(1);
        tuple.set(8, array);

        Map<String, Integer> myMap = new HashMap<String, Integer>();
        myMap.put("one", 1);
        myMap.put("two", 2);
        tuple.set(9, myMap);

        tuple.set(10, 5);
        tupleEntry = new TupleEntry(fields, tuple);
    }

    /** A {@link List} of integers converts to an Avro array with the same elements. */
    @Test
    public void testToArrayPrimitive() {
        Schema fieldSchema = schema.getField("aList").schema();
        // toAvro returns Object; the element type is fixed by the schema.
        @SuppressWarnings("unchecked")
        List<Integer> outList = (GenericData.Array<Integer>) CascadingToAvro.toAvro(tupleEntry.getObject(8),
                fieldSchema);

        assertThat(outList.get(0), is(0));
        assertThat(outList.get(1), is(1));
    }

    /** A {@link Tuple} of integers converts to an Avro array with the same elements. */
    @Test
    public void testFromTupleToArray() {
        Schema fieldSchema = schema.getField("aList").schema();
        Tuple tuple = new Tuple();
        tuple.add(0);
        tuple.add(1);
        @SuppressWarnings("unchecked")
        List<Integer> outList = (GenericData.Array<Integer>) CascadingToAvro.toAvro(tuple, fieldSchema);

        assertThat(outList.get(0), is(0));
        assertThat(outList.get(1), is(1));
    }

    /** A flat {@link Tuple} of alternating keys and values converts to a map. */
    @Test
    public void testFromTupleToMap() {
        Schema fieldSchema = schema.getField("aMap").schema();
        Tuple tuple = new Tuple();
        tuple.add("one");
        tuple.add(1);
        tuple.add("two");
        tuple.add(2);
        @SuppressWarnings("unchecked")
        Map<String, Integer> outMap = (Map<String, Integer>) CascadingToAvro.toAvro(tuple, fieldSchema);

        assertThat(outMap.get("one"), is(1));
        assertThat(outMap.get("two"), is(2));
    }

    /** An array of arrays keeps its nesting and its inner elements. */
    @Test
    public void testFromArrayNested() {
        Schema innerSchema = Schema.createArray(Schema.create(Schema.Type.INT));
        Schema outerSchema = Schema.createArray(innerSchema);
        GenericArray<GenericArray<Integer>> array = new GenericData.Array<GenericArray<Integer>>(1, outerSchema);
        GenericArray<Integer> innerArray = new GenericData.Array<Integer>(1, innerSchema);
        innerArray.add(0);
        innerArray.add(1);
        array.add(innerArray);

        @SuppressWarnings("unchecked")
        List<List<Integer>> outList = (List<List<Integer>>) CascadingToAvro.toAvro(array, outerSchema);

        assertThat(outList.get(0), is((List<Integer>) innerArray));
        assertThat(outList.get(0).get(0), is(0));
        assertThat(outList.get(0).get(1), is(1));
    }

    /** A {@link Map} of string keys to integers keeps its entries. */
    @Test
    public void testFromMapPrimitive() {
        Schema fieldSchema = schema.getField("aMap").schema();
        @SuppressWarnings("unchecked")
        Map<String, Integer> outMap = (Map<String, Integer>) CascadingToAvro.toAvro(tupleEntry.getObject(9),
                fieldSchema);

        assertThat(outMap.get("one"), is(1));
        assertThat(outMap.get("two"), is(2));
    }

    /** A map of maps keeps its nesting and its inner entries. */
    @Test
    public void testFromMapNested() {
        Schema innerSchema = Schema.createMap(Schema.create(Schema.Type.LONG));
        Schema outerSchema = Schema.createMap(innerSchema);
        Map<String, Long> innerMap = new HashMap<String, Long>();
        innerMap.put("one", 1L);
        innerMap.put("two", 2L);
        Map<String, Map<String, Long>> outerMap = new HashMap<String, Map<String, Long>>();
        outerMap.put("map1", innerMap);

        @SuppressWarnings("unchecked")
        Map<String, Map<String, Long>> outMap = (Map<String, Map<String, Long>>) CascadingToAvro.toAvro(outerMap,
                outerSchema);

        assertThat(outMap.get("map1"), is(innerMap));
        assertThat(outMap.get("map1").get("two"), is(2L));
        assertThat(outMap.get("map1").get("one"), is(1L));
    }

    /** The {@code aBytes} value converts to a {@link ByteBuffer} with the same content. */
    @Test
    public void testFromBytes() {
        Schema fieldSchema = schema.getField("aBytes").schema();
        ByteBuffer result = ByteBuffer.wrap(sampleBytes());

        ByteBuffer outBytes = (ByteBuffer) CascadingToAvro.toAvro(tupleEntry.getObject("aBytes"), fieldSchema);

        assertThat(outBytes, is(result));
    }

    /** The {@code aFixed} value converts to a {@link Fixed} with the same content. */
    @Test
    public void testFromFixed() {
        Schema fieldSchema = schema.getField("aFixed").schema();
        Fixed result = new Fixed(fieldSchema, sampleFixedBytes());

        Fixed outFixed = (Fixed) CascadingToAvro.toAvro(tupleEntry.getObject("aFixed"), fieldSchema);

        assertThat(outFixed, is(result));
    }

    /** An integer converted against the {@code aUnion} field schema comes back as the same integer. */
    @Test
    public void testFromUnion() {
        Schema fieldSchema = schema.getField("aUnion").schema();
        Integer outInt = (Integer) CascadingToAvro.toAvro(tupleEntry.getObject(10), fieldSchema);

        assertThat(outInt, is(5));
    }

    /** Converting the whole fixture entry yields one correctly typed value per field, in order. */
    @Test
    public void testParseTupleEntry() {
        ByteBuffer buffer = ByteBuffer.wrap(sampleBytes());
        Fixed fixed = new Fixed(schema.getField("aFixed").schema(), sampleFixedBytes());

        Object[] output = CascadingToAvro.parseTupleEntry(tupleEntry, schema);

        assertThat(output.length, is(FIELD_COUNT));
        assertThat((Boolean) output[0], is(false));
        assertThat((Integer) output[1], is(10));
        assertThat((Long) output[2], is(5L));
        assertThat((Float) output[3], is(0.6f));
        assertThat((Double) output[4], is(1.01));
        assertThat((String) output[5], is("This is my string"));
        assertThat((ByteBuffer) output[6], is(buffer));
        assertThat((Fixed) output[7], is(fixed));

        @SuppressWarnings("unchecked")
        List<Integer> outList = (List<Integer>) output[8];
        assertThat(outList.get(0), is(0));
        assertThat(outList.get(1), is(1));

        @SuppressWarnings("unchecked")
        Map<String, Integer> outMap = (Map<String, Integer>) output[9];
        assertThat(outMap.get("one"), is(1));
        assertThat(outMap.get("two"), is(2));

        assertThat((Integer) output[10], is(5));
    }

    /** A list of integers yields an array-of-int schema. */
    @Test
    public void testArraySchema() {
        List<Integer> array = new ArrayList<Integer>();
        array.add(0);
        Schema arraySchema = CascadingToAvro.generateAvroSchemaFromElement(array, "my array", false);
        Schema expected = Schema.createArray(Schema.create(Type.INT));
        assertThat(arraySchema, is(expected));
    }

    /** A map with double values yields a map-of-double schema. */
    @Test
    public void testMapSchema() {
        Map<String, Double> map = new HashMap<String, Double>();
        map.put("one", 1.01);
        Schema mapSchema = CascadingToAvro.generateAvroSchemaFromElement(map, "my map", false);
        Schema expected = Schema.createMap(Schema.create(Type.DOUBLE));
        assertThat(mapSchema, is(expected));
    }

    /** With the third argument set to {@code true}, a long yields a {@code [null, long]} union schema. */
    @Test
    public void testUnionSchema() {
        Long l = 5L;
        Schema unionSchema = CascadingToAvro.generateAvroSchemaFromElement(l, "my long", true);
        List<Schema> types = new ArrayList<Schema>();
        types.add(Schema.create(Schema.Type.NULL));
        types.add(Schema.create(Schema.Type.LONG));
        Schema expected = Schema.createUnion(types);
        assertThat(unionSchema, is(expected));
    }

    /**
     * The schema generated from the fixture entry matches the expected record
     * definition, in which both {@code BytesWritable} fields ({@code aBytes}
     * and {@code aFixed}) are typed as {@code bytes}.
     */
    @Test
    public void testGenerateSchemaFromTupleEntry() {
        Schema expected = new Schema.Parser().parse(
                "{\"type\":\"record\",\"name\":\"CascadingRecord\",\"doc\":\"auto-generated by cascading.avro\",\"fields\":[{\"name\":\"aBoolean\",\"type\":\"boolean\"},{\"name\":\"anInt\",\"type\":\"int\"},{\"name\":\"aLong\",\"type\":\"long\"},{\"name\":\"aFloat\",\"type\":\"float\"},{\"name\":\"aDouble\",\"type\":\"double\"},{\"name\":\"aString\",\"type\":\"string\"},{\"name\":\"aBytes\",\"type\":\"bytes\"},{\"name\":\"aFixed\",\"type\":\"bytes\"},{\"name\":\"aList\",\"type\":{\"type\":\"array\",\"items\":\"int\"}},{\"name\":\"aMap\",\"type\":{\"type\":\"map\",\"values\":\"int\"}},{\"name\":\"aUnion\",\"type\":\"int\"}]}");
        Schema outSchema = CascadingToAvro.generateAvroSchemaFromTupleEntry(tupleEntry, "CascadingRecord", false);
        assertThat(outSchema, is(expected));
    }

    /**
     * {@code toAvroFixed} must return only the bytes of the most recent
     * {@code set(...)} call, even though an earlier, longer value was written
     * to the same {@link BytesWritable}.
     */
    @Test
    public void testToAvroFixedUsesValidRangeOfBytesWritable() {
        Schema fieldSchema = schema.getField("aFixed").schema();
        BytesWritable bytes = new BytesWritable();
        // Write a longer value first, then the shorter value the test expects back.
        byte[] oldBufferValue = { 0, 1, 2, 3 };
        bytes.set(oldBufferValue, 0, oldBufferValue.length);

        byte[] bufferValue = { 4, 5, 6 };
        bytes.set(bufferValue, 0, bufferValue.length);
        byte[] outBytes = ((Fixed) CascadingToAvro.toAvroFixed(bytes, fieldSchema)).bytes();

        assertThat(outBytes, is(bufferValue));
    }

    /**
     * {@code toAvro} for a bytes field must return only the bytes of the most
     * recent {@code set(...)} call on a reused {@link BytesWritable}.
     */
    @Test
    public void testToAvroBytesUsesValidRangeOfBytesWritable() {
        Schema fieldSchema = schema.getField("aBytes").schema();
        // Reuse the fixture's writable, which already holds a 7-byte value.
        BytesWritable bytes = (BytesWritable) tupleEntry.getObject("aBytes");
        byte[] oldBufferValue = { 0, 1, 2, 3 };
        bytes.set(oldBufferValue, 0, oldBufferValue.length);

        byte[] bufferValue = { 4, 5, 6 };
        ByteBuffer result = ByteBuffer.wrap(bufferValue);
        bytes.set(bufferValue, 0, bufferValue.length);
        ByteBuffer outBytes = (ByteBuffer) CascadingToAvro.toAvro(bytes, fieldSchema);

        assertThat(outBytes, is(result));
    }
}