com.cloudera.cdk.morphline.json.JsonMorphlineTest.java Source code

Java tutorial

Introduction

Here is the source code for com.cloudera.cdk.morphline.json.JsonMorphlineTest.java

Source

/*
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.cdk.morphline.json;

import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.junit.Ignore;
import org.junit.Test;

import com.cloudera.cdk.morphline.api.AbstractMorphlineTest;
import com.cloudera.cdk.morphline.api.Record;
import com.cloudera.cdk.morphline.base.Fields;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.JsonNodeFactory;
import com.google.common.collect.ImmutableMap;
import com.google.common.io.Files;

public class JsonMorphlineTest extends AbstractMorphlineTest {

    @Test
    public void testReadJson() throws Exception {
        morphline = createMorphline("test-morphlines/readJson");
        for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
            InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/stream.json"));
            Record record = new Record();
            record.put(Fields.ATTACHMENT_BODY, in);

            collector.reset();
            startSession();
            assertEquals(1, collector.getNumStartEvents());
            assertTrue(morphline.process(record));
            Iterator<Record> iter = collector.getRecords().iterator();

            assertTrue(iter.hasNext());
            JsonNode node = (JsonNode) iter.next().getFirstValue(Fields.ATTACHMENT_BODY);
            assertEquals("foo", node.get("firstObject").asText());
            assertTrue(node.isObject());
            assertEquals(1, node.size());

            assertTrue(iter.hasNext());
            node = (JsonNode) iter.next().getFirstValue(Fields.ATTACHMENT_BODY);
            assertEquals("bar", node.get("secondObject").asText());
            assertTrue(node.isObject());
            assertEquals(1, node.size());

            assertFalse(iter.hasNext());
            in.close();
        }
    }

    @Test
    public void testReadJsonWithMap() throws Exception {
        morphline = createMorphline("test-morphlines/readJsonWithMap");
        for (int j = 0; j < 3; j++) { // also test reuse of objects and low level avro buffers
            InputStream in = new FileInputStream(new File(RESOURCES_DIR + "/test-documents/stream.json"));
            Record record = new Record();
            record.put(Fields.ATTACHMENT_BODY, in);

            collector.reset();
            startSession();
            assertEquals(1, collector.getNumStartEvents());
            assertTrue(morphline.process(record));
            Iterator<Record> iter = collector.getRecords().iterator();

            assertTrue(iter.hasNext());
            Map node = (Map) iter.next().getFirstValue(Fields.ATTACHMENT_BODY);
            assertEquals(ImmutableMap.of("firstObject", "foo"), node);

            assertTrue(iter.hasNext());
            node = (Map) iter.next().getFirstValue(Fields.ATTACHMENT_BODY);
            assertEquals(ImmutableMap.of("secondObject", "bar"), node);

            assertFalse(iter.hasNext());
            in.close();
        }
    }

    @Test
    public void testExtractJsonPaths() throws Exception {
        morphline = createMorphline("test-morphlines/extractJsonPaths");
        File file = new File(RESOURCES_DIR + "/test-documents/arrays.json");
        InputStream in = new FileInputStream(file);
        Record record = new Record();
        record.put(Fields.ATTACHMENT_BODY, in);

        startSession();
        assertEquals(1, collector.getNumStartEvents());
        assertTrue(morphline.process(record));

        assertEquals(1, collector.getRecords().size());
        JsonNode rootNode = (JsonNode) new ObjectMapper().reader(JsonNode.class).readValues(file).next();
        assertTrue(rootNode.get("price").isArray());
        List<JsonNode> expected = Arrays.asList(rootNode.get("price"));
        assertEquals(1, collector.getRecords().size());
        assertEquals(expected, collector.getFirstRecord().get("/price"));
        assertEquals(expected, collector.getFirstRecord().get("/price/[]"));
        assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));

        in.close();
    }

    @Test
    public void testExtractJsonPathsFlattened() throws Exception {
        morphline = createMorphline("test-morphlines/extractJsonPathsFlattened");
        File file = new File(RESOURCES_DIR + "/test-documents/arrays.json");
        InputStream in = new FileInputStream(file);
        Record record = new Record();
        record.put(Fields.ATTACHMENT_BODY, in);

        startSession();
        assertEquals(1, collector.getNumStartEvents());
        assertTrue(morphline.process(record));

        assertEquals(1, collector.getRecords().size());
        List expected = Arrays.asList(1, 2, 3, 4, 5, 10, 20, 100, 200);
        assertEquals(1, collector.getRecords().size());
        assertEquals(expected, collector.getFirstRecord().get("/price"));
        assertEquals(expected, collector.getFirstRecord().get("/price/[]"));
        assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));

        in.close();
    }

    @Test
    public void testComplexDocuments() throws Exception {
        morphline = createMorphline("test-morphlines/extractJsonPaths");
        File file = new File(RESOURCES_DIR + "/test-documents/complex.json");
        InputStream in = new FileInputStream(file);
        Record record = new Record();
        record.put(Fields.ATTACHMENT_BODY, in);

        startSession();
        assertEquals(1, collector.getNumStartEvents());
        assertTrue(morphline.process(record));

        assertEquals(1, collector.getRecords().size());
        JsonNode rootNode = (JsonNode) new ObjectMapper().reader(JsonNode.class).readValues(file).next();
        JsonNodeFactory factory = new JsonNodeFactory(false);

        assertEquals(Arrays.asList(10), collector.getFirstRecord().get("/docId"));
        assertEquals(Arrays.asList(rootNode.get("links")), collector.getFirstRecord().get("/links"));

        assertEquals(Arrays.asList(factory.arrayNode()), collector.getFirstRecord().get("/links/backward"));
        assertEquals(factory.arrayNode(), rootNode.get("links").get("backward"));

        List expected = Arrays.asList(factory.arrayNode().add(20).add(40).add(60).add(true).add(false).add(32767)
                .add(2147483647).add(9223372036854775807L).add(1.23).add(1.7976931348623157E308));
        assertEquals(expected, collector.getFirstRecord().get("/links/forward"));
        assertEquals(expected, collector.getFirstRecord().get("/links/forward/[]"));
        assertEquals(expected, collector.getFirstRecord().get("/links/forward[]"));
        assertEquals(Arrays.asList(rootNode.get("name")), collector.getFirstRecord().get("/name"));
        assertEquals(Arrays.asList("en-us", "en", "en-gb"),
                collector.getFirstRecord().get("/name/[]/language/[]/code"));
        assertEquals(Arrays.asList("en-us", "en", "en-gb"),
                collector.getFirstRecord().get("/name[]/language[]/code"));
        assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name/[]/language/[]/country"));
        assertEquals(Arrays.asList("us", "gb"), collector.getFirstRecord().get("/name[]/language[]/country"));
        assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));
        assertEquals(Arrays.asList(true), collector.getFirstRecord().get("/links/bool"));
        assertEquals(Arrays.asList(32767), collector.getFirstRecord().get("/links/short"));
        assertEquals(Arrays.asList(2147483647), collector.getFirstRecord().get("/links/int"));
        assertEquals(Arrays.asList(9223372036854775807L), collector.getFirstRecord().get("/links/long"));
        assertEquals(Arrays.asList(1.7976931348623157E308), collector.getFirstRecord().get("/links/double"));

        in.close();
    }

    @Test
    public void testExtractJsonPathsComplexFlattened() throws Exception {
        morphline = createMorphline("test-morphlines/extractJsonPathsFlattened");
        File file = new File(RESOURCES_DIR + "/test-documents/complex.json");
        InputStream in = new FileInputStream(file);
        Record record = new Record();
        record.put(Fields.ATTACHMENT_BODY, in);

        startSession();
        assertEquals(1, collector.getNumStartEvents());
        assertTrue(morphline.process(record));
        assertEquals(1, collector.getRecords().size());

        List expected = Arrays.asList(20, 40, 60, true, false, 32767, 2147483647, 9223372036854775807L, 1.23,
                1.7976931348623157E308, true, 32767, 2147483647, 9223372036854775807L, 1.7976931348623157E308);
        assertEquals(expected, collector.getFirstRecord().get("/links"));
        expected = Arrays.asList(20, 40, 60, true, false, 32767, 2147483647, 9223372036854775807L, 1.23,
                1.7976931348623157E308);
        assertEquals(expected, collector.getFirstRecord().get("/links/forward"));

        expected = Arrays.asList("en-us", "us", "en", "http://A", "http://B", "en-gb", "gb");
        assertEquals(expected, collector.getFirstRecord().get("/name"));

        assertEquals(Arrays.asList(), collector.getFirstRecord().get("/unknownField"));

        assertEquals(Arrays.asList(true), collector.getFirstRecord().get("/links/bool"));
        assertEquals(Arrays.asList(32767), collector.getFirstRecord().get("/links/short"));
        assertEquals(Arrays.asList(2147483647), collector.getFirstRecord().get("/links/int"));
        assertEquals(Arrays.asList(9223372036854775807L), collector.getFirstRecord().get("/links/long"));
        assertEquals(Arrays.asList(1.7976931348623157E308), collector.getFirstRecord().get("/links/double"));

        in.close();
    }

    @Test
    @Ignore
    public void benchmarkJson() throws Exception {
        String morphlineConfigFile = "test-morphlines/readJson";
        long durationSecs = 10;
        //File file = new File(RESOURCES_DIR + "/test-documents/stream.json");
        File file = new File(RESOURCES_DIR + "/test-documents/sample-statuses-20120906-141433.json");
        System.out.println("Now benchmarking " + morphlineConfigFile + " ...");
        morphline = createMorphline(morphlineConfigFile);
        byte[] bytes = Files.toByteArray(file);
        long start = System.currentTimeMillis();
        long duration = durationSecs * 1000;
        int iters = 0;
        while (System.currentTimeMillis() < start + duration) {
            Record record = new Record();
            record.put(Fields.ATTACHMENT_BODY, bytes);
            collector.reset();
            startSession();
            assertEquals(1, collector.getNumStartEvents());
            assertTrue(morphline.process(record));
            iters++;
        }
        float secs = (System.currentTimeMillis() - start) / 1000.0f;
        System.out.println("Results: iters=" + iters + ", took[secs]=" + secs + ", iters/secs=" + (iters / secs));
    }

}