org.apache.atlas.discovery.DataSetLineageServiceTest.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.atlas.discovery.DataSetLineageServiceTest.java

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.atlas.discovery;

import com.google.common.collect.ImmutableList;
import org.apache.atlas.AtlasClient;
import org.apache.atlas.AtlasException;
import org.apache.atlas.BaseRepositoryTest;
import org.apache.atlas.TestOnlyModule;
import org.apache.atlas.query.QueryParams;
import org.apache.atlas.typesystem.ITypedReferenceableInstance;
import org.apache.atlas.typesystem.Referenceable;
import org.apache.atlas.typesystem.Struct;
import org.apache.atlas.typesystem.exception.EntityNotFoundException;
import org.apache.atlas.typesystem.json.InstanceSerialization;
import org.apache.atlas.typesystem.persistence.Id;
import org.apache.commons.collections.ArrayStack;
import org.apache.commons.lang.RandomStringUtils;
import org.codehaus.jettison.json.JSONArray;
import org.codehaus.jettison.json.JSONException;
import org.codehaus.jettison.json.JSONObject;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Guice;
import org.testng.annotations.Test;

import javax.inject.Inject;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;

/**
 * Unit tests for Hive LineageService.
 */
@Guice(modules = TestOnlyModule.class)
public class DataSetLineageServiceTest extends BaseRepositoryTest {

    @Inject
    private DiscoveryService discoveryService;

    @Inject
    private DataSetLineageService lineageService;

    @BeforeClass
    public void setUp() throws Exception {
        super.setUp();
    }

    @AfterClass
    public void tearDown() throws Exception {
        super.tearDown();
    }

    @DataProvider(name = "dslQueriesProvider")
    private Object[][] createDSLQueries() {
        return new String[][] {
                // joins
                { "hive_table where name=\"sales_fact\", columns" },
                { "hive_table where name=\"sales_fact\", columns select name, dataType, comment" },
                { "hive_table where name=\"sales_fact\", columns as c select c.name, c.dataType, c.comment" },
                //            {"hive_db as db where (db.name=\"Reporting\"), hive_table as table select db.name,
                // table.name"},
                { "from hive_db" }, { "hive_db" }, { "hive_db where hive_db.name=\"Reporting\"" },
                { "hive_db hive_db.name = \"Reporting\"" },
                { "hive_db where hive_db.name=\"Reporting\" select name, owner" }, { "hive_db has name" },
                //            {"hive_db, hive_table"},
                //            {"hive_db, hive_process has name"},
                //            {"hive_db as db1, hive_table where db1.name = \"Reporting\""},
                //            {"hive_db where hive_db.name=\"Reporting\" and hive_db.createTime < " + System
                // .currentTimeMillis()},
                { "from hive_table" }, { "hive_table" }, { "hive_table is Dimension" },
                { "hive_column where hive_column isa PII" },
                //            {"hive_column where hive_column isa PII select hive_column.name"},
                { "hive_column select hive_column.name" }, { "hive_column select name" },
                { "hive_column where hive_column.name=\"customer_id\"" },
                { "from hive_table select hive_table.name" }, { "hive_db where (name = \"Reporting\")" },
                { "hive_db where (name = \"Reporting\") select name as _col_0, owner as _col_1" },
                { "hive_db where hive_db has name" },
                //            {"hive_db hive_table"},
                { "hive_db where hive_db has name" },
                //            {"hive_db as db1 hive_table where (db1.name = \"Reporting\")"},
                { "hive_db where (name = \"Reporting\") select name as _col_0, (createTime + 1) as _col_1 " },
                //            {"hive_db where (name = \"Reporting\") and ((createTime + 1) > 0)"},
                //            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name =
                // \"Reporting\") select db1.name as dbName, tab.name as tabName"},
                //            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) or (db1.name =
                // \"Reporting\") select db1.name as dbName, tab.name as tabName"},
                //            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name =
                // \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName"},
                //            {"hive_db as db1 hive_table as tab where ((db1.createTime + 1) > 0) and (db1.name =
                // \"Reporting\") or db1 has owner select db1.name as dbName, tab.name as tabName"},
                // trait searches
                { "Dimension" }, { "Fact" }, { "ETL" }, { "Metric" }, { "PII" }, };
    }

    @Test(enabled = false)
    public void testSearchByDSLQueries(String dslQuery) throws Exception {
        System.out.println("Executing dslQuery = " + dslQuery);
        String jsonResults = discoveryService.searchByDSL(dslQuery, new QueryParams(100, 0));
        assertNotNull(jsonResults);

        JSONObject results = new JSONObject(jsonResults);
        Assert.assertEquals(results.length(), 3);
        System.out.println("results = " + results);

        Object query = results.get("query");
        assertNotNull(query);

        JSONObject dataType = results.getJSONObject("dataType");
        assertNotNull(dataType);
        String typeName = dataType.getString("typeName");
        assertNotNull(typeName);

        JSONArray rows = results.getJSONArray("rows");
        assertNotNull(rows);
        Assert.assertTrue(rows.length() >= 0); // some queries may not have any results
        System.out.println("query [" + dslQuery + "] returned [" + rows.length() + "] rows");
    }

    @Test(enabled = false)
    public void testGetInputsGraphInvalidArguments(final String tableName, String expectedException)
            throws Exception {
        testInvalidArguments(expectedException, new Invoker() {
            @Override
            void run() throws AtlasException {
                lineageService.getInputsGraph(tableName);
            }
        });
    }

    @Test(enabled = false)
    public void testGetInputsGraphForEntityInvalidArguments(final String tableName, String expectedException)
            throws Exception {
        testInvalidArguments(expectedException, new Invoker() {
            @Override
            void run() throws AtlasException {
                lineageService.getInputsGraph(tableName);
            }
        });
    }

    @Test(enabled = false)
    public void testGetInputsGraph() throws Exception {
        JSONObject results = getInputsGraph("sales_fact_monthly_mv");
        assertNotNull(results);
        System.out.println("inputs graph = " + results);

        JSONObject values = results.getJSONObject("values");
        assertNotNull(values);

        final JSONObject vertices = values.getJSONObject("vertices");
        Assert.assertEquals(vertices.length(), 4);

        final JSONObject edges = values.getJSONObject("edges");
        Assert.assertEquals(edges.length(), 4);
    }

    @Test(enabled = false)
    public void testCircularLineage() throws Exception {
        JSONObject results = getInputsGraph("table2");
        assertNotNull(results);
        System.out.println("inputs graph = " + results);

        JSONObject values = results.getJSONObject("values");
        assertNotNull(values);

        final JSONObject vertices = values.getJSONObject("vertices");
        Assert.assertEquals(vertices.length(), 2);

        final JSONObject edges = values.getJSONObject("edges");
        Assert.assertEquals(edges.length(), 4);
    }

    @Test(enabled = false)
    public void testGetInputsGraphForEntity() throws Exception {
        ITypedReferenceableInstance entity = repository.getEntityDefinition(HIVE_TABLE_TYPE, "name",
                "sales_fact_monthly_mv");

        JSONObject results = new JSONObject(lineageService.getInputsGraphForEntity(entity.getId()._getId()));
        assertNotNull(results);
        System.out.println("inputs graph = " + results);

        JSONObject values = results.getJSONObject("values");
        assertNotNull(values);

        final JSONObject vertices = values.getJSONObject("vertices");
        Assert.assertEquals(vertices.length(), 4);

        final JSONObject edges = values.getJSONObject("edges");
        Assert.assertEquals(edges.length(), 4);
    }

    @Test(enabled = false)
    public void testGetOutputsGraphInvalidArguments(final String tableName, String expectedException)
            throws Exception {
        testInvalidArguments(expectedException, new Invoker() {
            @Override
            void run() throws AtlasException {
                lineageService.getOutputsGraph(tableName);
            }
        });
    }

    @Test(enabled = false)
    public void testGetOutputsGraphForEntityInvalidArguments(final String tableId, String expectedException)
            throws Exception {
        testInvalidArguments(expectedException, new Invoker() {
            @Override
            void run() throws AtlasException {
                lineageService.getOutputsGraphForEntity(tableId);
            }
        });
    }

    @Test(enabled = false)
    public void testGetOutputsGraph() throws Exception {
        JSONObject results = getOutputsGraph("sales_fact");
        assertNotNull(results);
        System.out.println("outputs graph = " + results);

        JSONObject values = results.getJSONObject("values");
        assertNotNull(values);

        final JSONObject vertices = values.getJSONObject("vertices");
        Assert.assertEquals(vertices.length(), 3);

        final JSONObject edges = values.getJSONObject("edges");
        Assert.assertEquals(edges.length(), 4);
    }

    @Test(enabled = false)
    public void testGetOutputsGraphForEntity() throws Exception {
        ITypedReferenceableInstance entity = repository.getEntityDefinition(HIVE_TABLE_TYPE, "name", "sales_fact");

        JSONObject results = new JSONObject(lineageService.getOutputsGraphForEntity(entity.getId()._getId()));
        assertNotNull(results);
        System.out.println("outputs graph = " + results);

        JSONObject values = results.getJSONObject("values");
        assertNotNull(values);

        final JSONObject vertices = values.getJSONObject("vertices");
        Assert.assertEquals(vertices.length(), 3);

        final JSONObject edges = values.getJSONObject("edges");
        Assert.assertEquals(edges.length(), 4);
    }

    @DataProvider(name = "tableNamesProvider")
    private Object[][] tableNames() {
        return new String[][] { { "sales_fact", "4" }, { "time_dim", "3" }, { "sales_fact_daily_mv", "4" },
                { "sales_fact_monthly_mv", "4" } };
    }

    @Test(enabled = false)
    public void testGetSchema(String tableName, String expected) throws Exception {
        JSONObject results = getSchema(tableName);
        assertNotNull(results);
        System.out.println("columns = " + results);

        JSONArray rows = results.getJSONArray("rows");
        Assert.assertEquals(rows.length(), Integer.parseInt(expected));

        for (int index = 0; index < rows.length(); index++) {
            assertColumn(rows.getJSONObject(index));
        }
    }

    @Test(enabled = false)
    public void testGetSchemaForEntity(String tableName, String expected) throws Exception {
        ITypedReferenceableInstance entity = repository.getEntityDefinition(HIVE_TABLE_TYPE, "name", tableName);

        JSONObject results = new JSONObject(lineageService.getSchemaForEntity(entity.getId()._getId()));
        assertNotNull(results);
        System.out.println("columns = " + results);

        JSONArray rows = results.getJSONArray("rows");
        Assert.assertEquals(rows.length(), Integer.parseInt(expected));

        for (int index = 0; index < rows.length(); index++) {
            assertColumn(rows.getJSONObject(index));
        }
    }

    private void assertColumn(JSONObject jsonObject) throws JSONException {
        assertNotNull(jsonObject.getString("name"));
        assertNotNull(jsonObject.getString("comment"));
        assertNotNull(jsonObject.getString("dataType"));
        Assert.assertEquals(jsonObject.getString("$typeName$"), "hive_column");
    }

    @Test(enabled = false)
    public void testGetSchemaForDBEntity() throws Exception {
        String dbId = getEntityId(DATASET_SUBTYPE, "name", "dataSetSubTypeInst1");
        JSONObject results = new JSONObject(lineageService.getSchemaForEntity(dbId));
    }

    @DataProvider(name = "invalidArgumentsProvider")
    private Object[][] arguments() {
        return new String[][] { { null, IllegalArgumentException.class.getName() },
                { "", IllegalArgumentException.class.getName() },
                { "blah", EntityNotFoundException.class.getName() } };
    }

    abstract class Invoker {
        abstract void run() throws AtlasException;
    }

    public void testInvalidArguments(String expectedException, Invoker invoker) throws Exception {
        try {
            invoker.run();
            fail("Expected " + expectedException);
        } catch (Exception e) {
            assertEquals(e.getClass().getName(), expectedException);
        }
    }

    @Test(enabled = false)
    public void testGetSchemaInvalidArguments(final String tableName, String expectedException) throws Exception {
        testInvalidArguments(expectedException, new Invoker() {
            @Override
            void run() throws AtlasException {
                lineageService.getSchema(tableName);
            }
        });
    }

    @Test(enabled = false)
    public void testGetSchemaForEntityInvalidArguments(final String entityId, String expectedException)
            throws Exception {
        testInvalidArguments(expectedException, new Invoker() {
            @Override
            void run() throws AtlasException {
                lineageService.getSchemaForEntity(entityId);
            }
        });
    }

    private JSONObject getSchema(String tableName) throws Exception {
        return new JSONObject(lineageService.getSchema("qualified:" + tableName));
    }

    private JSONObject getInputsGraph(String tableName) throws Exception {
        return new JSONObject(lineageService.getInputsGraph("qualified:" + tableName));
    }

    private JSONObject getOutputsGraph(String tableName) throws Exception {
        return new JSONObject(lineageService.getOutputsGraph("qualified:" + tableName));
    }

    @Test(enabled = false)
    public void testLineageWithDelete() throws Exception {
        String tableName = "table" + random();
        createTable(tableName, 3, true);
        String tableId = getEntityId(HIVE_TABLE_TYPE, "name", tableName);

        JSONObject results = getSchema(tableName);
        assertEquals(results.getJSONArray("rows").length(), 3);

        results = getInputsGraph(tableName);
        Struct resultInstance = InstanceSerialization.fromJsonStruct(results.toString(), true);
        Map<String, Struct> vertices = (Map) resultInstance.get("vertices");
        assertEquals(vertices.size(), 2);
        Struct vertex = vertices.get(tableId);
        assertEquals(((Struct) vertex.get("vertexId")).get("state"), Id.EntityState.ACTIVE.name());

        results = getOutputsGraph(tableName);
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 2);

        results = new JSONObject(lineageService.getSchemaForEntity(tableId));
        assertEquals(results.getJSONArray("rows").length(), 3);

        results = new JSONObject(lineageService.getInputsGraphForEntity(tableId));
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 2);

        results = new JSONObject(lineageService.getOutputsGraphForEntity(tableId));
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 2);

        //Delete the entity. Lineage for entity returns the same results as before.
        //Lineage for table name throws EntityNotFoundException
        AtlasClient.EntityResult deleteResult = repository.deleteEntities(Arrays.asList(tableId));
        assertTrue(deleteResult.getDeletedEntities().contains(tableId));

        results = new JSONObject(lineageService.getSchemaForEntity(tableId));
        assertEquals(results.getJSONArray("rows").length(), 3);

        results = new JSONObject(lineageService.getInputsGraphForEntity(tableId));
        resultInstance = InstanceSerialization.fromJsonStruct(results.toString(), true);
        vertices = (Map) resultInstance.get("vertices");
        assertEquals(vertices.size(), 2);
        vertex = vertices.get(tableId);
        assertEquals(((Struct) vertex.get("vertexId")).get("state"), Id.EntityState.DELETED.name());

        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 2);

        results = new JSONObject(lineageService.getOutputsGraphForEntity(tableId));
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 2);

        try {
            getSchema(tableName);
            fail("Expected EntityNotFoundException");
        } catch (EntityNotFoundException e) {
            //expected
        }

        try {
            getInputsGraph(tableName);
            fail("Expected EntityNotFoundException");
        } catch (EntityNotFoundException e) {
            //expected
        }

        try {
            getOutputsGraph(tableName);
            fail("Expected EntityNotFoundException");
        } catch (EntityNotFoundException e) {
            //expected
        }

        //Create table again should show new lineage
        createTable(tableName, 2, false);
        results = getSchema(tableName);
        assertEquals(results.getJSONArray("rows").length(), 2);

        results = getOutputsGraph(tableName);
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 0);

        results = getInputsGraph(tableName);
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 0);

        tableId = getEntityId(HIVE_TABLE_TYPE, "name", tableName);

        results = new JSONObject(lineageService.getSchemaForEntity(tableId));
        assertEquals(results.getJSONArray("rows").length(), 2);

        results = new JSONObject(lineageService.getInputsGraphForEntity(tableId));
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 0);

        results = new JSONObject(lineageService.getOutputsGraphForEntity(tableId));
        assertEquals(results.getJSONObject("values").getJSONObject("vertices").length(), 0);
    }

    private void createTable(String tableName, int numCols, boolean createLineage) throws Exception {
        String dbId = getEntityId(DATABASE_TYPE, "name", "Sales");
        Id salesDB = new Id(dbId, 0, DATABASE_TYPE);

        //Create the entity again and schema should return the new schema
        List<Referenceable> columns = new ArrayStack();
        for (int i = 0; i < numCols; i++) {
            columns.add(column("col" + random(), "int", "column descr"));
        }

        Referenceable sd = storageDescriptor("hdfs://host:8000/apps/warehouse/sales", "TextInputFormat",
                "TextOutputFormat", true, ImmutableList.of(column("time_id", "int", "time id")));

        Id table = table(tableName, "test table", salesDB, sd, "fetl", "External", columns);
        if (createLineage) {
            Id inTable = table("table" + random(), "test table", salesDB, sd, "fetl", "External", columns);
            Id outTable = table("table" + random(), "test table", salesDB, sd, "fetl", "External", columns);
            loadProcess("process" + random(), "hive query for monthly summary", "Tim ETL",
                    ImmutableList.of(inTable), ImmutableList.of(table), "create table as select ", "plan", "id",
                    "graph", "ETL");
            loadProcess("process" + random(), "hive query for monthly summary", "Tim ETL", ImmutableList.of(table),
                    ImmutableList.of(outTable), "create table as select ", "plan", "id", "graph", "ETL");
        }
    }

    private String random() {
        return RandomStringUtils.randomAlphanumeric(5);
    }

    private String getEntityId(String typeName, String attributeName, String attributeValue) throws Exception {
        return repository.getEntityDefinition(typeName, attributeName, attributeValue).getId()._getId();
    }
}