com.cloudera.navigator.navigator_partner.extraction.HiveMetadataExtraction.java Source code

Introduction

Here is the source code for com.cloudera.navigator.navigator_partner.extraction.HiveMetadataExtraction.java
Source

/*
 * Copyright (c) 2016 Cloudera, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.navigator.navigator_partner.extraction;

import com.cloudera.nav.sdk.client.MetadataExtractor;
import com.cloudera.nav.sdk.client.MetadataResultSet;
import com.cloudera.nav.sdk.client.NavApiCient;
import com.cloudera.nav.sdk.client.NavigatorPlugin;
import com.cloudera.nav.sdk.client.QueryUtils;
import com.cloudera.nav.sdk.client.writer.ResultSet;
import com.cloudera.nav.sdk.model.Source;
import com.cloudera.nav.sdk.model.SourceType;
import com.cloudera.nav.sdk.model.entities.HiveTable;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
 * This is a sample program that runs different metadata extractions and
 * updates on Hive entities. It shows how to create tags and custom properties.
 *
 * Program arguments:
 * 1. path to config file: see examples/src/main/resources/sample.conf
 * 2. output path: where to write the extracted marker for next run
 * 3. (optional) path to a file whose first line is the marker saved by a
 *    previous run; when omitted, extraction starts with a null marker
 *
 */
public class HiveMetadataExtraction {

    /** Query matching every Hive table; used for both the initial and the repeated extraction. */
    private static final String HIVE_TABLE_QUERY = "sourceType:HIVE AND type:TABLE";

    /**
     * Runs the sample: tags the {@code cart_items} table, runs the Hive
     * extraction examples, then stores the resulting marker at the output path.
     *
     * @param args see the class description for the expected arguments
     * @throws IOException if the marker file cannot be read or written
     * @throws IllegalArgumentException if fewer than two arguments are given
     */
    public static void main(String[] args) throws IOException {
        // handle arguments
        Preconditions.checkArgument(args.length >= 2,
                "expected arguments: <config file path> <marker output path> [<previous marker file>]");
        String configFilePath = args[0];
        String markerPath = args[1];
        String marker = args.length > 2 ? HiveMetadataExtraction.readFileArg(args[2]) : null;

        NavigatorPlugin navPlugin = NavigatorPlugin.fromConfigFile(configFilePath);
        NavApiCient client = navPlugin.getClient();

        MetadataExtractor extractor = new MetadataExtractor(client, null);
        addCustomTags(navPlugin, extractor, marker);

        // Run filtered examples
        String nextMarker = runHiveExtractions(extractor, marker, "salary");

        // Persist the marker so that it can be passed back in as the third argument next time
        writeMarker(markerPath, nextMarker);
    }

    /**
     * How to retrieve various Hive entities.
     * Comments describe how to do the equivalent functions using the REST APIs.
     *
     * @param extractor - used to extract the data
     * @param marker - string cursor returned by the server when extraction occurs; may be null
     * @param colName - original name of the Hive column (type FIELD) to look up
     */
    public static void getHive(MetadataExtractor extractor, String marker, String colName) {
        // Signature kept as-is for existing callers; the marker produced by the run is dropped here.
        runHiveExtractions(extractor, marker, colName);
    }

    /**
     * Runs the database, table, view and column extraction examples, printing the
     * first result of each, then repeats the table query from the marker returned
     * by the first table extraction.
     *
     * @param extractor - used to extract the data
     * @param marker - string cursor to start from; may be null
     * @param colName - original name of the Hive column (type FIELD) to look up
     * @return the marker returned by the first table extraction
     */
    private static String runHiveExtractions(MetadataExtractor extractor, String marker, String colName) {

        // REST API:
        //curl 'http://fkader-nav-1.vpc.cloudera.com:7187/api/v9/entities?query=((type:DATABASE)AND(sourceType:HIVE))' -u admin:admin -X GET
        Iterable<Map<String, Object>> hiveDb = extractor
                .extractMetadata(marker, null, "sourceType:HIVE AND type:DATABASE", null).getEntities();
        getFirstResult(hiveDb);

        // REST API:
        //curl 'http://fkader-nav-1.vpc.cloudera.com:7187/api/v9/entities?query=((type:TABLE)AND(sourceType:HIVE))' -u admin:admin -X GET
        MetadataResultSet hiveTableExtract = extractor.extractMetadata(marker, null, HIVE_TABLE_QUERY, null);

        Iterable<Map<String, Object>> hiveTable = hiveTableExtract.getEntities();
        getFirstResult(hiveTable);

        // REST API:
        //curl 'http://fkader-nav-1.vpc.cloudera.com:7187/api/v9/entities?query=((type:VIEW)AND(sourceType:HIVE))' -u admin:admin -X GET
        Iterable<Map<String, Object>> hiveView = extractor
                .extractMetadata(marker, null, "sourceType:HIVE AND type:VIEW", null).getEntities();
        getFirstResult(hiveView);

        // REST API:
        //curl 'http://fkader-nav-1.vpc.cloudera.com:7187/api/v9/entities?query=((type:FIELD)AND(sourceType:HIVE)AND(originalName:xxcolNamexx))' -u admin:admin -X GET
        Iterable<Map<String, Object>> hiveColumn = extractor.extractMetadata(marker, null,
                "sourceType:HIVE AND type:FIELD " + "AND originalName:" + colName, null).getEntities();
        getFirstResult(hiveColumn);

        //get the marker to save for next run (will use this as the starting point)
        String hiveExtractMarker = hiveTableExtract.getMarker();

        //run the same query again from that marker; the sample expects no data because nothing changed since the run
        MetadataResultSet hiveTableExtractAgain = extractor.extractMetadata(hiveExtractMarker, null,
                HIVE_TABLE_QUERY, null);

        Iterable<Map<String, Object>> hiveTableAgain = hiveTableExtractAgain.getEntities();
        System.out.println("query the data again - should return no results");
        getFirstResult(hiveTableAgain);

        return hiveExtractMarker;
    }

    /**
     * Create custom metadata as tags and key:value properties for Hive entries.
     * Comments describe how to do the equivalent functions using the REST APIs.
     *
     * Only the first table returned for {@code originalName:cart_items} is updated;
     * nothing is written when the query returns no entities.
     *
     * @param navPlugin - Navigator Plugin to communicate with Navigator service
     * @param extractor - used to extract the data
     * @param marker - string cursor returned by the server when extraction occurs; may be null
     * @throws NullPointerException if the matched entity has no sourceId or originalName
     * @throws RuntimeException if the write reports errors
     */
    public static void addCustomTags(NavigatorPlugin navPlugin, MetadataExtractor extractor, String marker) {

        //REST API - see HDFSMetadataExtraction for example..
        //add tags to the cart_items table
        Iterable<Map<String, Object>> sampleTable = extractor
                .extractMetadata(marker, null, "sourceType:HIVE AND type:TABLE AND originalName:cart_items", null)
                .getEntities();

        Iterator<Map<String, Object>> iterSampleTable = sampleTable.iterator();

        if (iterSampleTable.hasNext()) {
            Map<String, Object> result = iterSampleTable.next();

            // Fail with a descriptive message instead of a bare NPE from toString()
            Object sourceId = Preconditions.checkNotNull(result.get("sourceId"),
                    "extracted table entity has no sourceId");
            Object originalName = Preconditions.checkNotNull(result.get("originalName"),
                    "extracted table entity has no originalName");

            HiveTable modifiedSampleTable = new HiveTable();
            modifiedSampleTable.setSourceId(sourceId.toString());
            // NOTE(review): the database name is hard-coded; presumably cart_items lives in "default" - confirm
            modifiedSampleTable.setDatabaseName("default");
            modifiedSampleTable.setTableName(originalName.toString());
            modifiedSampleTable.addTags("sampletabletag1");

            //key:value properties tag
            Map<String, String> props = new HashMap<String, String>();
            props.put("sampleKeyProp", "sampleValueProp");
            props.put("creator", "test");
            modifiedSampleTable.addProperties(props);
            ResultSet results = navPlugin.write(modifiedSampleTable);
            if (results.hasErrors()) {
                throw new RuntimeException(results.toString());
            }
            System.out.println("successfully updated table");
        }

    }

    /**
     * Prints selected attributes of the first entity in the given results, or
     * "no elements found" when there are none.
     *
     * @param iterable - extracted entities, each represented as a map of attributes
     */
    private static void getFirstResult(Iterable<Map<String, Object>> iterable) {
        // In real usage, iterate through results and process each metadata object
        Iterator<Map<String, Object>> iterator = iterable.iterator();
        if (iterator.hasNext()) {
            Map<String, Object> result = iterator.next();
            System.out.println("source: " + result.get("sourceType") + "  type: " + result.get("type")
                    + " originalName " + result.get("originalName") + " file location "
                    + result.get("fileSystemPath") + " parent path " + result.get("parentPath"));
        } else {
            System.out.println("no elements found");
        }
    }

    /**
     * Reads the first line of a UTF-8 text file.
     *
     * @param path - file to read
     * @return the first line without its line terminator, or null if the file is empty
     * @throws IOException if the file cannot be opened or read
     */
    static String readFileArg(String path) throws IOException {
        try (BufferedReader reader = Files.newBufferedReader(Paths.get(path), StandardCharsets.UTF_8)) {
            return reader.readLine();
        }
    }

    /**
     * Stores the marker as a single UTF-8 line so that {@link #readFileArg(String)}
     * can read it back on the next run.
     *
     * @param markerPath - file to create or overwrite
     * @param marker - marker to store; nothing is written when it is null
     * @throws IOException if the file cannot be written
     */
    private static void writeMarker(String markerPath, String marker) throws IOException {
        if (marker == null) {
            // Writing a null reference would store the text "null", which a later run would send as a marker
            return;
        }
        String line = marker + System.lineSeparator();
        Files.write(Paths.get(markerPath), line.getBytes(StandardCharsets.UTF_8));
    }

}