com.wipro.ats.bdre.dq.DQMapper.java Source code

Introduction

Here is the source code for com.wipro.ats.bdre.dq.DQMapper.java, a Hadoop MapReduce mapper that applies Drools rules to classify each input record as good or bad.

Source

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.wipro.ats.bdre.dq;

import com.wipro.ats.bdre.IMConfig;
import com.wipro.ats.bdre.md.api.GetProperties;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.log4j.Logger;
import org.drools.KnowledgeBase;
import org.drools.builder.KnowledgeBuilder;
import org.drools.builder.KnowledgeBuilderFactory;
import org.drools.builder.ResourceType;
import org.drools.io.impl.UrlResource;
import org.drools.runtime.StatefulKnowledgeSession;

import java.io.IOException;
import java.util.Properties;

/**
 * @author Satish Kumar
 *         <p/>
 *         Mapper class that creates the Drools KnowledgeBase, validates each record, and
 *         emits the record as GOOD or BAD based on the rules defined in Drools.
 */
public class DQMapper extends Mapper<LongWritable, Text, Text, Text> {

    private static final Logger LOGGER = Logger.getLogger(DQMapper.class);
    Text mOutputKey = new Text();
    Text mOutputValue = new Text();
    private int goodRecords;
    private int badRecords;
    private MultipleOutputs<Text, NullWritable> mos;
    private Properties props = null;
    private GetProperties getProperties = new GetProperties();
    private KnowledgeBase knowledgeBase = null;

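    // Reads the DQ properties for the configured process id, builds the Drools
    // knowledge base from the rules package, and opens the MultipleOutputs handle
    // used to route good/bad records and the report.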
    @Override
    public void setup(org.apache.hadoop.mapreduce.Mapper.Context context) throws IOException, InterruptedException {
        LOGGER.info("START :: DQMapper.setup(Context context)");
        Configuration conf = context.getConfiguration();
        props = getProperties.getProperties(conf.get("dq.process.id"), "dq");
        knowledgeBase = buildKnowledgeBase();
        LOGGER.debug("The value of props is " + props.toString() + "\npackage name is "
                + props.getProperty("rules.package"));
        mos = new MultipleOutputs<Text, NullWritable>(context);
        goodRecords = 0;
        badRecords = 0;
    }

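    // Fetches the compiled rules package (binary) from the Drools repository URL
    // configured in IMConfig, authenticating with the credentials held in the DQ
    // properties, and assembles it into a KnowledgeBase.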
    private KnowledgeBase buildKnowledgeBase() {
        KnowledgeBuilder kbuilder = KnowledgeBuilderFactory.newKnowledgeBuilder();
        UrlResource urlResource = new UrlResource(
                IMConfig.getProperty("dq.drools-url-prefix") + props.getProperty("rules.package") + "/binary");
        LOGGER.info("urlResource=" + urlResource);
        urlResource.setBasicAuthentication("enabled");
        LOGGER.info("props = " + props);
        urlResource.setUsername(props.getProperty("rules.username"));
        urlResource.setPassword(props.getProperty("rules.password"));
        kbuilder.add(urlResource, ResourceType.PKG);
        return kbuilder.newKnowledgeBase();
    }

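    // Runs the rules against a single record in a fresh stateful session; the
    // session is disposed in the finally block so facts never leak between records.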
    private DQDataModel validateRecord(String record) {
        StatefulKnowledgeSession session = knowledgeBase.newStatefulKnowledgeSession();
        try {
            DQDataModel dataModel = new DQDataModel(record, props.getProperty("file.delimiter.regex"));
            session.insert(dataModel);
            session.fireAllRules();
            return dataModel;
        } finally {
            session.dispose();
        }
    }

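    // Routes each record: valid records go to the GOOD named output (record as key,
    // no value), invalid records go to the BAD named output keyed by the validation
    // failure message with the original record as the value.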
    @Override
    public void map(LongWritable key, Text value, org.apache.hadoop.mapreduce.Mapper.Context context)
            throws IOException, InterruptedException {
        DQDataModel dqDataModel = validateRecord(value.toString());
        LOGGER.trace("map() :: " + value.toString() + " = " + dqDataModel.getmInvalidRecordMessage());
        if (dqDataModel.isValidRecord()) {
            goodRecords++;
            mOutputKey.set(dqDataModel.getmRecord());
            mos.write(DQConstants.GOOD_RECORDS_FILE, mOutputKey, NullWritable.get(),
                    DQConstants.INTERMEDIATE_GOOD_RECORD_OUTPUT_DIR);
        } else {
            badRecords++;
            mOutputKey.set(dqDataModel.getmInvalidRecordMessage());
            mOutputValue.set(value.toString());
            mos.write(DQConstants.BAD_RECORDS_FILE, mOutputKey, mOutputValue,
                    DQConstants.INTERMEDIATE_BAD_RECORD_OUTPUT_DIR);
        }
    }

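    // Writes this mapper's good/bad record counts to the report output and closes
    // the MultipleOutputs handle.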
    @Override
    public void cleanup(org.apache.hadoop.mapreduce.Mapper.Context context)
            throws IOException, InterruptedException {
        try {
            mos.write(DQConstants.FILE_REPORT_FILE, new Text(DQConstants.GOOD_RECORDS_FILE + " : " + goodRecords),
                    NullWritable.get(), DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
            mos.write(DQConstants.FILE_REPORT_FILE, new Text(DQConstants.BAD_RECORDS_FILE + " : " + badRecords),
                    NullWritable.get(), DQConstants.INTERMEDIATE_REPORT_OUTPUT_DIR);
        } catch (Exception e) {
            LOGGER.error("cleanup : " + e.toString(), e);
        } finally {
            mos.close();
        }
    }

}
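
The listing above defines only the mapper. As a usage illustration, here is a minimal driver sketch showing one way the named outputs referenced by DQMapper could be registered on a map-only job before submission. The class name DQDriverSketch, the argument layout, and the assumption that the DQConstants fields hold the named-output names are illustrative and not part of the original BDRE source.

// Hypothetical driver (not part of the original listing): registers the named
// outputs that DQMapper writes to. Paths and the process-id argument are examples.
package com.wipro.ats.bdre.dq;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class DQDriverSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("dq.process.id", args[2]); // read by DQMapper.setup()

        Job job = Job.getInstance(conf, "dq-check");
        job.setJarByClass(DQMapper.class);
        job.setMapperClass(DQMapper.class);
        job.setNumReduceTasks(0); // map-only; records are routed via MultipleOutputs
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Each named output written in DQMapper must be registered up front.
        MultipleOutputs.addNamedOutput(job, DQConstants.GOOD_RECORDS_FILE,
                TextOutputFormat.class, Text.class, NullWritable.class);
        MultipleOutputs.addNamedOutput(job, DQConstants.BAD_RECORDS_FILE,
                TextOutputFormat.class, Text.class, Text.class);
        MultipleOutputs.addNamedOutput(job, DQConstants.FILE_REPORT_FILE,
                TextOutputFormat.class, Text.class, NullWritable.class);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

MultipleOutputs.write(namedOutput, ...) rejects names that were not registered with addNamedOutput, so each of the three outputs used in the mapper has to be declared when the job is set up.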