planattributescleaning.PlanAttributesCleaning.java Source code

Introduction

Here is the source code for planattributescleaning.PlanAttributesCleaning.java
Source

/*
 * To change this license header, choose License Headers in Project Properties.
 * To change this template file, choose Tools | Templates
 * and open the template in the editor.
 */
package planattributescleaning;

import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
//import org.apache.hadoop.mapred.FileInputFormat;
//import org.apache.hadoop.mapred.FileOutputFormat;
//import org.apache.hadoop.mapred.JobClient;
//import org.apache.hadoop.mapred.JobConf;
//import org.apache.hadoop.mapred.MapReduceBase;
//import org.apache.hadoop.mapred.Mapper;
//import org.apache.hadoop.mapred.OutputCollector;
//import org.apache.hadoop.mapred.Reducer;
//import org.apache.hadoop.mapred.Reporter;
//import org.apache.hadoop.mapred.TextOutputFormat;
//import org.apache.hadoop.mapred.lib.MultipleOutputs;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 *
 * @author Fei
 */
public class PlanAttributesCleaning {

    /**
     * @param args the command line arguments
     */
    /*
    public static class Events_Mapper1  extends MapReduceBase
    implements Mapper<Object, Text, Text, NullWritable> {
      private MultipleOutputs mos;
       // private final static SimpleDateFormat frmt = new SimpleDateFormat("'['dd/MMM/yyyy:HH:mm:ss");
    @Override
    public void configure(JobConf conf) {
        mos = new MultipleOutputs(conf);
    }
        
    @Override
    public void map(Object key, Text values, OutputCollector<Text, NullWritable> oc, Reporter reporter) throws IOException {
            
            String [] wordList = values.toString().split(",");
            try{
            String id = wordList[114].substring(0, 13);
            if(!wordList[43].equalsIgnoreCase("Yes")){
                    
                   
                //for separated plan, get the planid, businessyear, deductiables, moop, combined, inntier1, and outnet, and copaytier1.
                //tier2 doesn't matter and rare in us medical plan system
                if(!wordList[12].isEmpty()||!wordList[16].isEmpty()||!wordList[21].isEmpty()){
                    mos.getCollector("Attributes","separated", reporter).collect(new Text(id+wordList[12]+wordList[16]
                            +wordList[17]+wordList[21]
                    +wordList[22]+wordList[26]+wordList[30]+wordList[34]+wordList[38]+wordList[42]+wordList[69]+wordList[73]+
                            wordList[74]+wordList[78]+wordList[79]+wordList[83]+wordList[87]+wordList[91]+wordList[95]+wordList[99]+wordList[103]), NullWritable.get());
                }
                    
                if(!wordList[139].isEmpty()||!wordList[143].isEmpty()||!wordList[148].isEmpty()){
                    mos.getCollector("Attributes","combined", reporter).collect(new Text(id+wordList[139]
                            +wordList[143]+wordList[144]+wordList[148]+wordList[149]+wordList[153]+wordList[157]+wordList[161]
                    +wordList[165]+wordList[169]+wordList[103]), NullWritable.get());
                }
            }
                if(wordList[43].equalsIgnoreCase("yes")){
                    mos.getCollector("Attributes","dental",reporter).collect(new Text(id+wordList[69]+wordList[73]+
                            wordList[74]+wordList[78]+wordList[79]+wordList[83]+wordList[87]+wordList[91]+wordList[95]+wordList[99]+wordList[103]+wordList[63]), NullWritable.get());
                }
                        
            }
                
                
        catch(Exception e){
    }           
    }
        
    @Override
    public void close() throws IOException {
        mos.close(); //To change body of generated methods, choose Tools | Templates.
    }
        
        
        
    }
        
        
    public static class Events_Reducer1 extends MapReduceBase implements Reducer<Text, NullWritable, Text, NullWritable> {
           
         
      @Override
    public void reduce(Text key, Iterator<NullWritable> values, OutputCollector<Text, NullWritable> oc, Reporter rprtr) throws IOException{
            
        oc.collect(key, NullWritable.get());
    }
        
        
        
           
        
    }
        
    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
       JobConf conf = new JobConf(PlanAttributesCleaning.class);
    conf.setJobName("AttributesCleaning");
        
        
    conf.setMapperClass(Events_Mapper1.class);
        
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(NullWritable.class);
    conf.setNumReduceTasks(0);
    MultipleOutputs.addNamedOutput(conf, "Attributes", TextOutputFormat.class, Text.class, NullWritable.class);
        
    //job.setNumReduceTasks(0);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(NullWritable.class);
    FileInputFormat.addInputPath(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    //        boolean success = job.waitForCompletion(true);
    //        Configuration conf1 = new Configuration();
    //        Job job1 = new Job(conf1,"count total events happend one day in each country");
    //        job1.setJarByClass(PlanAttributesCleaning.class);
    //        if(success){
    //            
    //        }
    //        job.setReducerClass(Events_Reducer1.class);
    JobClient.runJob(conf);
        
    }
        
    }
        
    */
    public static class Events_Mapper1 extends Mapper<Object, Text, Text, NullWritable> {

        @Override
        public void map(Object key, Text values, Context context) throws IOException, InterruptedException {
            try {
                String[] wordList = values.toString().split(",(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)", -1);
                String id = wordList[114].substring(0, 14);
                int i = 0;
                if (wordList[103].equalsIgnoreCase("low") || wordList[103].equalsIgnoreCase("high"))//dental
                {

                    context.write(
                            new Text(String.valueOf(i) + id + "," + wordList[69] + "," + wordList[73] + ","
                                    + wordList[74] + "," + wordList[78] + "," + wordList[87] + "," + wordList[91]
                                    + "," + wordList[99] + "," + wordList[103] + "," + wordList[63]),
                            NullWritable.get());
                } else if (!wordList[0].equalsIgnoreCase("AVCalculatorOutputNumber")) {
                    if (!wordList[139].isEmpty() || !wordList[143].isEmpty() || !wordList[144].isEmpty()
                            || !wordList[148].isEmpty() || !wordList[157].isEmpty() || !wordList[161].isEmpty()
                            || !wordList[169].isEmpty()) {
                        i = 1;
                        context.write(
                                new Text(String.valueOf(i) + id + "," + wordList[139] + "," + wordList[143] + ","
                                        + wordList[144] + "," + wordList[148] + "," + wordList[157] + ","
                                        + wordList[161] + "," + wordList[169] + "," + wordList[103]),
                                NullWritable.get());
                    }
                    if (!wordList[12].isEmpty() || !wordList[16].isEmpty() || !wordList[17].isEmpty()
                            || !wordList[21].isEmpty() || !wordList[30].isEmpty() || !wordList[34].isEmpty()
                            || !wordList[42].isEmpty()) {
                        i = 2;
                        context.write(
                                new Text(String.valueOf(i) + id + "," + wordList[12] + "," + wordList[16] + ","
                                        + wordList[17] + "," + wordList[21] + "," + wordList[30] + ","
                                        + wordList[34] + "," + wordList[42] + "," + wordList[103]),
                                NullWritable.get());

                    }
                    if (!wordList[69].isEmpty() || !wordList[73].isEmpty() || !wordList[74].isEmpty()
                            || !wordList[78].isEmpty() || !wordList[87].isEmpty() || !wordList[91].isEmpty()
                            || !wordList[99].isEmpty())//dental
                    {
                        i = 3;

                        context.write(
                                new Text(String.valueOf(i) + id + "," + wordList[69] + "," + wordList[73] + ","
                                        + wordList[74] + "," + wordList[78] + "," + wordList[87] + ","
                                        + wordList[91] + "," + wordList[99] + "," + wordList[103]),
                                NullWritable.get());
                    }

                }

            } catch (Exception e) {
            }
        }

    }

    public static class GroupPartitioner extends Partitioner<Text, NullWritable> {

        @Override
        public int getPartition(Text key, NullWritable value, int i) {
            return Integer.parseInt(key.toString().substring(0, 1)) % i;
        }
    }

    public static class Events_Reducer1
            extends org.apache.hadoop.mapreduce.Reducer<Text, NullWritable, Text, NullWritable> {

        @Override
        protected void reduce(Text key, Iterable<NullWritable> values, Context context)
                throws IOException, InterruptedException {

            context.write(new Text(key.toString().substring(1)), NullWritable.get());
        }

    }

    public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "attributes cleaning");
        job.setJarByClass(PlanAttributesCleaning.class);

        job.setMapperClass(Events_Mapper1.class);

        job.setReducerClass(Events_Reducer1.class);
        job.setPartitionerClass(GroupPartitioner.class);

        job.setNumReduceTasks(4);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
        org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(job, new Path(args[0]));
        org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);

    }

}