// Java tutorial
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pooledtimeseries.healthcheck;

import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
import org.pooledtimeseries.FeatureVector;
import org.pooledtimeseries.cartesian.CartesianInputFormat;
import org.pooledtimeseries.seqfile.TextVectorsToSequenceFile;
import org.pooledtimeseries.util.PoTSerialiser;
import org.pooledtimeseries.util.ReadSeqFileUtil;

/**
 * Health-check job for sequence files generated by
 * {@link TextVectorsToSequenceFile}.
 *
 * <p>The same sequence file is fed to both sides of a
 * {@link CartesianInputFormat}. For every pair the mapper prints the pair key
 * and the number of {@link FeatureVector}s deserialised from the value to the
 * task's standard output, so a correct sequence file can be recognised by the
 * keys and sizes in the task logs.
 *
 * <p>The job output contains up to two records:
 * <ul>
 *   <li>{@code simkey} - number of pairs whose two file names are equal</li>
 *   <li>{@code diskey} - number of pairs whose two file names differ</li>
 * </ul>
 */
public class CheckCartesianProductSeqFile {

    /**
     * Logs every pair and emits a count of one under {@code simkey} or
     * {@code diskey}, depending on whether both file names in the pair match.
     */
    public static class CartesianMapper extends MapReduceBase
            implements Mapper<Text, BytesWritable, Text, IntWritable> {

        private static final IntWritable ONE = new IntWritable(1);

        private final Text sameKey = new Text("simkey");
        private final Text differentKey = new Text("diskey");

        /**
         * Prints the pair key and deserialised vector count, then emits one
         * count for the pair.
         *
         * @param key      pair key; {@link ReadSeqFileUtil#getFileNames} is
         *                 used to split it into the two file names
         * @param value    serialised list of {@link FeatureVector}s
         * @param output   collector receiving {@code (simkey|diskey, 1)}
         * @param reporter unused
         * @throws IOException if the collector fails to accept the record
         */
        @Override
        public void map(Text key, BytesWritable value,
                OutputCollector<Text, IntWritable> output, Reporter reporter)
                throws IOException {
            // getBytes() exposes the backing buffer, which may be longer than
            // the record; only the first getLength() bytes are valid payload.
            byte[] payload = Arrays.copyOf(value.getBytes(), value.getLength());

            // The cast cannot be checked at runtime because of erasure; the
            // element type is what this health check assumes the file holds.
            @SuppressWarnings("unchecked")
            List<FeatureVector> vectors =
                    (List<FeatureVector>) PoTSerialiser.getObject(payload);

            System.out.println(key);
            System.out.println("");
            System.out.println("Size- " + vectors.size());
            System.out.println();

            // Classify the pair by comparing the two file names in the key.
            String[] files = ReadSeqFileUtil.getFileNames(key);
            if (files[0].equals(files[1])) {
                output.collect(sameKey, ONE);
            } else {
                output.collect(differentKey, ONE);
            }
        }
    }

    /**
     * Sums the per-pair counts for each key and writes the total as text.
     */
    public static class CartesianReducer extends MapReduceBase
            implements Reducer<Text, IntWritable, Text, Text> {

        private final Text outputVal = new Text();

        /**
         * Adds up all counts received for {@code key}.
         *
         * @param key      {@code simkey} or {@code diskey}
         * @param values   counts emitted by the mapper for this key
         * @param output   collector receiving {@code (key, total)}
         * @param reporter unused
         * @throws IOException if the collector fails to accept the record
         */
        @Override
        public void reduce(Text key, Iterator<IntWritable> values,
                OutputCollector<Text, Text> output, Reporter reporter)
                throws IOException {
            // A cartesian product yields n^2 pairs, which exceeds the int
            // range for large inputs, so accumulate in a long.
            long sum = 0L;
            while (values.hasNext()) {
                sum += values.next().get();
            }
            outputVal.set(Long.toString(sum));
            output.collect(key, outputVal);
        }
    }

    /**
     * Configures and runs the health-check job, then exits with status
     * {@code 0} on success, {@code 2} if the job did not succeed, or
     * {@code 1} on a usage error.
     *
     * @param args generic Hadoop options followed by
     *             {@code <input sequence file> <out>}
     * @throws IOException            if option parsing or job execution fails
     * @throws InterruptedException   if interrupted while waiting for the job
     * @throws ClassNotFoundException declared for caller compatibility
     */
    public static void main(String[] args)
            throws IOException, InterruptedException, ClassNotFoundException {
        long start = System.currentTimeMillis();

        // JobConf(String) interprets its argument as a configuration resource
        // path, not as a job name, so the name is set explicitly instead.
        JobConf conf = new JobConf();
        conf.setJobName("Cartesian Product");

        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length != 2) {
            System.err.println("Usage: CheckCartesianProductSeqFile <input sequence file> <out>");
            System.exit(1);
        }

        conf.setJarByClass(CheckCartesianProductSeqFile.class);
        conf.setMapperClass(CartesianMapper.class);
        conf.setReducerClass(CartesianReducer.class);

        // Pair the input sequence file with itself.
        conf.setInputFormat(CartesianInputFormat.class);
        CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);
        CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);

        TextOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

        // The mapper emits (Text, IntWritable) while the reducer emits
        // (Text, Text); declare both so the configuration matches the code.
        conf.setMapOutputKeyClass(Text.class);
        conf.setMapOutputValueClass(IntWritable.class);
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        RunningJob job = JobClient.runJob(conf);
        // NOTE(review): runJob normally returns only once the job has
        // finished; this poll is kept as a safety net in case it returns
        // early (e.g. after an interrupt) - confirm against the Hadoop
        // version in use.
        while (!job.isComplete()) {
            Thread.sleep(1000);
        }

        long finish = System.currentTimeMillis();
        System.out.println("Time in ms: " + (finish - start));

        System.exit(job.isSuccessful() ? 0 : 2);
    }
}