List of usage examples for org.apache.hadoop.mapreduce.Job setJobName
public void setJobName(String name) throws IllegalStateException
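setJobName assigns the user-visible name shown in the JobTracker/ResourceManager UI and job history; it must be called before the job is submitted, otherwise it throws IllegalStateException. Before the project examples below, here is a minimal, self-contained sketch of typical usage. The class name SetJobNameExample and the identity pass-through mapper/reducer setup are illustrative assumptions, not taken from any of the projects listed here.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class SetJobNameExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf) is the non-deprecated replacement for new Job(conf) used in several examples below.
        Job job = Job.getInstance(conf);
        // Set the job name before submission; after submission setJobName throws IllegalStateException.
        job.setJobName("identity pass-through");
        job.setJarByClass(SetJobNameExample.class);

        // Identity mapper/reducer: TextInputFormat emits <LongWritable offset, Text line> pairs unchanged.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));   // input directory (command-line argument)
        FileOutputFormat.setOutputPath(job, new Path(args[1]));  // output directory (must not exist yet)

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Alternatively, the name can be supplied at construction time via new Job(conf, "name") or Job.getInstance(conf, "name"), as some of the examples below do.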
From source file:goraci.Verify.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.out.println("Usage : " + Verify.class.getSimpleName() + " <output dir> <num reducers>");
        return 0;
    }

    DataStore<Long, CINode> store = DataStoreFactory.getDataStore(Long.class, CINode.class,
            new Configuration());

    String outputDir = args[0];
    int numReducers = Integer.parseInt(args[1]);

    Job job = new Job(getConf());
    if (!job.getConfiguration().get("io.serializations")
            .contains("org.apache.hadoop.io.serializer.JavaSerialization")) {
        job.getConfiguration().set("io.serializations", job.getConfiguration().get("io.serializations")
                + ",org.apache.hadoop.io.serializer.JavaSerialization");
    }

    job.setJobName("Link Verifier");
    job.setNumReduceTasks(numReducers);
    job.setJarByClass(getClass());

    Query<Long, CINode> query = store.newQuery();
    query.setFields("prev");

    GoraMapper.initMapperJob(job, query, store, LongWritable.class, VLongWritable.class, VerifyMapper.class,
            true);

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);

    job.setReducerClass(VerifyReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, new Path(outputDir));

    boolean success = job.waitForCompletion(true);

    store.close();

    return success ? 0 : 1;
}
From source file:gov.nasa.jpl.memex.pooledtimeseries.MeanChiSquareDistanceCalculation.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(96);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    job.setJarByClass(MeanChiSquareDistanceCalculation.class);
    job.setJobName("mean_chi_square_calculation");

    System.out.println("Job ID" + job.getJobID());
    System.out.println("Track:" + baseConf.get("mapred.job.tracker"));
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    System.out.println("Caching video-metric-bak.tgz");
    job.addCacheArchive(new URI("/user/pts/video-metric-bak.tgz"));
    URI[] cacheFiles = job.getCacheFiles();
    if (cacheFiles != null && cacheFiles.length > 0) {
        System.out.println("Cache file ->" + cacheFiles[0]);
    }
    System.out.println("Cached video-metric-bak.tgz");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DoubleWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(DoubleWritable.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.waitForCompletion(true);
}
From source file:gov.nasa.jpl.memex.pooledtimeseries.SimilarityCalculation.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.loadLibrary(Core.NATIVE_LIBRARY_NAME);

    Configuration baseConf = new Configuration();
    baseConf.set("mapreduce.job.maps", "96");
    baseConf.set("mapreduce.job.reduces", "0");
    baseConf.set("mapred.tasktracker.map.tasks.maximum", "96");
    baseConf.set("meanDistsFilePath", args[2]);

    JobConf conf = new JobConf();
    System.out.println("Before Map:" + conf.getNumMapTasks());
    conf.setNumMapTasks(196);
    System.out.println("After Map:" + conf.getNumMapTasks());

    Job job = Job.getInstance(baseConf);
    System.out.println("Track: " + baseConf.get("mapred.job.tracker"));
    System.out.println("Job ID" + job.getJobID());
    System.out.println("Job Name" + job.getJobName());
    System.out.println(baseConf.get("mapreduce.job.maps"));

    job.setJarByClass(SimilarityCalculation.class);
    job.setJobName("similarity_calc");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    job.setMapperClass(Map.class);

    job.waitForCompletion(true);
}
From source file:gr.ntua.h2rdf.inputFormat.Example.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Job job = new Job();
    job.setJobName("nikos");
    // disable speculative execution
    job.setJarByClass(Example.class);

    // Set the table name to separate index rows based on where content is stored
    job.getConfiguration().set("TextIndexer.library", "spo");

    // Set the number of reducers for the job
    //job.setNumReduceTasks(numReducers);

    // important! without this setting the reducers get stuck!!!!!
    //job.getConfiguration().setInt("io.sort.mb", 20);

    // space delimited string of column families to scan
    job.setReducerClass(SimpleReducer.class);
    // job.setSortComparatorClass(KeyValue.KeyComparator.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapperClass(Map.class);
    job.setInputFormatClass(FileTableInputFormat.class);
    //job.setInputFormatClass(HFileInputFormat.class);

    FileOutputFormat.setOutputPath(job, new Path("output3"));

    Scan scan = new Scan();
    scan.setStartRow(Bytes.toBytes("873847660^^"));
    scan.setStopRow(Bytes.toBytes("873847660^^999999999"));
    scan.addFamily(Bytes.toBytes("A"));

    HBaseConfiguration HBconf = new HBaseConfiguration();
    HTable table = new HTable(HBconf, "osp");
    ResultScanner resultScanner = table.getScanner(scan);
    Result result;
    while ((result = resultScanner.next()) != null) {
        System.out.println(result.toString());
    }

    // System.out.println("scan is: " + TableMapReduceUtil.convertScanToString(scan));
    //MyTableMapReduceUtil.addCol("?x", "P0", "spo", "-1496159132", "A", "huihui", job);

    TableMapReduceUtil.newJob();

    //MyTableMapReduceUtil.addRow("?w ?z", "P2", "osp", "982", "982", "A", job);
    //TableMapReduceUtil.addCol("?x", "P0", "spo", "561203963^^", "561203963^^999999999", "A:2086497232", job);
    //TableMapReduceUtil.addRow("?x ?y", "P1", "spo", "947805029^^", "947805029^^999999999", "A", job);
    //TableMapReduceUtil.addRow("?w ?z", "P2", "osp", "893972985^^", "893972985^^999999999", "A", job);
    //TableMapReduceUtil.addRow("?w ?z", "P24", "osp", "9947^^", "9947^^999999999", "A", job);

    MyFileInputFormat.addInputPath(job, new Path("output/BGP1"));
    //MyFileInputFormat.addInputPath(job, new Path("output/BGP0"));

    job.waitForCompletion(true);
}
From source file:gr.ntua.h2rdf.LoadTriples.DistinctIds.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //io.compression.codecs
    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    Configuration conf = new Configuration();
    Path blockProjection = new Path("blockIds/");
    Path translations = new Path("translations/");
    Path sample = new Path("sample/");
    Path temp = new Path("temp/");
    Path uniqueIds = new Path("uniqueIds/");
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(uniqueIds)) {
            fs.delete(uniqueIds, true);
        }
        if (fs.exists(translations)) {
            fs.delete(translations, true);
        }
        if (fs.exists(blockProjection)) {
            fs.delete(blockProjection, true);
        }
        if (fs.exists(sample)) {
            fs.delete(sample, true);
        }
        if (fs.exists(temp)) {
            fs.delete(temp, true);
        }

        FileOutputFormat.setOutputPath(job, uniqueIds);
        Path inp = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inp);

        double type = 1;
        double datasetSize = 0;
        if (fs.isFile(inp)) {
            datasetSize = fs.getFileStatus(inp).getLen();
        } else if (fs.isDirectory(inp)) {
            FileStatus[] s = fs.listStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        } else {
            FileStatus[] s = fs.globStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        }
        datasetSize = datasetSize * type;
        System.out.println("type: " + type);
        System.out.println("datasetSize: " + datasetSize);

        samplingRate = (double) sampleChunk / (double) datasetSize;
        if (samplingRate >= 0.1) {
            samplingRate = 0.1;
        }
        if (samplingRate <= 0.001) {
            samplingRate = 0.001;
        }
        numReducers = (int) (datasetSize / ReducerChunk);
        if (numReducers == 0)
            numReducers = 1;
        numReducers++;
    } catch (IOException e) {
        e.printStackTrace();
    }

    HBaseAdmin hadmin = new HBaseAdmin(conf);
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
    HColumnDescriptor family = new HColumnDescriptor("counter");
    desc.addFamily(family);
    if (!hadmin.tableExists(TABLE_NAME)) {
        hadmin.createTable(desc);
    }

    job.setNumReduceTasks(numReducers);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(DistinctIds.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setPartitionerClass(SamplingPartitioner.class);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Distinct Id Wordcount");

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job;
}
From source file:gr.ntua.h2rdf.loadTriples.SortIds.java
License:Apache License
public static Job createSubmittableJob(String[] args, Counters counters, int numReducers) throws IOException {
    //numReducers=52;
    Job job = new Job();
    TABLE_NAME = args[1];
    Configuration conf = job.getConfiguration();
    long sum = 0, maxCommon = Integer.MAX_VALUE;
    try {
        HTable table = new HTable(HBaseConfiguration.create(), "Counters");
        for (int i = 1; i < numReducers; i++) {
            Get get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            Result res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                if (v < maxCommon) {
                    maxCommon = v;
                }
                //conf.setLong("count."+i, v);
                //System.out.println(v);
                sum += v;
            }
        }
        System.out.println("maxCommon: " + maxCommon);
        job.getConfiguration().setLong("count.MaxCommon", maxCommon);
        job.getConfiguration().setInt("count.numReducers", numReducers - 1);
        job.getConfiguration().setInt("count.sum", (int) sum);

        Get get = new Get(Bytes.toBytes("count.chunks"));
        get.addColumn(Bytes.toBytes("counter"), new byte[0]);
        Result res = table.get(get);
        int stringReducers = 0;
        if (!res.isEmpty()) {
            stringReducers = (int) Bytes.toLong(res.raw()[0].getValue());
        }
        //int stringReducers = (int) counters.findCounter("Countergroup", "count.chunks").getValue();
        int intReducers = (int) Math.ceil((double) sum / (double) bucket);

        sum = maxCommon * (numReducers - 1);
        for (int i = 1; i < numReducers; i++) {
            get = new Get(Bytes.toBytes("count." + i));
            get.addColumn(Bytes.toBytes("counter"), new byte[0]);
            res = table.get(get);
            if (!res.isEmpty()) {
                long v = Bytes.toLong(res.raw()[0].getValue());
                //long v = counters.findCounter("Countergroup", "count."+i).getValue();
                job.getConfiguration().setLong("count." + (i - 1), sum);
                //System.out.println("count."+i+" "+sum);
                sum += v - maxCommon;
            }
        }
        System.out.println(
                "stringReducers: " + stringReducers + " sum: " + sum + " intReducers: " + intReducers);
        job.getConfiguration().setInt("count.stringReducers", stringReducers);
        job.getConfiguration().setInt("count.intReducers", intReducers);

        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(stringReducers + intReducers);

        Path out = new Path(args[1]);
        FileSystem fs;
        try {
            fs = FileSystem.get(conf);
            if (fs.exists(out)) {
                fs.delete(out, true);
            }
            if (fs.exists(new Path("temp")))
                fs.delete(new Path("temp"), true);
        } catch (IOException e) {
            e.printStackTrace();
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("uniqueIds"));
        FileInputFormat.addInputPath(job, new Path("blockIds"));

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(HFileOutputFormat.class);
        job.setJarByClass(SortIds.class);

        //configure compression
        StringBuilder compressionConfigValue = new StringBuilder();
        compressionConfigValue.append(URLEncoder.encode("1", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        compressionConfigValue.append('&');
        compressionConfigValue.append(URLEncoder.encode("2", "UTF-8"));
        compressionConfigValue.append('=');
        compressionConfigValue.append(URLEncoder.encode(Algorithm.GZ.getName(), "UTF-8"));
        job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
                compressionConfigValue.toString());
        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");

        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setPartitionerClass(TwoTotalOrderPartitioner.class);
        TwoTotalOrderPartitioner.setPartitionFile(job.getConfiguration(),
                new Path("partition/stringIdPartition"));
        //job.setCombinerClass(Combiner.class);
        job.setJobName("SortIds");

        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}
From source file:gr.ntua.h2rdf.loadTriples.Translate.java
License:Apache License
public static Job createSubmittableJob(String[] args) throws IOException {
    Job job = new Job();
    Configuration conf = job.getConfiguration();
    FileSystem fs;
    int reducers = 0;
    try {
        fs = FileSystem.get(conf);
        FileStatus[] p = fs.listStatus(new Path("blockIds/"));
        reducers = p.length;
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setNumReduceTasks(reducers);

        Path out = new Path("translations");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        FileOutputFormat.setOutputPath(job, out);
        FileInputFormat.addInputPath(job, new Path("temp"));
        FileOutputFormat.setCompressOutput(job, true);
        FileOutputFormat.setOutputCompressorClass(job, SnappyCodec.class);

        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputKeyClass(ImmutableBytesWritable.class);
        job.setOutputValueClass(ImmutableBytesWritable.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setJarByClass(Translate.class);
        job.setMapperClass(Map.class);
        job.setReducerClass(Reduce.class);
        job.setPartitionerClass(IdPartitioner.class);
        job.setJobName("Translate");

        job.getConfiguration().set("mapred.compress.map.output", "true");
        job.getConfiguration().set("mapred.map.output.compression.codec",
                "org.apache.hadoop.io.compress.SnappyCodec");
        job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
        job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
        job.getConfiguration().setInt("io.sort.mb", 100);
        job.getConfiguration().setInt("io.file.buffer.size", 131072);
        job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
        job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
        //job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);
    } catch (IOException e) {
        e.printStackTrace();
    }
    return job;
}
From source file:gr.ntua.h2rdf.loadTriples.TranslateAndImport.java
License:Apache License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //compute sample partitions
    FileSystem fs;
    Configuration conf = new Configuration();
    int collected = 0, chunks = 0;
    try {
        fs = FileSystem.get(conf);
        Path sampleDir = new Path("sample");
        FileStatus[] samples = fs.listStatus(sampleDir);
        TreeSet<String> set = new TreeSet<String>();
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                //System.out.println(tr[0].toN3());
                set.add(tr[0].toN3());
                set.add(tr[1].toN3());
                set.add(tr[2].toN3());
            }
            in1.close();
            in.close();
        }

        IndexTranslator translator = new IndexTranslator(TABLE_NAME + "_Index");
        HashMap<String, Long> index = translator.translate(set);
        set.clear();

        TreeSet<ImmutableBytesWritable> set1 = new TreeSet<ImmutableBytesWritable>(
                new ImmutableBytesWritable.Comparator());
        for (FileStatus sample : samples) {
            FSDataInputStream in = fs.open(sample.getPath());
            CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(GzipCodec.class, conf);
            CompressionInputStream in1 = codec.createInputStream(in);
            NxParser nxp = new NxParser(in1);
            Iterator<Node[]> it = nxp.iterator();
            while (it.hasNext()) {
                Node[] tr = it.next();
                ByteTriple btr = new ByteTriple(index.get(tr[0].toN3()), index.get(tr[1].toN3()),
                        index.get(tr[2].toN3()));
                set1.add(new ImmutableBytesWritable(btr.getSPOByte()));
                set1.add(new ImmutableBytesWritable(btr.getSOPByte()));
                set1.add(new ImmutableBytesWritable(btr.getOPSByte()));
                set1.add(new ImmutableBytesWritable(btr.getOSPByte()));
                set1.add(new ImmutableBytesWritable(btr.getPOSByte()));
                set1.add(new ImmutableBytesWritable(btr.getPSOByte()));
            }
            in1.close();
            in.close();
        }
        index.clear();

        Path p = new Path("hexastorePartition");
        if (fs.exists(p)) {
            fs.delete(p, true);
        }
        SequenceFile.Writer partitionWriter = SequenceFile.createWriter(fs, conf, p,
                ImmutableBytesWritable.class, NullWritable.class);

        double chunkSize = bucketSampledTriples * DistinctIds.samplingRate;
        System.out.println("chunkSize: " + chunkSize);
        Iterator<ImmutableBytesWritable> it = set1.iterator();
        while (it.hasNext()) {
            ImmutableBytesWritable key = it.next();
            if (collected > chunkSize) {
                partitionWriter.append(key, NullWritable.get());
                //System.out.println(Bytes.toStringBinary(key.get()));
                collected = 0;
                chunks++;
            } else {
                collected++;
            }
        }
        System.out.println("chunks: " + chunks);
        partitionWriter.close();
    } catch (IOException e) {
        e.printStackTrace();
    }

    Job job = new Job();
    job = new Job(conf, "Import Hexastore");

    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path("out");
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(job, out);

    job.setPartitionerClass(TotalOrderPartitioner.class);
    TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path("hexastorePartition"));
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HFileOutputFormat.class);

    StringBuilder compressionConfigValue = new StringBuilder();
    compressionConfigValue.append(URLEncoder.encode("I", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("S", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    compressionConfigValue.append('&');
    compressionConfigValue.append(URLEncoder.encode("T", "UTF-8"));
    compressionConfigValue.append('=');
    compressionConfigValue.append(URLEncoder.encode(Algorithm.SNAPPY.getName(), "UTF-8"));
    job.getConfiguration().set("hbase.hfileoutputformat.families.compression",
            compressionConfigValue.toString());
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 262144);
    //job.getConfiguration().setInt("hbase.mapreduce.hfileoutputformat.blocksize", 16384);

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);
    job.setJarByClass(TranslateAndImport.class);
    job.setMapperClass(Map.class);
    //job.setReducerClass(HexaStoreHistogramsReduce.class);
    job.setReducerClass(HexaStoreReduce.class);

    job.getConfiguration().set("h2rdf.tableName", TABLE_NAME);
    job.getConfiguration().setInt("mapred.reduce.tasks", chunks + 1);
    //job.setCombinerClass(Combiner.class);
    job.setJobName("Translate Projections");

    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 268435456);
    //job.getConfiguration().setInt("hbase.hregion.max.filesize", 67108864);
    job.getConfiguration().setInt("hbase.hregion.max.filesize", 33554432);

    return job;
}
From source file:gr.ntua.h2rdf.sampler.TotalOrderPrep.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    Job sample_job = new Job();

    // Remember the real input format so the sampling input format can use it under the hood
    sample_job.getConfiguration().setBoolean(ARG_INPUTFORMAT, true);
    sample_job.setInputFormatClass(TextInputFormat.class);
    //sample_job.getConfiguration().set("mapred.fairscheduler.pool", "pool9");

    // Base the sample size on the number of reduce tasks that will be used by the real job,
    // but only use 1 reducer for this job (maps output very little)
    sample_job.setNumReduceTasks(1);

    // Make this job's output a temporary file: the input file for the real job's TotalOrderPartitioner
    Path partition = new Path("partitions/");
    //partition.getFileSystem(job.getConfiguration()).deleteOnExit(partition);

    conf = new Configuration();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(partition)) {
            fs.delete(partition, true);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    FileOutputFormat.setOutputPath(sample_job, partition);
    FileInputFormat.setInputPaths(sample_job, new Path(args[0]));

    //TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), new Path(partition, "part-r-00000"));
    //job.setPartitionerClass(TotalOrderPartitioner.class);

    // If there's a combiner, turn it into an identity reducer to prevent destruction of keys.
    sample_job.setCombinerClass(Combiner.class);

    sample_job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setMapOutputValueClass(ImmutableBytesWritable.class);
    sample_job.setOutputKeyClass(ImmutableBytesWritable.class);
    sample_job.setOutputValueClass(NullWritable.class);
    sample_job.setPartitionerClass(HashPartitioner.class);
    sample_job.setOutputFormatClass(SequenceFileOutputFormat.class);
    sample_job.setJarByClass(TotalOrderPrep.class);
    sample_job.setMapperClass(Map.class);
    sample_job.setReducerClass(PartitioningReducer.class);
    sample_job.setJobName("(Sampler)");

    sample_job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    sample_job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);

    return sample_job;
}
From source file:hadoop.CountMinorKeys.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    @SuppressWarnings("deprecation")
    Job job = new Job(getConf());
    job.setJarByClass(CountMinorKeys.class);
    job.setJobName("Count Minor Keys");

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormatClass(KVInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    KVInputFormat.setKVStoreName(args[0]);
    KVInputFormat.setKVHelperHosts(new String[] { args[1] });
    FileOutputFormat.setOutputPath(job, new Path(args[2]));

    /*
     * Load KVLoginFile if specified, otherwise try to load via reading
     * the system property oracle.kv.login.
     */
    if (args.length >= 4) {
        KVInputFormat.setKVSecurity(args[3]);
    }

    boolean success = job.waitForCompletion(true);
    return success ? 0 : 1;
}