Example usage for org.apache.hadoop.mapreduce Counter getValue

List of usage examples for org.apache.hadoop.mapreduce Counter getValue

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Counter#getValue().

Prototype

long getValue();

Document

What is the current value of this counter?

Usage
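
Before the full examples, here is a minimal, self-contained sketch of the usual pattern: wait for a Job to complete, look up a Counter via Job#getCounters(), and read it with getValue(). The class and method names (CounterValueExample, mapOutputRecords) are illustrative only; TaskCounter.MAP_OUTPUT_RECORDS is one of Hadoop's built-in task counters.

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class CounterValueExample {

    /** Returns the number of map output records, or -1 if the job did not succeed. */
    public static long mapOutputRecords(Job job) throws Exception {
        // Counter values are only meaningful once the job has finished.
        if (!job.waitForCompletion(true)) {
            return -1L;
        }
        // Look up a built-in task counter and read its current value.
        Counter counter = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS);
        return counter.getValue();
    }
}

Note that several of the examples below look up the older org.apache.hadoop.mapred.Task.Counter enum instead of TaskCounter; as their comments explain, that form is more compatible with earlier Hadoop versions.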

From source file: edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java

License: Open Source License

public static void main(String[] args) throws Exception {

    //      args = new String[8];
    //      args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife";
    //      args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result";
    //      args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory";
    //      args[3] = "interval:2008-05-01,2008-05-30";
    //      args[4] = "time:month";
    //      args[5] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883";
    //      args[6] = "-overwrite";
    //      args[7] = "-local";//"-no-local";

    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));

    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }

    if (params.get("traj") == null) {
        System.err.println("Trajectory query is missing");
        printUsage();
        System.exit(1);
    }

    // Invoke method to compute the trajectory MBR. 
    String rectangle = getTrajectoryRectangle(params.get("traj"));
    params.set("rect", rectangle);

    if (params.get("rect") == null) {
        System.err.println("You must provide a Trajectory Query");
        printUsage();
        System.exit(1);
    }

    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }

    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint)) {
        LOG.error("Shape is not instance of STPoint");
        printUsage();
        System.exit(1);
    }

    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());

    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();

    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams,
                                    collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }

    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}

From source file: gaffer.accumulo.splitpoints.EstimateSplitPointsDriver.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length < 5) {
        System.err.println("Usage: " + this.getClass().getName()
                + " <mapred_output_directory> <proportion_to_sample> <number_of_tablet_servers> <resulting_split_file> <input_path1>...");
        return 1;
    }

    // Parse arguments
    Path outputPath = new Path(args[0]);
    float proportionToSample = Float.parseFloat(args[1]);
    int numberTabletServers = Integer.parseInt(args[2]);
    Path resultingSplitsFile = new Path(args[3]);
    Path[] inputPaths = new Path[args.length - 4];
    for (int i = 0; i < inputPaths.length; i++) {
        inputPaths[i] = new Path(args[i + 4]);
    }

    // Conf and job
    Configuration conf = getConf();
    conf.setFloat("proportion_to_sample", proportionToSample);
    String jobName = "Estimate split points: input = ";
    for (int i = 0; i < inputPaths.length; i++) {
        jobName += inputPaths[i] + ", ";
    }
    jobName += "output = " + outputPath;
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(getClass());

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    for (int i = 0; i < inputPaths.length; i++) {
        SequenceFileInputFormat.addInputPath(job, inputPaths[i]);
    }

    // Mapper
    job.setMapperClass(EstimateSplitPointsMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Reducer
    job.setReducerClass(EstimateSplitPointsReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(1);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    @SuppressWarnings("deprecation")
    Counter counter = job.getCounters()
            .findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
    long recordsOutput = counter.getValue();
    System.out.println("Number of records output = " + recordsOutput);

    // Work out when to output a split point. The number of split points
    // needed is the number of tablet servers minus 1 (because you don't
    // have to output the start of the first tablet or the end of the
    // last tablet).
    long outputEveryNthRecord = recordsOutput / (numberTabletServers - 1);

    // Read through resulting file, pick out the split points and write to
    // file.
    FileSystem fs = FileSystem.get(conf);
    Path resultsFile = new Path(outputPath, "part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
    PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(resultingSplitsFile, true)));
    Key key = new Key();
    Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
        count++;
        if (count % outputEveryNthRecord == 0) {
            numberSplitPointsOutput++;
            splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes())));
            System.out.println("Written split point: " + key.getRow());
        }
    }
    reader.close();
    splitsWriter.close();
    System.out.println("Number of split points output = " + numberSplitPointsOutput);
    return 0;
}

From source file: gaffer.accumulostore.operation.hdfs.handler.job.tool.SampleDataAndCreateSplitsFileTool.java

License: Apache License

@Override
public int run(final String[] strings) throws OperationException {
    try {
        LOGGER.info("Creating job using SampleDataForSplitPointsJobFactory");
        job = new SampleDataForSplitPointsJobFactory().createJob(operation, store);
    } catch (final IOException e) {
        LOGGER.error("Failed to create Hadoop job: {}", e.getMessage());
        throw new OperationException("Failed to create the Hadoop job: " + e.getMessage(), e);
    }
    try {
        LOGGER.info("Running SampleDataForSplitPoints job (job name is {})", job.getJobName());
        job.waitForCompletion(true);
    } catch (final IOException | InterruptedException | ClassNotFoundException e) {
        LOGGER.error("Exception running job: {}", e.getMessage());
        throw new OperationException("Error while waiting for job to complete: " + e.getMessage(), e);
    }

    try {
        if (!job.isSuccessful()) {
            LOGGER.error("Job was not successful (job name is {})", job.getJobName());
            throw new OperationException("Error running job");
        }
    } catch (final IOException e) {
        LOGGER.error("Exception running job: {}", e.getMessage());
        throw new OperationException("Error running job" + e.getMessage(), e);
    }

    // Find the number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    Counter counter;
    try {
        counter = job.getCounters().findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
        LOGGER.info("Number of records output = {}", counter);
    } catch (final IOException e) {
        LOGGER.error(
                "Failed to get counter org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS from job: {}",
                e.getMessage());
        throw new OperationException(
                "Failed to get counter: " + org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS, e);
    }

    int numberTabletServers;
    try {
        numberTabletServers = store.getConnection().instanceOperations().getTabletServers().size();
        LOGGER.info("Number of tablet servers is {}", numberTabletServers);
    } catch (final StoreException e) {
        LOGGER.error("Exception thrown getting number of tablet servers: {}", e.getMessage());
        throw new OperationException(e.getMessage(), e);
    }

    long outputEveryNthRecord = counter.getValue() / (numberTabletServers - 1);
    final Path resultsFile = new Path(operation.getOutputPath(), "part-r-00000");
    LOGGER.info("Will output every {}-th record from {}", outputEveryNthRecord, resultsFile);

    // Read through resulting file, pick out the split points and write to file.
    final Configuration conf = getConf();
    final FileSystem fs;
    try {
        fs = FileSystem.get(conf);
    } catch (final IOException e) {
        LOGGER.error("Exception getting filesystem: {}", e.getMessage());
        throw new OperationException("Failed to get filesystem from configuration: " + e.getMessage(), e);
    }
    LOGGER.info("Writing splits to {}", operation.getResultingSplitsFilePath());
    final Key key = new Key();
    final Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    try (final SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
            final PrintStream splitsWriter = new PrintStream(
                    new BufferedOutputStream(fs.create(new Path(operation.getResultingSplitsFilePath()), true)),
                    false, CommonConstants.UTF_8)) {
        while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
            count++;
            if (count % outputEveryNthRecord == 0) {
                LOGGER.debug("Outputting split point number {} ({})", numberSplitPointsOutput,
                        Base64.encodeBase64(key.getRow().getBytes()));
                numberSplitPointsOutput++;
                splitsWriter.println(
                        new String(Base64.encodeBase64(key.getRow().getBytes()), CommonConstants.UTF_8));
            }
        }
        LOGGER.info("Total number of records read was {}", count);
    } catch (final IOException e) {
        LOGGER.error("Exception reading results file and outputting split points: {}", e.getMessage());
        throw new OperationException(e.getMessage(), e);
    }

    try {
        fs.delete(resultsFile, true);
        LOGGER.info("Deleted the results file {}", resultsFile);
    } catch (final IOException e) {
        LOGGER.error("Failed to delete the results file {}", resultsFile);
        throw new OperationException("Failed to delete the results file: " + e.getMessage(), e);
    }

    return SUCCESS_RESPONSE;
}

From source file: gaffer.accumulostore.operation.hdfs.handler.tool.SampleDataAndCreateSplitsFileTool.java

License: Apache License

@Override
public int run(final String[] strings) throws OperationException {
    try {
        job = new SampleDataForSplitPointsJobFactory().createJob(operation, store);
    } catch (IOException e) {
        throw new OperationException("Failed to create the hadoop job : " + e.getMessage(), e);
    }
    try {
        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        throw new OperationException("Erorr while waiting for job to complete : " + e.getMessage(), e);
    }

    try {
        if (!job.isSuccessful()) {
            throw new OperationException("Error running job");
        }
    } catch (IOException e) {
        throw new OperationException("Error running job" + e.getMessage(), e);
    }

    // Number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    Counter counter;
    try {
        counter = job.getCounters().findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
    } catch (IOException e) {
        throw new OperationException(
                "Failed to get counter: " + org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS, e);
    }

    int numberTabletServers;
    try {
        numberTabletServers = store.getConnection().instanceOperations().getTabletServers().size();
    } catch (StoreException e) {
        throw new OperationException(e.getMessage(), e);
    }

    long outputEveryNthRecord = counter.getValue() / (numberTabletServers - 1);

    // Read through resulting file, pick out the split points and write to file.
    Configuration conf = getConf();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        throw new OperationException("Failed to get Filesystem from configuraiton : " + e.getMessage(), e);
    }
    Path resultsFile = new Path(operation.getInputPath(), "part-r-00000");
    Key key = new Key();
    Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
            PrintStream splitsWriter = new PrintStream(
                    new BufferedOutputStream(fs.create(operation.getResultingSplitsFilePath(), true)), false,
                    CommonConstants.UTF_8)) {
        while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
            count++;
            if (count % outputEveryNthRecord == 0) {
                numberSplitPointsOutput++;
                splitsWriter.println(
                        new String(Base64.encodeBase64(key.getRow().getBytes()), CommonConstants.UTF_8));
            }
        }
    } catch (IOException e) {
        throw new OperationException(e.getMessage(), e);
    }

    try {
        fs.delete(resultsFile, true);
    } catch (IOException e) {
        throw new OperationException("Failed to delete the mapreduce result file : " + e.getMessage(), e);
    }

    return SUCCESS_RESPONSE;
}

From source file: gobblin.compaction.event.CompactionSlaEventHelper.java

License: Apache License

private static long getRecordCount(Optional<Job> job) {

    if (!job.isPresent()) {
        return -1l;
    }

    Counters counters = null;
    try {
        counters = job.get().getCounters();
    } catch (IOException e) {
        LOG.debug("Failed to get job counters. Record count will not be set. ", e);
        return -1l;
    }

    Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);

    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }

    recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);

    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }

    LOG.debug("Non zero record count not found in both mapper and reducer counters");

    return -1l;
}

From source file: gobblin.runtime.mapreduce.MRJobLauncher.java

License: Apache License

/**
 * Create a {@link gobblin.metrics.GobblinMetrics} instance for this job run from the Hadoop counters.
 */
@VisibleForTesting
void countersToMetrics(GobblinMetrics metrics) throws IOException {
    Optional<Counters> counters = Optional.fromNullable(this.job.getCounters());

    if (counters.isPresent()) {
        // Write job-level counters
        CounterGroup jobCounterGroup = counters.get().getGroup(MetricGroup.JOB.name());
        for (Counter jobCounter : jobCounterGroup) {
            metrics.getCounter(jobCounter.getName()).inc(jobCounter.getValue());
        }

        // Write task-level counters
        CounterGroup taskCounterGroup = counters.get().getGroup(MetricGroup.TASK.name());
        for (Counter taskCounter : taskCounterGroup) {
            metrics.getCounter(taskCounter.getName()).inc(taskCounter.getValue());
        }
    }
}

From source file: io.covert.dns.collection.CollectionJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 4) {
        usage("");
    }

    String dclass = args[0];
    String types = args[1];
    String inDir = args[2];
    String outDir = args[3];

    Configuration conf = getConf();

    if (conf.get("dns.collection.num.resolvers") == null)
        conf.setInt("dns.collection.num.resolvers", 50);
    if (conf.get("dns.collection.nameservers") == null)
        conf.set("dns.collection.nameservers", "127.0.0.1");

    Job job = new Job(conf);
    job.setJobName(CollectionJob.class.getSimpleName() + ": types=" + types + ", dclass=" + dclass + " inDir="
            + inDir + ", outDir=" + outDir + ", resolvers=" + conf.get("dns.collection.nameservers"));
    job.setJarByClass(getClass());

    job.setMapperClass(CollectionMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormatClass(DnsRequestInputFormat.class);
    DnsRequestInputFormat.setInputPaths(job, new Path(inDir));
    DnsRequestInputFormat.configure(job, dclass.toUpperCase(), Arrays.asList(types.split(",")),
            Arrays.asList(""));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);
    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;

    CounterGroup counters = job.getCounters().getGroup(CollectionMapper.RESOLVER_GROUP);
    Counter constructMessageMS = counters.findCounter(CollectionMapper.CONSTRUCT_MESSAGE_MS);
    Counter parseResponseMS = counters.findCounter(CollectionMapper.PARSE_RESPONSE_MS);
    Counter performRequestMS = counters.findCounter(CollectionMapper.PERFORM_REQUEST_MS);
    Counter totalRequestHandlingMS = counters.findCounter(CollectionMapper.TOTAL_REQUEST_HANDLING_MS);

    Log.info("Total ConstructMessage percent: "
            + (double) (constructMessageMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total ParseResponse percent:    "
            + (double) (parseResponseMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total PerformRequest percent:   "
            + (double) (performRequestMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));

    return retVal;
}

From source file: io.druid.indexer.IndexGeneratorJob.java

License: Apache License

public boolean run() {
    try {
        Job job = Job.getInstance(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        JobHelper.injectSystemProperties(job);
        config.addJobProperties(job);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(BytesWritable.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        int numReducers = Iterables.size(config.getAllBuckets().get());
        if (numReducers == 0) {
            throw new RuntimeException("No buckets?? seems there is no data to index.");
        }

        if (config.getSchema().getTuningConfig().getUseCombiner()) {
            job.setCombinerClass(IndexGeneratorCombiner.class);
            job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
        }

        job.setNumReduceTasks(numReducers);
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        setReducerClass(job);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);

        // hack to get druid.processing.bitmap property passed down to hadoop job.
        // once IndexIO doesn't rely on globally injected properties, we can move this into the HadoopTuningConfig.
        final String bitmapProperty = "druid.processing.bitmap.type";
        final String bitmapType = HadoopDruidIndexerConfig.properties.getProperty(bitmapProperty);
        if (bitmapType != null) {
            for (String property : new String[] { "mapreduce.reduce.java.opts", "mapreduce.map.java.opts" }) {
                // prepend property to allow overriding using hadoop.xxx properties by JobHelper.injectSystemProperties above
                String value = Strings.nullToEmpty(job.getConfiguration().get(property));
                job.getConfiguration().set(property,
                        String.format("-D%s=%s %s", bitmapProperty, bitmapType, value));
            }
        }

        config.intoConfiguration(job);

        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), job);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file: kogiri.mapreduce.preprocess.indexing.stage3.KmerStatisticsBuilder.java

License: Open Source License

private int runJob(PreprocessorConfig ppConfig) throws Exception {
    // check config
    validatePreprocessorConfig(ppConfig);

    // configuration
    Configuration conf = this.getConf();

    // set user configuration
    ppConfig.getClusterConfiguration().configureTo(conf);
    ppConfig.saveTo(conf);

    Path[] inputFiles = KmerIndexHelper.getAllKmerIndexIndexFilePath(conf, ppConfig.getKmerIndexPath());

    for (Path inputFile : inputFiles) {
        LOG.info(inputFile);
    }

    boolean job_result = true;
    List<Job> jobs = new ArrayList<Job>();

    for (int round = 0; round < inputFiles.length; round++) {
        Path roundInputFile = inputFiles[round];
        Path[] roundInputKmerIndexPartFiles = KmerIndexHelper.getKmerIndexPartFilePath(conf, roundInputFile);

        Job job = new Job(conf,
                "Kogiri Preprocessor - Computing Kmer Statistics (" + round + " of " + inputFiles.length + ")");
        job.setJarByClass(KmerStatisticsBuilder.class);

        // Mapper
        job.setMapperClass(KmerStatisticsBuilderMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Specify key / value
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // Inputs
        Path[] kmerIndexPartDataFiles = KmerIndexHelper.getAllKmerIndexPartDataFilePath(conf,
                roundInputKmerIndexPartFiles);
        SequenceFileInputFormat.addInputPaths(job, FileSystemHelper.makeCommaSeparated(kmerIndexPartDataFiles));

        LOG.info("Input file : ");
        LOG.info("> " + roundInputFile.toString());

        // Outputs
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setNumReduceTasks(0);

        // Execute job and return status
        boolean result = job.waitForCompletion(true);

        jobs.add(job);

        // check results
        if (result) {
            CounterGroup uniqueGroup = job.getCounters()
                    .getGroup(KmerStatisticsHelper.getCounterGroupNameUnique());
            CounterGroup totalGroup = job.getCounters()
                    .getGroup(KmerStatisticsHelper.getCounterGroupNameTotal());
            CounterGroup squareGroup = job.getCounters()
                    .getGroup(KmerStatisticsHelper.getCounterGroupNameSquare());
            CounterGroup logTFSquareGroup = job.getCounters()
                    .getGroup(KmerStatisticsHelper.getCounterGroupNameLogTFSquare());

            Iterator<Counter> uniqueIterator = uniqueGroup.iterator();
            while (uniqueIterator.hasNext()) {
                long count = 0;
                long length = 0;
                long square = 0;
                double logTFSquare = 0;
                double real_mean = 0;
                double stddev = 0;
                double tf_cosnorm_base = 0;

                Counter uniqueCounter = uniqueIterator.next();
                Counter totalCounter = totalGroup.findCounter(uniqueCounter.getName());
                Counter squareCounter = squareGroup.findCounter(uniqueCounter.getName());
                Counter logTFSquareCounter = logTFSquareGroup.findCounter(uniqueCounter.getName());

                count = uniqueCounter.getValue();
                length = totalCounter.getValue();
                square = squareCounter.getValue();
                logTFSquare = logTFSquareCounter.getValue() / 1000.0;

                tf_cosnorm_base = Math.sqrt(logTFSquare);

                real_mean = (double) length / (double) count;
                // stddev = sqrt((sum(lengths ^ 2) / count) - (mean ^ 2))
                double mean = Math.pow(real_mean, 2);
                double term = (double) square / (double) count;
                stddev = Math.sqrt(term - mean);

                LOG.info("distinct k-mers " + uniqueCounter.getName() + " : " + count);
                LOG.info("total k-mers " + uniqueCounter.getName() + " : " + length);
                LOG.info("average " + uniqueCounter.getName() + " : " + real_mean);
                LOG.info("std-deviation " + uniqueCounter.getName() + " : " + stddev);
                LOG.info("tf-cos-norm-base " + uniqueCounter.getName() + " : " + tf_cosnorm_base);

                Path outputHadoopPath = new Path(ppConfig.getKmerStatisticsPath(),
                        KmerStatisticsHelper.makeKmerStatisticsFileName(uniqueCounter.getName()));
                FileSystem fs = outputHadoopPath.getFileSystem(conf);

                KmerStatistics statistics = new KmerStatistics();
                statistics.setSampleName(uniqueCounter.getName());
                statistics.setKmerSize(ppConfig.getKmerSize());
                statistics.setUniqueKmers(count);
                statistics.setTotalKmers(length);
                statistics.setAverageFrequency(real_mean);
                statistics.setStdDeviation(stddev);
                statistics.setTFCosineNormBase(tf_cosnorm_base);

                statistics.saveTo(fs, outputHadoopPath);
            }
        }

        if (!result) {
            LOG.error("job failed at round " + round + " of " + inputFiles.length);
            job_result = false;
            break;
        }
    }

    // report
    if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(jobs);
        report.writeTo(ppConfig.getReportPath());
    }

    return job_result ? 0 : 1;
}

From source file: libra.preprocess.stage2.KmerIndexBuilder.java

License: Apache License

private void createStatisticsOfIndex(Path statisticsPath, Path inputPath, Configuration conf, Counters counters,
        int kmerSize) throws IOException {
    CounterGroup logTFSquareGroup = counters.getGroup(KmerStatisticsHelper.getCounterGroupNameLogTFSquare());

    Iterator<Counter> logTFSquareGroupIterator = logTFSquareGroup.iterator();
    while (logTFSquareGroupIterator.hasNext()) {
        Counter logTFSquareCounter = logTFSquareGroupIterator.next();
        if (logTFSquareCounter.getName().equals(inputPath.getName())) {
            double logTFSquare = 0;
            double tf_cosnorm_base = 0;

            logTFSquare = logTFSquareCounter.getValue() / 1000.0;

            tf_cosnorm_base = Math.sqrt(logTFSquare);
            LOG.info("tf-cos-norm-base " + logTFSquareCounter.getName() + " : " + tf_cosnorm_base);

            Path outputHadoopPath = new Path(statisticsPath,
                    KmerStatisticsHelper.makeKmerStatisticsFileName(logTFSquareCounter.getName()));
            FileSystem fs = outputHadoopPath.getFileSystem(conf);

            KmerStatistics statistics = new KmerStatistics();
            statistics.setSampleName(logTFSquareCounter.getName());
            statistics.setKmerSize(kmerSize);
            statistics.setTFCosineNormBase(tf_cosnorm_base);

            statistics.saveTo(fs, outputHadoopPath);
        }
    }
}