List of usage examples for org.apache.hadoop.mapreduce.Counter.getValue()
long getValue();
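Before the full examples below, here is a minimal sketch of the common pattern: run a job to completion, look up a counter on the finished job, and read its accumulated value with getValue(). The job name and the choice of the built-in TaskCounter.MAP_OUTPUT_RECORDS counter are illustrative assumptions only, and the job setup (mapper, reducer, input and output paths) is elided; this is not taken from any of the projects listed below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class CounterValueSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical job: mapper/reducer classes and input/output paths would be
        // configured here as in the examples below.
        Job job = Job.getInstance(new Configuration(), "counter-getvalue-sketch");
        // ... job.setMapperClass(...), FileInputFormat.addInputPath(...), etc. ...

        job.waitForCompletion(true);

        // After completion, fetch a built-in task counter from the job's Counters
        // and read its accumulated value as a long.
        Counter mapOutputRecords = job.getCounters().findCounter(TaskCounter.MAP_OUTPUT_RECORDS);
        long value = mapOutputRecords.getValue();
        System.out.println("Map output records = " + value);
    }
}

The same pattern applies to user-defined counters: pass any enum constant, or a group name and counter name, to findCounter and read the result with getValue().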
From source file: edu.umn.cs.sthadoop.trajectory.TrajectoryOverlap.java
License: Open Source License
public static void main(String[] args) throws Exception {
    // args = new String[8];
    // args[0] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/index_geolife";
    // args[1] = "/export/scratch/mntgData/geolifeGPS/geolife_Trajectories_1.3/HDFS/knn-dis-result";
    // args[2] = "shape:edu.umn.cs.sthadoop.trajectory.GeolifeTrajectory";
    // args[3] = "interval:2008-05-01,2008-05-30";
    // args[4] = "time:month";
    // args[5] = "traj:39.9119983,116.606835;39.9119783,116.6065483;39.9119599,116.6062649;39.9119416,116.6059899;39.9119233,116.6057282;39.9118999,116.6054783;39.9118849,116.6052366;39.9118666,116.6050099;39.91185,116.604775;39.9118299,116.604525;39.9118049,116.6042649;39.91177,116.6040166;39.9117516,116.6037583;39.9117349,116.6035066;39.9117199,116.6032666;39.9117083,116.6030232;39.9117,116.6027566;39.91128,116.5969383;39.9112583,116.5966766;39.9112383,116.5964232;39.9112149,116.5961699;39.9111933,116.5959249;39.9111716,116.5956883";
    // args[6] = "-overwrite";
    // args[7] = "-local";//"-no-local";
    final OperationsParams params = new OperationsParams(new GenericOptionsParser(args));
    final Path[] paths = params.getPaths();
    if (paths.length <= 1 && !params.checkInput()) {
        printUsage();
        System.exit(1);
    }
    if (paths.length >= 2 && !params.checkInputOutput()) {
        printUsage();
        System.exit(1);
    }
    if (params.get("traj") == null) {
        System.err.println("Trajectory query is missing");
        printUsage();
        System.exit(1);
    }
    // Invoke method to compute the trajectory MBR.
    String rectangle = getTrajectoryRectangle(params.get("traj"));
    params.set("rect", rectangle);
    if (params.get("rect") == null) {
        System.err.println("You must provide a Trajectory Query");
        printUsage();
        System.exit(1);
    }
    if (params.get("interval") == null) {
        System.err.println("Temporal range missing");
        printUsage();
        System.exit(1);
    }
    TextSerializable inObj = params.getShape("shape");
    if (!(inObj instanceof STPoint)) {
        LOG.error("Shape is not instance of STPoint");
        printUsage();
        System.exit(1);
    }
    // Get spatio-temporal slices.
    List<Path> STPaths = getIndexedSlices(params);
    final Path outPath = params.getOutputPath();
    final Rectangle[] queryRanges = params.getShapes("rect", new Rectangle());
    // All running jobs
    final Vector<Long> resultsCounts = new Vector<Long>();
    Vector<Job> jobs = new Vector<Job>();
    Vector<Thread> threads = new Vector<Thread>();
    long t1 = System.currentTimeMillis();
    for (Path stPath : STPaths) {
        final Path inPath = stPath;
        for (int i = 0; i < queryRanges.length; i++) {
            final OperationsParams queryParams = new OperationsParams(params);
            OperationsParams.setShape(queryParams, "rect", queryRanges[i]);
            if (OperationsParams.isLocal(new JobConf(queryParams), inPath)) {
                // Run in local mode
                final Rectangle queryRange = queryRanges[i];
                final Shape shape = queryParams.getShape("shape");
                final Path output = outPath == null ? null
                        : (queryRanges.length == 1 ? outPath : new Path(outPath, String.format("%05d", i)));
                Thread thread = new Thread() {
                    @Override
                    public void run() {
                        FSDataOutputStream outFile = null;
                        final byte[] newLine = System.getProperty("line.separator", "\n").getBytes();
                        try {
                            ResultCollector<Shape> collector = null;
                            if (output != null) {
                                FileSystem outFS = output.getFileSystem(queryParams);
                                final FSDataOutputStream foutFile = outFile = outFS.create(output);
                                collector = new ResultCollector<Shape>() {
                                    final Text tempText = new Text2();

                                    @Override
                                    public synchronized void collect(Shape r) {
                                        try {
                                            tempText.clear();
                                            r.toText(tempText);
                                            foutFile.write(tempText.getBytes(), 0, tempText.getLength());
                                            foutFile.write(newLine);
                                        } catch (IOException e) {
                                            e.printStackTrace();
                                        }
                                    }
                                };
                            } else {
                                outFile = null;
                            }
                            long resultCount = rangeQueryLocal(inPath, queryRange, shape, queryParams, collector);
                            resultsCounts.add(resultCount);
                        } catch (IOException e) {
                            e.printStackTrace();
                        } catch (InterruptedException e) {
                            e.printStackTrace();
                        } finally {
                            try {
                                if (outFile != null)
                                    outFile.close();
                            } catch (IOException e) {
                                e.printStackTrace();
                            }
                        }
                    }
                };
                thread.start();
                threads.add(thread);
            } else {
                // Run in MapReduce mode
                Path outTempPath = outPath == null ? null
                        : new Path(outPath, String.format("%05d", i) + "-" + inPath.getName());
                queryParams.setBoolean("background", true);
                Job job = rangeQueryMapReduce(inPath, outTempPath, queryParams);
                jobs.add(job);
            }
        }
    }
    while (!jobs.isEmpty()) {
        Job firstJob = jobs.firstElement();
        firstJob.waitForCompletion(false);
        if (!firstJob.isSuccessful()) {
            System.err.println("Error running job " + firstJob);
            System.err.println("Killing all remaining jobs");
            for (int j = 1; j < jobs.size(); j++)
                jobs.get(j).killJob();
            System.exit(1);
        }
        Counters counters = firstJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        resultsCounts.add(outputRecordCounter.getValue());
        jobs.remove(0);
    }
    while (!threads.isEmpty()) {
        try {
            Thread thread = threads.firstElement();
            thread.join();
            threads.remove(0);
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
    long t2 = System.currentTimeMillis();
    System.out.println("QueryPlan:");
    for (Path stPath : STPaths) {
        System.out.println(stPath.getName());
    }
    System.out.println("Time for " + queryRanges.length + " jobs is " + (t2 - t1) + " millis");
    System.out.println("Results counts: " + resultsCounts);
}
From source file: gaffer.accumulo.splitpoints.EstimateSplitPointsDriver.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length < 5) {
        System.err.println("Usage: " + this.getClass().getName()
                + " <mapred_output_directory> <proportion_to_sample> <number_of_tablet_servers> <resulting_split_file> <input_path1>...");
        return 1;
    }

    // Parse arguments
    Path outputPath = new Path(args[0]);
    float proportionToSample = Float.parseFloat(args[1]);
    int numberTabletServers = Integer.parseInt(args[2]);
    Path resultingSplitsFile = new Path(args[3]);
    Path[] inputPaths = new Path[args.length - 4];
    for (int i = 0; i < inputPaths.length; i++) {
        inputPaths[i] = new Path(args[i + 4]);
    }

    // Conf and job
    Configuration conf = getConf();
    conf.setFloat("proportion_to_sample", proportionToSample);
    String jobName = "Estimate split points: input = ";
    for (int i = 0; i < inputPaths.length; i++) {
        jobName += inputPaths[i] + ", ";
    }
    jobName += "output = " + outputPath;
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(getClass());

    // Input
    job.setInputFormatClass(SequenceFileInputFormat.class);
    for (int i = 0; i < inputPaths.length; i++) {
        SequenceFileInputFormat.addInputPath(job, inputPaths[i]);
    }

    // Mapper
    job.setMapperClass(EstimateSplitPointsMapper.class);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // Reducer
    job.setReducerClass(EstimateSplitPointsReducer.class);
    job.setOutputKeyClass(Key.class);
    job.setOutputValueClass(Value.class);
    job.setNumReduceTasks(1);

    // Output
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, outputPath);
    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);

    // Run job
    job.waitForCompletion(true);

    // Successful?
    if (!job.isSuccessful()) {
        System.err.println("Error running job");
        return 1;
    }

    // Number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    @SuppressWarnings("deprecation")
    Counter counter = job.getCounters()
            .findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
    long recordsOutput = counter.getValue();
    System.out.println("Number of records output = " + recordsOutput);

    // Work out when to output a split point. The number of split points
    // needed is the number of tablet servers minus 1 (because you don't
    // have to output the start of the first tablet or the end of the
    // last tablet).
    long outputEveryNthRecord = recordsOutput / (numberTabletServers - 1);

    // Read through resulting file, pick out the split points and write to
    // file.
    FileSystem fs = FileSystem.get(conf);
    Path resultsFile = new Path(outputPath, "part-r-00000");
    @SuppressWarnings("deprecation")
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
    PrintStream splitsWriter = new PrintStream(new BufferedOutputStream(fs.create(resultingSplitsFile, true)));
    Key key = new Key();
    Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
        count++;
        if (count % outputEveryNthRecord == 0) {
            numberSplitPointsOutput++;
            splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes())));
            System.out.println("Written split point: " + key.getRow());
        }
    }
    reader.close();
    splitsWriter.close();
    System.out.println("Number of split points output = " + numberSplitPointsOutput);
    return 0;
}
From source file: gaffer.accumulostore.operation.hdfs.handler.job.tool.SampleDataAndCreateSplitsFileTool.java
License: Apache License
@Override
public int run(final String[] strings) throws OperationException {
    try {
        LOGGER.info("Creating job using SampleDataForSplitPointsJobFactory");
        job = new SampleDataForSplitPointsJobFactory().createJob(operation, store);
    } catch (final IOException e) {
        LOGGER.error("Failed to create Hadoop job: {}", e.getMessage());
        throw new OperationException("Failed to create the Hadoop job: " + e.getMessage(), e);
    }

    try {
        LOGGER.info("Running SampleDataForSplitPoints job (job name is {})", job.getJobName());
        job.waitForCompletion(true);
    } catch (final IOException | InterruptedException | ClassNotFoundException e) {
        LOGGER.error("Exception running job: {}", e.getMessage());
        throw new OperationException("Error while waiting for job to complete: " + e.getMessage(), e);
    }

    try {
        if (!job.isSuccessful()) {
            LOGGER.error("Job was not successful (job name is {})", job.getJobName());
            throw new OperationException("Error running job");
        }
    } catch (final IOException e) {
        LOGGER.error("Exception running job: {}", e.getMessage());
        throw new OperationException("Error running job" + e.getMessage(), e);
    }

    // Find the number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    Counter counter;
    try {
        counter = job.getCounters().findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
        LOGGER.info("Number of records output = {}", counter);
    } catch (final IOException e) {
        LOGGER.error("Failed to get counter org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS from job: {}",
                e.getMessage());
        throw new OperationException(
                "Failed to get counter: " + org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS, e);
    }

    int numberTabletServers;
    try {
        numberTabletServers = store.getConnection().instanceOperations().getTabletServers().size();
        LOGGER.info("Number of tablet servers is {}", numberTabletServers);
    } catch (final StoreException e) {
        LOGGER.error("Exception thrown getting number of tablet servers: {}", e.getMessage());
        throw new OperationException(e.getMessage(), e);
    }

    long outputEveryNthRecord = counter.getValue() / (numberTabletServers - 1);

    final Path resultsFile = new Path(operation.getOutputPath(), "part-r-00000");
    LOGGER.info("Will output every {}-th record from {}", outputEveryNthRecord, resultsFile);

    // Read through resulting file, pick out the split points and write to file.
    final Configuration conf = getConf();
    final FileSystem fs;
    try {
        fs = FileSystem.get(conf);
    } catch (final IOException e) {
        LOGGER.error("Exception getting filesystem: {}", e.getMessage());
        throw new OperationException("Failed to get filesystem from configuration: " + e.getMessage(), e);
    }
    LOGGER.info("Writing splits to {}", operation.getResultingSplitsFilePath());
    final Key key = new Key();
    final Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    try (final SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
            final PrintStream splitsWriter = new PrintStream(
                    new BufferedOutputStream(fs.create(new Path(operation.getResultingSplitsFilePath()), true)),
                    false, CommonConstants.UTF_8)) {
        while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
            count++;
            if (count % outputEveryNthRecord == 0) {
                LOGGER.debug("Outputting split point number {} ({})", numberSplitPointsOutput,
                        Base64.encodeBase64(key.getRow().getBytes()));
                numberSplitPointsOutput++;
                splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes()), CommonConstants.UTF_8));
            }
        }
        LOGGER.info("Total number of records read was {}", count);
    } catch (final IOException e) {
        LOGGER.error("Exception reading results file and outputting split points: {}", e.getMessage());
        throw new OperationException(e.getMessage(), e);
    }

    try {
        fs.delete(resultsFile, true);
        LOGGER.info("Deleted the results file {}", resultsFile);
    } catch (final IOException e) {
        LOGGER.error("Failed to delete the results file {}", resultsFile);
        throw new OperationException("Failed to delete the results file: " + e.getMessage(), e);
    }

    return SUCCESS_RESPONSE;
}
From source file: gaffer.accumulostore.operation.hdfs.handler.tool.SampleDataAndCreateSplitsFileTool.java
License: Apache License
@Override
public int run(final String[] strings) throws OperationException {
    try {
        job = new SampleDataForSplitPointsJobFactory().createJob(operation, store);
    } catch (IOException e) {
        throw new OperationException("Failed to create the Hadoop job: " + e.getMessage(), e);
    }
    try {
        job.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        throw new OperationException("Error while waiting for job to complete: " + e.getMessage(), e);
    }
    try {
        if (!job.isSuccessful()) {
            throw new OperationException("Error running job");
        }
    } catch (IOException e) {
        throw new OperationException("Error running job" + e.getMessage(), e);
    }

    // Number of records output
    // NB In the following line use mapred.Task.Counter.REDUCE_OUTPUT_RECORDS rather than
    // mapreduce.TaskCounter.REDUCE_OUTPUT_RECORDS as this is more compatible with earlier
    // versions of Hadoop.
    Counter counter;
    try {
        counter = job.getCounters().findCounter(org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS);
    } catch (IOException e) {
        throw new OperationException(
                "Failed to get counter: " + org.apache.hadoop.mapred.Task.Counter.REDUCE_OUTPUT_RECORDS, e);
    }

    int numberTabletServers;
    try {
        numberTabletServers = store.getConnection().instanceOperations().getTabletServers().size();
    } catch (StoreException e) {
        throw new OperationException(e.getMessage(), e);
    }

    long outputEveryNthRecord = counter.getValue() / (numberTabletServers - 1);

    // Read through resulting file, pick out the split points and write to file.
    Configuration conf = getConf();
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
    } catch (IOException e) {
        throw new OperationException("Failed to get Filesystem from configuration: " + e.getMessage(), e);
    }
    Path resultsFile = new Path(operation.getInputPath(), "part-r-00000");
    Key key = new Key();
    Value value = new Value();
    long count = 0;
    int numberSplitPointsOutput = 0;
    try (SequenceFile.Reader reader = new SequenceFile.Reader(fs, resultsFile, conf);
            PrintStream splitsWriter = new PrintStream(
                    new BufferedOutputStream(fs.create(operation.getResultingSplitsFilePath(), true)), false,
                    CommonConstants.UTF_8)) {
        while (reader.next(key, value) && numberSplitPointsOutput < numberTabletServers - 1) {
            count++;
            if (count % outputEveryNthRecord == 0) {
                numberSplitPointsOutput++;
                splitsWriter.println(new String(Base64.encodeBase64(key.getRow().getBytes()), CommonConstants.UTF_8));
            }
        }
    } catch (IOException e) {
        throw new OperationException(e.getMessage(), e);
    }

    try {
        fs.delete(resultsFile, true);
    } catch (IOException e) {
        throw new OperationException("Failed to delete the mapreduce result file: " + e.getMessage(), e);
    }

    return SUCCESS_RESPONSE;
}
From source file: gobblin.compaction.event.CompactionSlaEventHelper.java
License: Apache License
private static long getRecordCount(Optional<Job> job) {
    if (!job.isPresent()) {
        return -1l;
    }

    Counters counters = null;
    try {
        counters = job.get().getCounters();
    } catch (IOException e) {
        LOG.debug("Failed to get job counters. Record count will not be set. ", e);
        return -1l;
    }

    Counter recordCounter = counters.findCounter(AvroKeyDedupReducer.EVENT_COUNTER.RECORD_COUNT);

    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }

    recordCounter = counters.findCounter(AvroKeyMapper.EVENT_COUNTER.RECORD_COUNT);

    if (recordCounter != null && recordCounter.getValue() != 0) {
        return recordCounter.getValue();
    }

    LOG.debug("Non zero record count not found in both mapper and reducer counters");
    return -1l;
}
From source file: gobblin.runtime.mapreduce.MRJobLauncher.java
License: Apache License
/**
 * Create a {@link gobblin.metrics.GobblinMetrics} instance for this job run from the Hadoop counters.
 */
@VisibleForTesting
void countersToMetrics(GobblinMetrics metrics) throws IOException {
    Optional<Counters> counters = Optional.fromNullable(this.job.getCounters());

    if (counters.isPresent()) {
        // Write job-level counters
        CounterGroup jobCounterGroup = counters.get().getGroup(MetricGroup.JOB.name());
        for (Counter jobCounter : jobCounterGroup) {
            metrics.getCounter(jobCounter.getName()).inc(jobCounter.getValue());
        }

        // Write task-level counters
        CounterGroup taskCounterGroup = counters.get().getGroup(MetricGroup.TASK.name());
        for (Counter taskCounter : taskCounterGroup) {
            metrics.getCounter(taskCounter.getName()).inc(taskCounter.getValue());
        }
    }
}
From source file: io.covert.dns.collection.CollectionJob.java
License: Apache License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        usage("");
    }

    String dclass = args[0];
    String types = args[1];
    String inDir = args[2];
    String outDir = args[3];

    Configuration conf = getConf();

    if (conf.get("dns.collection.num.resolvers") == null)
        conf.setInt("dns.collection.num.resolvers", 50);
    if (conf.get("dns.collection.nameservers") == null)
        conf.set("dns.collection.nameservers", "127.0.0.1");

    Job job = new Job(conf);
    job.setJobName(CollectionJob.class.getSimpleName() + ": types=" + types + ", dclass=" + dclass + " inDir="
            + inDir + ", outDir=" + outDir + ", resolvers=" + conf.get("dns.collection.nameservers"));
    job.setJarByClass(getClass());

    job.setMapperClass(CollectionMapper.class);
    job.setNumReduceTasks(0);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(BytesWritable.class);

    job.setInputFormatClass(DnsRequestInputFormat.class);
    DnsRequestInputFormat.setInputPaths(job, new Path(inDir));
    DnsRequestInputFormat.configure(job, dclass.toUpperCase(), Arrays.asList(types.split(",")),
            Arrays.asList(""));

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    SequenceFileOutputFormat.setCompressOutput(job, true);

    job.submit();

    int retVal = job.waitForCompletion(true) ? 0 : 1;

    CounterGroup counters = job.getCounters().getGroup(CollectionMapper.RESOLVER_GROUP);
    Counter constructMessageMS = counters.findCounter(CollectionMapper.CONSTRUCT_MESSAGE_MS);
    Counter parseResponseMS = counters.findCounter(CollectionMapper.PARSE_RESPONSE_MS);
    Counter performRequestMS = counters.findCounter(CollectionMapper.PERFORM_REQUEST_MS);
    Counter totalRequestHandlingMS = counters.findCounter(CollectionMapper.TOTAL_REQUEST_HANDLING_MS);

    Log.info("Total ConstructMessage percent: "
            + (double) (constructMessageMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total ParseResponse percent: "
            + (double) (parseResponseMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));
    Log.info("Total PerformRequest percent: "
            + (double) (performRequestMS.getValue() * 100L) / ((double) totalRequestHandlingMS.getValue()));

    return retVal;
}
From source file: io.druid.indexer.IndexGeneratorJob.java
License: Apache License
public boolean run() {
    try {
        Job job = Job.getInstance(new Configuration(),
                String.format("%s-index-generator-%s", config.getDataSource(), config.getIntervals()));

        job.getConfiguration().set("io.sort.record.percent", "0.23");

        JobHelper.injectSystemProperties(job);
        config.addJobProperties(job);

        job.setMapperClass(IndexGeneratorMapper.class);
        job.setMapOutputValueClass(BytesWritable.class);

        SortableBytes.useSortableBytesAsMapOutputKey(job);

        int numReducers = Iterables.size(config.getAllBuckets().get());
        if (numReducers == 0) {
            throw new RuntimeException("No buckets?? seems there is no data to index.");
        }

        if (config.getSchema().getTuningConfig().getUseCombiner()) {
            job.setCombinerClass(IndexGeneratorCombiner.class);
            job.setCombinerKeyGroupingComparatorClass(BytesWritable.Comparator.class);
        }

        job.setNumReduceTasks(numReducers);
        job.setPartitionerClass(IndexGeneratorPartitioner.class);

        setReducerClass(job);
        job.setOutputKeyClass(BytesWritable.class);
        job.setOutputValueClass(Text.class);
        job.setOutputFormatClass(IndexGeneratorOutputFormat.class);
        FileOutputFormat.setOutputPath(job, config.makeIntermediatePath());

        config.addInputPaths(job);

        // hack to get druid.processing.bitmap property passed down to hadoop job.
        // once IndexIO doesn't rely on globally injected properties, we can move this into the HadoopTuningConfig.
        final String bitmapProperty = "druid.processing.bitmap.type";
        final String bitmapType = HadoopDruidIndexerConfig.properties.getProperty(bitmapProperty);
        if (bitmapType != null) {
            for (String property : new String[] { "mapreduce.reduce.java.opts", "mapreduce.map.java.opts" }) {
                // prepend property to allow overriding using hadoop.xxx properties by JobHelper.injectSystemProperties above
                String value = Strings.nullToEmpty(job.getConfiguration().get(property));
                job.getConfiguration().set(property, String.format("-D%s=%s %s", bitmapProperty, bitmapType, value));
            }
        }

        config.intoConfiguration(job);

        JobHelper.setupClasspath(JobHelper.distributedClassPath(config.getWorkingPath()),
                JobHelper.distributedClassPath(config.makeIntermediatePath()), job);

        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());

        boolean success = job.waitForCompletion(true);

        Counter invalidRowCount = job.getCounters()
                .findCounter(HadoopDruidIndexerConfig.IndexJobCounters.INVALID_ROW_COUNTER);
        jobStats.setInvalidRowCount(invalidRowCount.getValue());

        return success;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file: kogiri.mapreduce.preprocess.indexing.stage3.KmerStatisticsBuilder.java
License: Open Source License
private int runJob(PreprocessorConfig ppConfig) throws Exception {
    // check config
    validatePreprocessorConfig(ppConfig);

    // configuration
    Configuration conf = this.getConf();

    // set user configuration
    ppConfig.getClusterConfiguration().configureTo(conf);
    ppConfig.saveTo(conf);

    Path[] inputFiles = KmerIndexHelper.getAllKmerIndexIndexFilePath(conf, ppConfig.getKmerIndexPath());
    for (Path inputFile : inputFiles) {
        LOG.info(inputFile);
    }

    boolean job_result = true;
    List<Job> jobs = new ArrayList<Job>();

    for (int round = 0; round < inputFiles.length; round++) {
        Path roundInputFile = inputFiles[round];
        Path[] roundInputKmerIndexPartFiles = KmerIndexHelper.getKmerIndexPartFilePath(conf, roundInputFile);

        Job job = new Job(conf, "Kogiri Preprocessor - Computing Kmer Statistics (" + round + " of "
                + inputFiles.length + ")");
        job.setJarByClass(KmerStatisticsBuilder.class);

        // Mapper
        job.setMapperClass(KmerStatisticsBuilderMapper.class);
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(NullWritable.class);

        // Specify key / value
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(NullWritable.class);

        // Inputs
        Path[] kmerIndexPartDataFiles = KmerIndexHelper.getAllKmerIndexPartDataFilePath(conf,
                roundInputKmerIndexPartFiles);
        SequenceFileInputFormat.addInputPaths(job, FileSystemHelper.makeCommaSeparated(kmerIndexPartDataFiles));

        LOG.info("Input file : ");
        LOG.info("> " + roundInputFile.toString());

        // Outputs
        job.setOutputFormatClass(NullOutputFormat.class);

        job.setNumReduceTasks(0);

        // Execute job and return status
        boolean result = job.waitForCompletion(true);

        jobs.add(job);

        // check results
        if (result) {
            CounterGroup uniqueGroup = job.getCounters().getGroup(KmerStatisticsHelper.getCounterGroupNameUnique());
            CounterGroup totalGroup = job.getCounters().getGroup(KmerStatisticsHelper.getCounterGroupNameTotal());
            CounterGroup squareGroup = job.getCounters().getGroup(KmerStatisticsHelper.getCounterGroupNameSquare());
            CounterGroup logTFSquareGroup = job.getCounters()
                    .getGroup(KmerStatisticsHelper.getCounterGroupNameLogTFSquare());

            Iterator<Counter> uniqueIterator = uniqueGroup.iterator();
            while (uniqueIterator.hasNext()) {
                long count = 0;
                long length = 0;
                long square = 0;
                double logTFSquare = 0;
                double real_mean = 0;
                double stddev = 0;
                double tf_cosnorm_base = 0;

                Counter uniqueCounter = uniqueIterator.next();
                Counter totalCounter = totalGroup.findCounter(uniqueCounter.getName());
                Counter squareCounter = squareGroup.findCounter(uniqueCounter.getName());
                Counter logTFSquareCounter = logTFSquareGroup.findCounter(uniqueCounter.getName());

                count = uniqueCounter.getValue();
                length = totalCounter.getValue();
                square = squareCounter.getValue();
                logTFSquare = logTFSquareCounter.getValue() / 1000.0;

                tf_cosnorm_base = Math.sqrt(logTFSquare);
                real_mean = (double) length / (double) count;

                // stddev = sqrt((sum(lengths ^ 2) / count) - (mean ^ 2))
                double mean = Math.pow(real_mean, 2);
                double term = (double) square / (double) count;
                stddev = Math.sqrt(term - mean);

                LOG.info("distinct k-mers " + uniqueCounter.getName() + " : " + count);
                LOG.info("total k-mers " + uniqueCounter.getName() + " : " + length);
                LOG.info("average " + uniqueCounter.getName() + " : " + real_mean);
                LOG.info("std-deviation " + uniqueCounter.getName() + " : " + stddev);
                LOG.info("tf-cos-norm-base " + uniqueCounter.getName() + " : " + tf_cosnorm_base);

                Path outputHadoopPath = new Path(ppConfig.getKmerStatisticsPath(),
                        KmerStatisticsHelper.makeKmerStatisticsFileName(uniqueCounter.getName()));
                FileSystem fs = outputHadoopPath.getFileSystem(conf);

                KmerStatistics statistics = new KmerStatistics();
                statistics.setSampleName(uniqueCounter.getName());
                statistics.setKmerSize(ppConfig.getKmerSize());
                statistics.setUniqueKmers(count);
                statistics.setTotalKmers(length);
                statistics.setAverageFrequency(real_mean);
                statistics.setStdDeviation(stddev);
                statistics.setTFCosineNormBase(tf_cosnorm_base);

                statistics.saveTo(fs, outputHadoopPath);
            }
        }

        if (!result) {
            LOG.error("job failed at round " + round + " of " + inputFiles.length);
            job_result = false;
            break;
        }
    }

    // report
    if (ppConfig.getReportPath() != null && !ppConfig.getReportPath().isEmpty()) {
        Report report = new Report();
        report.addJob(jobs);
        report.writeTo(ppConfig.getReportPath());
    }

    return job_result ? 0 : 1;
}
From source file: libra.preprocess.stage2.KmerIndexBuilder.java
License: Apache License
private void createStatisticsOfIndex(Path statisticsPath, Path inputPath, Configuration conf, Counters counters,
        int kmerSize) throws IOException {
    CounterGroup logTFSquareGroup = counters.getGroup(KmerStatisticsHelper.getCounterGroupNameLogTFSquare());

    Iterator<Counter> logTFSquareGroupIterator = logTFSquareGroup.iterator();
    while (logTFSquareGroupIterator.hasNext()) {
        Counter logTFSquareCounter = logTFSquareGroupIterator.next();
        if (logTFSquareCounter.getName().equals(inputPath.getName())) {
            double logTFSquare = 0;
            double tf_cosnorm_base = 0;

            logTFSquare = logTFSquareCounter.getValue() / 1000.0;

            tf_cosnorm_base = Math.sqrt(logTFSquare);

            LOG.info("tf-cos-norm-base " + logTFSquareCounter.getName() + " : " + tf_cosnorm_base);

            Path outputHadoopPath = new Path(statisticsPath,
                    KmerStatisticsHelper.makeKmerStatisticsFileName(logTFSquareCounter.getName()));
            FileSystem fs = outputHadoopPath.getFileSystem(conf);

            KmerStatistics statistics = new KmerStatistics();
            statistics.setSampleName(logTFSquareCounter.getName());
            statistics.setKmerSize(kmerSize);
            statistics.setTFCosineNormBase(tf_cosnorm_base);

            statistics.saveTo(fs, outputHadoopPath);
        }
    }
}