Example usage for org.apache.hadoop.mapreduce Job getCounters

List of usage examples for org.apache.hadoop.mapreduce Job getCounters

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce Job getCounters.

Prototype

public Counters getCounters() throws IOException 

Document

Gets the counters for this job.
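
Before the project examples below, here is a minimal sketch of the usual pattern: run the job to completion, then call getCounters() and read the result. The class name CounterDump and the choice of TaskCounter.MAP_INPUT_RECORDS are illustrative assumptions, not taken from the examples that follow.

import java.io.IOException;

import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.TaskCounter;

public class CounterDump {

    // Prints the counters of a job that has already run; getCounters() throws IOException on failure.
    public static void dump(Job job) throws IOException {
        Counters counters = job.getCounters();

        // Read one built-in counter directly.
        Counter mapInputRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS);
        System.out.println("Map input records: " + mapInputRecords.getValue());

        // Or walk every counter group recorded by the framework and the job.
        for (CounterGroup group : counters) {
            System.out.println("Group: " + group.getDisplayName());
            for (Counter counter : group) {
                System.out.println("  " + counter.getDisplayName() + " = " + counter.getValue());
            }
        }
    }
}

A typical call site, matching the pattern most of the examples below follow, is to check job.waitForCompletion(true) first and only then call CounterDump.dump(job).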

Usage

From source file:org.apache.blur.mapreduce.lib.BlurInputFormatTest.java

License:Apache License

private void runTest(String tableName, boolean disableFast, Path fileCache)
        throws IOException, BlurException, TException, InterruptedException, ClassNotFoundException {
    FileSystem fileSystem = miniCluster.getFileSystem();
    Path root = new Path(fileSystem.getUri() + "/");

    creatTable(tableName, new Path(root, "tables"), disableFast);
    loadTable(tableName, 100, 100);

    Iface client = getClient();

    TableDescriptor tableDescriptor = client.describe(tableName);

    Job job = Job.getInstance(conf, "Read Data");
    job.setJarByClass(BlurInputFormatTest.class);
    job.setMapperClass(TestMapper.class);
    job.setInputFormatClass(BlurInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TableBlurRecord.class);

    Path output = new Path(new Path(root, "output"), tableName);

    String snapshot = UUID.randomUUID().toString();
    client.createSnapshot(tableName, snapshot);

    if (fileCache != null) {
        BlurInputFormat.setLocalCachePath(job, fileCache);
    }

    BlurInputFormat.setMaxNumberOfMaps(job, 1);
    BlurInputFormat.setZooKeeperConnectionStr(job, miniCluster.getZkConnectionString());
    BlurInputFormat.addTable(job, tableDescriptor, snapshot);
    FileOutputFormat.setOutputPath(job, output);

    try {
        assertTrue(job.waitForCompletion(true));
        Counters counters = job.getCounters();
        assertMapTask(1, counters);

    } finally {
        client.removeSnapshot(tableName, snapshot);
    }

    final Map<Text, TableBlurRecord> results = new TreeMap<Text, TableBlurRecord>();
    walkOutput(output, conf, new ResultReader() {
        @Override
        public void read(Text rowId, TableBlurRecord tableBlurRecord) {
            results.put(new Text(rowId), new TableBlurRecord(tableBlurRecord));
        }
    });
    int rowId = 100;
    for (Entry<Text, TableBlurRecord> e : results.entrySet()) {
        Text r = e.getKey();
        assertEquals(new Text("row-" + rowId), r);
        BlurRecord blurRecord = new BlurRecord();
        blurRecord.setRowId("row-" + rowId);
        blurRecord.setRecordId("record-" + rowId);
        blurRecord.setFamily("fam0");
        blurRecord.addColumn("col0", "value-" + rowId);
        TableBlurRecord tableBlurRecord = new TableBlurRecord(new Text(tableName), blurRecord);
        assertEquals(tableBlurRecord, e.getValue());

        rowId++;
    }
    assertEquals(200, rowId);
}

From source file:org.apache.blur.mapreduce.lib.BlurOutputFormatMiniClusterTest.java

License:Apache License

@Test
public void testBlurOutputFormat()
        throws IOException, InterruptedException, ClassNotFoundException, BlurException, TException {
    fileSystem.delete(inDir, true);
    String tableName = "testBlurOutputFormat";
    writeRecordsFile("in/part1", 1, 1, 1, 1, "cf1");
    writeRecordsFile("in/part2", 1, 1, 2, 1, "cf1");

    Job job = Job.getInstance(conf, "blur index");
    job.setJarByClass(BlurOutputFormatMiniClusterTest.class);
    job.setMapperClass(CsvBlurMapper.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(TEST_ROOT_DIR + "/in"));
    String tableUri = new Path(TEST_ROOT_DIR + "/blur/" + tableName)
            .makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory()).toString();
    CsvBlurMapper.addColumns(job, "cf1", "col");

    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setShardCount(1);
    tableDescriptor.setTableUri(tableUri);
    tableDescriptor.setName(tableName);

    Iface client = getClient();
    client.createTable(tableDescriptor);

    BlurOutputFormat.setupJob(job, tableDescriptor);
    Path output = new Path(TEST_ROOT_DIR + "/out");
    BlurOutputFormat.setOutputPath(job, output);

    Path tablePath = new Path(tableUri);
    Path shardPath = new Path(tablePath, ShardUtil.getShardName(0));
    FileStatus[] listStatus = fileSystem.listStatus(shardPath);
    assertEquals(3, listStatus.length);
    System.out.println("======" + listStatus.length);
    for (FileStatus fileStatus : listStatus) {
        System.out.println(fileStatus.getPath());
    }

    assertTrue(job.waitForCompletion(true));
    Counters ctrs = job.getCounters();
    System.out.println("Counters: " + ctrs);

    client.loadData(tableName, output.toString());

    while (true) {
        TableStats tableStats = client.tableStats(tableName);
        System.out.println(tableStats);
        if (tableStats.getRowCount() > 0) {
            break;
        }
        Thread.sleep(100);
    }

    assertTrue(fileSystem.exists(tablePath));
    assertFalse(fileSystem.isFile(tablePath));

    FileStatus[] listStatusAfter = fileSystem.listStatus(shardPath);

    assertEquals(11, listStatusAfter.length);

}

From source file:org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java

License:Apache License

@Test
public void testBlurOutputFormat() throws IOException, InterruptedException, ClassNotFoundException {
    Path input = getInDir();
    Path output = getOutDir();
    _fileSystem.delete(input, true);
    _fileSystem.delete(output, true);
    writeRecordsFile(new Path(input, "part1"), 1, 1, 1, 1, "cf1");
    writeRecordsFile(new Path(input, "part2"), 1, 1, 2, 1, "cf1");

    Job job = Job.getInstance(_conf, "blur index");
    job.setJarByClass(BlurOutputFormatTest.class);
    job.setMapperClass(CsvBlurMapper.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, input);
    CsvBlurMapper.addColumns(job, "cf1", "col");

    Path tablePath = new Path(new Path(_root, "table"), "test");

    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setShardCount(1);
    tableDescriptor.setTableUri(tablePath.toString());
    tableDescriptor.setName("test");

    createShardDirectories(tablePath, 1);

    BlurOutputFormat.setupJob(job, tableDescriptor);
    BlurOutputFormat.setOutputPath(job, output);

    assertTrue(job.waitForCompletion(true));
    Counters ctrs = job.getCounters();
    System.out.println("Counters: " + ctrs);

    Path path = new Path(output, ShardUtil.getShardName(0));
    dump(path, _conf);
    Collection<Path> commitedTasks = getCommitedTasks(path);
    assertEquals(1, commitedTasks.size());
    DirectoryReader reader = DirectoryReader.open(new HdfsDirectory(_conf, commitedTasks.iterator().next()));
    assertEquals(2, reader.numDocs());
    reader.close();
}

From source file:org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java

License:Apache License

@Test
public void testBlurOutputFormatOverFlowTest()
        throws IOException, InterruptedException, ClassNotFoundException {
    Path input = getInDir();
    Path output = getOutDir();
    _fileSystem.delete(input, true);
    _fileSystem.delete(output, true);
    // 1500 * 50 = 75,000
    writeRecordsFile(new Path(input, "part1"), 1, 50, 1, 1500, "cf1");
    // 100 * 50 = 5,000
    writeRecordsFile(new Path(input, "part2"), 1, 50, 2000, 100, "cf1");

    Job job = Job.getInstance(_conf, "blur index");
    job.setJarByClass(BlurOutputFormatTest.class);
    job.setMapperClass(CsvBlurMapper.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, input);
    CsvBlurMapper.addColumns(job, "cf1", "col");

    Path tablePath = new Path(new Path(_root, "table"), "test");

    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setShardCount(1);
    tableDescriptor.setTableUri(tablePath.toString());
    tableDescriptor.setName("test");

    createShardDirectories(tablePath, 1);

    BlurOutputFormat.setupJob(job, tableDescriptor);
    BlurOutputFormat.setOutputPath(job, output);
    BlurOutputFormat.setIndexLocally(job, true);
    BlurOutputFormat.setOptimizeInFlight(job, false);

    assertTrue(job.waitForCompletion(true));
    Counters ctrs = job.getCounters();
    System.out.println("Counters: " + ctrs);

    Path path = new Path(output, ShardUtil.getShardName(0));
    Collection<Path> commitedTasks = getCommitedTasks(path);
    assertEquals(1, commitedTasks.size());

    DirectoryReader reader = DirectoryReader.open(new HdfsDirectory(_conf, commitedTasks.iterator().next()));
    assertEquals(80000, reader.numDocs());
    reader.close();
}

From source file:org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java

License:Apache License

@Test
public void testBlurOutputFormatOverFlowMultipleReducersTest()
        throws IOException, InterruptedException, ClassNotFoundException {
    Path input = getInDir();
    Path output = getOutDir();
    _fileSystem.delete(input, true);
    _fileSystem.delete(output, true);
    // 1500 * 50 = 75,000
    writeRecordsFile(new Path(input, "part1"), 1, 50, 1, 1500, "cf1");
    // 100 * 50 = 5,000
    writeRecordsFile(new Path(input, "part2"), 1, 50, 2000, 100, "cf1");

    Job job = Job.getInstance(_conf, "blur index");
    job.setJarByClass(BlurOutputFormatTest.class);
    job.setMapperClass(CsvBlurMapper.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, input);
    CsvBlurMapper.addColumns(job, "cf1", "col");

    Path tablePath = new Path(new Path(_root, "table"), "test");

    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setShardCount(2);
    tableDescriptor.setTableUri(tablePath.toString());
    tableDescriptor.setName("test");

    createShardDirectories(output, 2);

    BlurOutputFormat.setupJob(job, tableDescriptor);
    BlurOutputFormat.setOutputPath(job, output);
    BlurOutputFormat.setIndexLocally(job, false);
    BlurOutputFormat.setDocumentBufferStrategy(job, DocumentBufferStrategyHeapSize.class);
    BlurOutputFormat.setMaxDocumentBufferHeapSize(job, 128 * 1024);

    assertTrue(job.waitForCompletion(true));
    Counters ctrs = job.getCounters();
    System.out.println("Counters: " + ctrs);

    long total = 0;
    for (int i = 0; i < tableDescriptor.getShardCount(); i++) {
        Path path = new Path(output, ShardUtil.getShardName(i));
        Collection<Path> commitedTasks = getCommitedTasks(path);
        assertEquals(1, commitedTasks.size());

        DirectoryReader reader = DirectoryReader
                .open(new HdfsDirectory(_conf, commitedTasks.iterator().next()));
        total += reader.numDocs();
        reader.close();
    }
    assertEquals(80000, total);

}

From source file:org.apache.blur.mapreduce.lib.BlurOutputFormatTest.java

License:Apache License

@Test
public void testBlurOutputFormatOverFlowMultipleReducersWithReduceMultiplierTest()
        throws IOException, InterruptedException, ClassNotFoundException {
    Path input = getInDir();
    Path output = getOutDir();
    _fileSystem.delete(input, true);
    _fileSystem.delete(output, true);

    // 1500 * 50 = 75,000
    writeRecordsFile(new Path(input, "part1"), 1, 50, 1, 1500, "cf1");
    // 100 * 50 = 5,000
    writeRecordsFile(new Path(input, "part2"), 1, 50, 2000, 100, "cf1");

    Job job = Job.getInstance(_conf, "blur index");
    job.setJarByClass(BlurOutputFormatTest.class);
    job.setMapperClass(CsvBlurMapper.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, input);
    CsvBlurMapper.addColumns(job, "cf1", "col");

    Path tablePath = new Path(new Path(_root, "table"), "test");

    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setShardCount(7);
    tableDescriptor.setTableUri(tablePath.toString());
    tableDescriptor.setName("test");

    createShardDirectories(output, 7);

    BlurOutputFormat.setupJob(job, tableDescriptor);
    BlurOutputFormat.setOutputPath(job, output);
    int multiple = 2;
    BlurOutputFormat.setReducerMultiplier(job, multiple);

    assertTrue(job.waitForCompletion(true));
    Counters ctrs = job.getCounters();
    System.out.println("Counters: " + ctrs);

    long total = 0;
    for (int i = 0; i < tableDescriptor.getShardCount(); i++) {
        Path path = new Path(output, ShardUtil.getShardName(i));
        Collection<Path> commitedTasks = getCommitedTasks(path);
        assertTrue(commitedTasks.size() >= multiple);
        for (Path p : commitedTasks) {
            DirectoryReader reader = DirectoryReader.open(new HdfsDirectory(_conf, p));
            total += reader.numDocs();
            reader.close();
        }
    }
    assertEquals(80000, total);

}

From source file:org.apache.blur.mapreduce.lib.update.Driver.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    int c = 0;
    if (args.length < 5) {
        System.err.println(
                "Usage Driver <table> <mr inc working path> <output path> <zk connection> <reducer multipler> <extra config files...>");
    }
    String table = args[c++];
    String mrIncWorkingPathStr = args[c++];
    String outputPathStr = args[c++];
    String blurZkConnection = args[c++];
    int reducerMultipler = Integer.parseInt(args[c++]);
    for (; c < args.length; c++) {
        String externalConfigFileToAdd = args[c];
        getConf().addResource(new Path(externalConfigFileToAdd));
    }

    Path outputPath = new Path(outputPathStr);
    Path mrIncWorkingPath = new Path(mrIncWorkingPathStr);
    FileSystem fileSystem = mrIncWorkingPath.getFileSystem(getConf());

    Path newData = new Path(mrIncWorkingPath, NEW);
    Path inprogressData = new Path(mrIncWorkingPath, INPROGRESS);
    Path completeData = new Path(mrIncWorkingPath, COMPLETE);
    Path fileCache = new Path(mrIncWorkingPath, CACHE);

    fileSystem.mkdirs(newData);
    fileSystem.mkdirs(inprogressData);
    fileSystem.mkdirs(completeData);
    fileSystem.mkdirs(fileCache);

    List<Path> srcPathList = new ArrayList<Path>();
    for (FileStatus fileStatus : fileSystem.listStatus(newData)) {
        srcPathList.add(fileStatus.getPath());
    }
    if (srcPathList.isEmpty()) {
        return 0;
    }

    List<Path> inprogressPathList = new ArrayList<Path>();
    boolean success = false;
    Iface client = null;
    try {
        inprogressPathList = movePathList(fileSystem, inprogressData, srcPathList);

        Job job = Job.getInstance(getConf(), "Blur Row Updater for table [" + table + "]");
        client = BlurClient.getClientFromZooKeeperConnectionStr(blurZkConnection);
        waitForOtherSnapshotsToBeRemoved(client, table, MRUPDATE_SNAPSHOT);
        client.createSnapshot(table, MRUPDATE_SNAPSHOT);
        TableDescriptor descriptor = client.describe(table);
        Path tablePath = new Path(descriptor.getTableUri());

        BlurInputFormat.setLocalCachePath(job, fileCache);
        BlurInputFormat.addTable(job, descriptor, MRUPDATE_SNAPSHOT);
        MultipleInputs.addInputPath(job, tablePath, BlurInputFormat.class, MapperForExistingData.class);
        for (Path p : inprogressPathList) {
            FileInputFormat.addInputPath(job, p);
            MultipleInputs.addInputPath(job, p, SequenceFileInputFormat.class, MapperForNewData.class);
        }

        BlurOutputFormat.setOutputPath(job, outputPath);
        BlurOutputFormat.setupJob(job, descriptor);

        job.setReducerClass(UpdateReducer.class);
        job.setMapOutputKeyClass(IndexKey.class);
        job.setMapOutputValueClass(IndexValue.class);
        job.setPartitionerClass(IndexKeyPartitioner.class);
        job.setGroupingComparatorClass(IndexKeyWritableComparator.class);

        BlurOutputFormat.setReducerMultiplier(job, reducerMultipler);

        success = job.waitForCompletion(true);
        Counters counters = job.getCounters();
        LOG.info("Counters [" + counters + "]");

    } finally {
        if (success) {
            LOG.info("Indexing job succeeded!");
            movePathList(fileSystem, completeData, inprogressPathList);
        } else {
            LOG.error("Indexing job failed!");
            movePathList(fileSystem, newData, inprogressPathList);
        }
        if (client != null) {
            client.removeSnapshot(table, MRUPDATE_SNAPSHOT);
        }
    }

    if (success) {
        return 0;
    } else {
        return 1;
    }

}

From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    Throwable throwable = null;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException | ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        }
    }
}

From source file:org.apache.falcon.job.JobCounters.java

License:Apache License

public void obtainJobCounters(Configuration conf, Job job, boolean isDistCp) throws IOException {
    try {
        long timeTaken = job.getFinishTime() - job.getStartTime();
        countersMap.put(ReplicationJobCountersList.TIMETAKEN.getName(), timeTaken);
        Counters jobCounters = job.getCounters();
        parseJob(job, jobCounters, isDistCp);
    } catch (Exception e) {
        LOG.info("Exception occurred while obtaining job counters: {}", e);
    }
}

From source file:org.apache.giraph.utils.CounterUtils.java

License:Apache License

/**
 * Wait for a counter to appear in a group and then return the name of that
 * counter. If job finishes before counter appears, return null.
 *
 * @param job   Job
 * @param group Name of the counter group
 * @return Name of the counter inside of the group, or null if job finishes
 *         before counter appears
 */
public static String waitAndGetCounterNameFromGroup(Job job, String group) {
    try {
        while (job.getCounters().getGroup(group).size() == 0) {
            if (job.isComplete()) {
                return null;
            }
            Thread.sleep(SLEEP_MSECS);
        }
        return job.getCounters().getGroup(group).iterator().next().getName();
    } catch (IOException | InterruptedException e) {
        throw new IllegalStateException("waitAndGetCounterNameFromGroup: Exception occurred", e);
    }
}
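
A hypothetical call site for the helper above (not taken from the Giraph sources): pass in a submitted job and a counter group name, wait for the first counter in that group, then read its value through getCounters().

import java.io.IOException;

import org.apache.giraph.utils.CounterUtils;
import org.apache.hadoop.mapreduce.Job;

public class WaitForCounterExample {

    // Blocks until the group contains at least one counter; prints nothing if the job finishes first.
    public static void printFirstCounter(Job job, String group) throws IOException {
        String counterName = CounterUtils.waitAndGetCounterNameFromGroup(job, group);
        if (counterName != null) {
            long value = job.getCounters().getGroup(group).findCounter(counterName).getValue();
            System.out.println(counterName + " = " + value);
        }
    }
}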