Example usage for org.apache.hadoop.mapreduce Job setMaxMapAttempts

List of usage examples for org.apache.hadoop.mapreduce Job setMaxMapAttempts

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.Job.setMaxMapAttempts collected from open-source projects.

Prototype

public void setMaxMapAttempts(int n) 

Document

Expert: Set the maximum number of attempts that will be made to run a map task.
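
The snippet below is a minimal, self-contained sketch of how the method is typically called while a job is being configured. The class name, paths, and attempt count are placeholders rather than values taken from the examples on this page; the call corresponds to the mapreduce.map.maxattempts property, which defaults to 4.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MaxMapAttemptsExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "max-map-attempts-example");
        job.setJarByClass(MaxMapAttemptsExample.class);

        // Allow each map task at most two attempts before the job is failed.
        // Equivalent to setting mapreduce.map.maxattempts=2 on the configuration.
        job.setMaxMapAttempts(2);

        // No mapper class is set, so the default identity Mapper is used;
        // input and output locations come from the command line.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}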

Usage

From source file:com.hhscyber.nl.tweets.hbase2.Hbase2.java

@Override
public int run(String[] args) throws Exception {
    Job client = new Job(getConf(), "hbasetest");
    // Disable speculative execution and cap each map task at two attempts.
    client.setSpeculativeExecution(false);
    client.setMaxMapAttempts(2);
    client.setJarByClass(Hbase2.class);
    client.setOutputKeyClass(Text.class);
    client.setOutputValueClass(Text.class);
    client.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(client, new Path("input/1441737001"));//test one folder
    TextOutputFormat.setOutputPath(client, new Path("output4"));

    client.setMapperClass(Hbase2Mapper.class);
    client.setReducerClass(Hbase2Reducer.class);

    try {
        client.waitForCompletion(true);
    } catch (IOException | InterruptedException | ClassNotFoundException e) {
        System.out.println(e);
    }
    return 0;
}

From source file:com.linkedin.pinot.hadoop.job.SegmentCreationJob.java

License:Apache License

public void run() throws Exception {
    LOGGER.info("Starting {}", getClass().getSimpleName());

    FileSystem fs = FileSystem.get(getConf());
    Path inputPathPattern = new Path(_inputSegmentDir);

    if (fs.exists(new Path(_stagingDir))) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(new Path(_stagingDir), true);
    }
    fs.mkdirs(new Path(_stagingDir));
    fs.mkdirs(new Path(_stagingDir + "/input/"));

    if (fs.exists(new Path(_outputDir))) {
        LOGGER.warn("Found the output folder, deleting it");
        fs.delete(new Path(_outputDir), true);
    }
    fs.mkdirs(new Path(_outputDir));

    List<FileStatus> inputDataFiles = new ArrayList<FileStatus>();
    FileStatus[] fileStatusArr = fs.globStatus(inputPathPattern);
    for (FileStatus fileStatus : fileStatusArr) {
        inputDataFiles.addAll(getDataFilesFromPath(fs, fileStatus.getPath()));
    }

    for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
        FileStatus file = inputDataFiles.get(seqId);
        String completeFilePath = " " + file.getPath().toString() + " " + seqId;
        Path newOutPutFile = new Path((_stagingDir + "/input/"
                + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_') + ".txt"));
        FSDataOutputStream stream = fs.create(newOutPutFile);
        stream.writeUTF(completeFilePath);
        stream.flush();
        stream.close();
    }

    Job job = Job.getInstance(getConf());

    job.setJarByClass(SegmentCreationJob.class);
    job.setJobName(_jobName);

    job.setMapperClass(HadoopSegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(_stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(_stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set("data.schema", new ObjectMapper().writeValueAsString(_dataSchema));

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);
    for (Object key : _properties.keySet()) {
        job.getConfiguration().set(key.toString(), _properties.getProperty(key.toString()));
    }

    if (_depsJarPath != null && _depsJarPath.length() > 0) {
        addDepsJarToDistributedCache(new Path(_depsJarPath), job);
    }

    // Submit the job for execution.
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", _stagingDir + "/output/segmentTar", _outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(_stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(_outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", _stagingDir);
    fs.delete(new Path(_stagingDir), true);
}

From source file:com.linkedin.thirdeye.bootstrap.segment.create.SegmentCreationPhaseJob.java

License:Apache License

public Job run() throws Exception {

    Job job = Job.getInstance(getConf());

    job.setJarByClass(SegmentCreationPhaseJob.class);
    job.setJobName(name);

    FileSystem fs = FileSystem.get(getConf());

    Configuration configuration = job.getConfiguration();

    String schemaPath = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEMA_PATH);
    LOGGER.info("Schema path : {}", schemaPath);
    String configPath = getAndSetConfiguration(configuration, SEGMENT_CREATION_CONFIG_PATH);
    LOGGER.info("Config path : {}", configPath);
    Schema dataSchema = createSchema(configPath);
    LOGGER.info("Data schema : {}", dataSchema);
    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
    LOGGER.info("Input path : {}", inputSegmentDir);
    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputDir);
    String stagingDir = new File(outputDir, TEMP).getAbsolutePath();
    LOGGER.info("Staging dir : {}", stagingDir);
    String tableName = getAndSetConfiguration(configuration, SEGMENT_CREATION_SEGMENT_TABLE_NAME);
    LOGGER.info("Segment table name : {}", tableName);

    // Create temporary directory
    if (fs.exists(new Path(stagingDir))) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(new Path(stagingDir), true);
    }
    fs.mkdirs(new Path(stagingDir));
    fs.mkdirs(new Path(stagingDir + "/input/"));

    if (fs.exists(new Path(outputDir))) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(new Path(outputDir), true);
    }
    fs.mkdirs(new Path(outputDir));

    Path inputPathPattern = new Path(inputSegmentDir);
    List<FileStatus> inputDataFiles = Arrays.asList(fs.listStatus(inputPathPattern));
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((stagingDir + "/input/"
                    + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_')
                    + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set("data.schema", OBJECT_MAPPER.writeValueAsString(dataSchema));
    if (!fs.exists(new Path(schemaPath))) {
        OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValue(fs.create(new Path(schemaPath), false),
                dataSchema);
    }

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);
    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", stagingDir);
    fs.delete(new Path(stagingDir), true);

    return job;
}

From source file:com.linkedin.thirdeye.hadoop.backfill.BackfillPhaseJob.java

License:Apache License

public Job run() throws Exception {

    Job job = Job.getInstance(getConf());
    job.setJarByClass(BackfillPhaseJob.class);
    job.setJobName(name);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    LOGGER.info("*******************************************************************************");
    String controllerHost = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_HOST);
    String controllerPort = getAndSetConfiguration(configuration, BACKFILL_PHASE_CONTROLLER_PORT);
    LOGGER.info("Controller Host : {} Controller Port : {}", controllerHost, controllerPort);
    String segmentStartTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_START_TIME);
    String segmentEndTime = getAndSetConfiguration(configuration, BACKFILL_PHASE_END_TIME);
    long startTime = Long.valueOf(segmentStartTime);
    long endTime = Long.valueOf(segmentEndTime);
    if (Long.valueOf(segmentStartTime) > Long.valueOf(segmentEndTime)) {
        throw new IllegalStateException("Start time cannot be greater than end time");
    }
    String tableName = getAndSetConfiguration(configuration, BACKFILL_PHASE_TABLE_NAME);
    LOGGER.info("Start time : {} End time : {} Table name : {}", segmentStartTime, segmentEndTime, tableName);

    String outputPath = getAndSetConfiguration(configuration, BACKFILL_PHASE_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputPath);
    Path backfillDir = new Path(outputPath);
    if (fs.exists(backfillDir)) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(backfillDir, true);
    }
    Path downloadDir = new Path(backfillDir, DOWNLOAD);
    LOGGER.info("Creating download dir : {}", downloadDir);
    fs.mkdirs(downloadDir);
    Path inputDir = new Path(backfillDir, INPUT);
    LOGGER.info("Creating input dir : {}", inputDir);
    fs.mkdirs(inputDir);
    Path outputDir = new Path(backfillDir, OUTPUT);
    LOGGER.info("Creating output dir : {}", outputDir);

    BackfillControllerAPIs backfillControllerAPIs = new BackfillControllerAPIs(controllerHost,
            Integer.valueOf(controllerPort), tableName);

    LOGGER.info("Downloading segments in range {} to {}", startTime, endTime);
    List<String> allSegments = backfillControllerAPIs.getAllSegments(tableName);
    List<String> segmentsToDownload = backfillControllerAPIs.findSegmentsInRange(tableName, allSegments,
            startTime, endTime);
    for (String segmentName : segmentsToDownload) {
        backfillControllerAPIs.downloadSegment(segmentName, downloadDir);
    }

    LOGGER.info("Reading downloaded segment input files");
    List<FileStatus> inputDataFiles = new ArrayList<>();
    inputDataFiles.addAll(Lists.newArrayList(fs.listStatus(downloadDir)));
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        LOGGER.info("Creating input files at {} for segment input files", inputDir);
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((inputDir + "/"
                    + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_')
                    + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(BackfillPhaseMapJob.BackfillMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);

    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Cleanup the working directory");
    LOGGER.info("Deleting the dir: {}", downloadDir);
    fs.delete(downloadDir, true);
    LOGGER.info("Deleting the dir: {}", inputDir);
    fs.delete(inputDir, true);
    LOGGER.info("Deleting the dir: {}", outputDir);
    fs.delete(outputDir, true);

    return job;
}

From source file:com.linkedin.thirdeye.hadoop.segment.creation.SegmentCreationPhaseJob.java

License:Apache License

public Job run() throws Exception {

    Job job = Job.getInstance(getConf());

    job.setJarByClass(SegmentCreationPhaseJob.class);
    job.setJobName(name);

    FileSystem fs = FileSystem.get(getConf());

    Configuration configuration = job.getConfiguration();

    String inputSegmentDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_INPUT_PATH);
    LOGGER.info("Input path : {}", inputSegmentDir);
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputSegmentDir);
    LOGGER.info("Schema : {}", avroSchema);
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("ThirdEyeConfig {}", thirdeyeConfig.encode());
    String outputDir = getAndSetConfiguration(configuration, SEGMENT_CREATION_OUTPUT_PATH);
    LOGGER.info("Output path : {}", outputDir);
    Path stagingDir = new Path(outputDir, TEMP);
    LOGGER.info("Staging dir : {}", stagingDir);
    String segmentWallClockStart = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_START_TIME);
    LOGGER.info("Segment wallclock start time : {}", segmentWallClockStart);
    String segmentWallClockEnd = getAndSetConfiguration(configuration, SEGMENT_CREATION_WALLCLOCK_END_TIME);
    LOGGER.info("Segment wallclock end time : {}", segmentWallClockEnd);
    String schedule = getAndSetConfiguration(configuration, SEGMENT_CREATION_SCHEDULE);
    LOGGER.info("Segment schedule : {}", schedule);
    String isBackfill = props.getProperty(SEGMENT_CREATION_BACKFILL.toString(), DEFAULT_BACKFILL);
    configuration.set(SEGMENT_CREATION_BACKFILL.toString(), isBackfill);
    LOGGER.info("Is Backfill : {}", configuration.get(SEGMENT_CREATION_BACKFILL.toString()));

    // Create temporary directory
    if (fs.exists(stagingDir)) {
        LOGGER.warn("Found the temp folder, deleting it");
        fs.delete(stagingDir, true);
    }
    fs.mkdirs(stagingDir);
    fs.mkdirs(new Path(stagingDir + "/input/"));

    // Create output directory
    if (fs.exists(new Path(outputDir))) {
        LOGGER.warn("Found the output folder deleting it");
        fs.delete(new Path(outputDir), true);
    }
    fs.mkdirs(new Path(outputDir));

    // Read input files
    List<FileStatus> inputDataFiles = new ArrayList<>();
    for (String input : inputSegmentDir.split(",")) {
        Path inputPathPattern = new Path(input);
        inputDataFiles.addAll(Arrays.asList(fs.listStatus(inputPathPattern)));
    }
    LOGGER.info("size {}", inputDataFiles.size());

    try {
        for (int seqId = 0; seqId < inputDataFiles.size(); ++seqId) {
            FileStatus file = inputDataFiles.get(seqId);
            String completeFilePath = " " + file.getPath().toString() + " " + seqId;
            Path newOutPutFile = new Path((stagingDir + "/input/"
                    + file.getPath().toString().replace('.', '_').replace('/', '_').replace(':', '_')
                    + ".txt"));
            FSDataOutputStream stream = fs.create(newOutPutFile);
            LOGGER.info("wrote {}", completeFilePath);
            stream.writeUTF(completeFilePath);
            stream.flush();
            stream.close();
        }
    } catch (Exception e) {
        LOGGER.error("Exception while reading input files ", e);
    }

    job.setMapperClass(SegmentCreationPhaseMapReduceJob.SegmentCreationMapper.class);

    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(Text.class);

    FileInputFormat.addInputPath(job, new Path(stagingDir + "/input/"));
    FileOutputFormat.setOutputPath(job, new Path(stagingDir + "/output/"));

    job.getConfiguration().setInt(JobContext.NUM_MAPS, inputDataFiles.size());
    job.getConfiguration().set(SEGMENT_CREATION_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    job.setMaxReduceAttempts(1);
    job.setMaxMapAttempts(0);
    job.setNumReduceTasks(0);
    for (Object key : props.keySet()) {
        job.getConfiguration().set(key.toString(), props.getProperty(key.toString()));
    }

    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed : " + job);
    }

    LOGGER.info("Moving Segment Tar files from {} to: {}", stagingDir + "/output/segmentTar", outputDir);
    FileStatus[] segmentArr = fs.listStatus(new Path(stagingDir + "/output/segmentTar"));
    for (FileStatus segment : segmentArr) {
        fs.rename(segment.getPath(), new Path(outputDir, segment.getPath().getName()));
    }

    // Delete temporary directory.
    LOGGER.info("Cleanup the working directory.");
    LOGGER.info("Deleting the dir: {}", stagingDir);
    fs.delete(stagingDir, true);

    return job;
}

From source file:com.msd.gin.halyard.tools.HalyardParallelExport.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(newOption("h", null, "Prints this help"));
    options.addOption(newOption("v", null, "Prints version"));
    options.addOption(newOption("s", "source_htable", "Source HBase table with Halyard RDF store"));
    options.addOption(newOption("q", "sparql_query",
            "SPARQL tuple or graph query with use of '" + PARALLEL_SPLIT_FUNCTION_URI + "' function"));
    options.addOption(newOption("t", "target_url",
            "file://<path>/<file_name>{0}.<ext> or hdfs://<path>/<file_name>{0}.<ext> or jdbc:<jdbc_connection>/<table_name>"));
    options.addOption(newOption("p", "property=value", "JDBC connection properties"));
    options.addOption(newOption("l", "driver_classpath", "JDBC driver classpath delimited by ':'"));
    options.addOption(newOption("c", "driver_class", "JDBC driver class name"));
    try {
        CommandLine cmd = new PosixParser().parse(options, args);
        if (args.length == 0 || cmd.hasOption('h')) {
            printHelp(options);
            return -1;
        }
        if (cmd.hasOption('v')) {
            Properties p = new Properties();
            try (InputStream in = HalyardExport.class
                    .getResourceAsStream("/META-INF/maven/com.msd.gin.halyard/hbasesail/pom.properties")) {
                if (in != null)
                    p.load(in);
            }
            System.out.println("Halyard Parallel Export version " + p.getProperty("version", "unknown"));
            return 0;
        }
        if (!cmd.getArgList().isEmpty())
            throw new ExportException("Unknown arguments: " + cmd.getArgList().toString());
        for (char c : "sqt".toCharArray()) {
            if (!cmd.hasOption(c))
                throw new ExportException("Missing mandatory option: " + c);
        }
        for (char c : "sqtlc".toCharArray()) {
            String s[] = cmd.getOptionValues(c);
            if (s != null && s.length > 1)
                throw new ExportException("Multiple values for option: " + c);
        }
        String source = cmd.getOptionValue('s');
        String query = cmd.getOptionValue('q');
        if (!query.contains(PARALLEL_SPLIT_FUNCTION_NAME)) {
            throw new ExportException("Parallel export SPARQL query must contain '"
                    + PARALLEL_SPLIT_FUNCTION_URI + "' function.");
        }
        String target = cmd.getOptionValue('t');
        if ((target.startsWith("file:") || target.startsWith("hdfs:")) && !target.contains("{0}")) {
            throw new ExportException(
                    "Parallel export file target must contain '{0}' counter in the file path or name.");
        }
        getConf().set(SOURCE, source);
        getConf().set(QUERY, query);
        getConf().set(TARGET, target);
        String driver = cmd.getOptionValue('c');
        if (driver != null) {
            getConf().set(JDBC_DRIVER, driver);
        }
        String props[] = cmd.getOptionValues('p');
        if (props != null) {
            for (int i = 0; i < props.length; i++) {
                props[i] = Base64.encodeBase64String(props[i].getBytes(UTF8));
            }
            getConf().setStrings(JDBC_PROPERTIES, props);
        }
        TableMapReduceUtil.addDependencyJars(getConf(), HalyardExport.class, NTriplesUtil.class, Rio.class,
                AbstractRDFHandler.class, RDFFormat.class, RDFParser.class, HTable.class,
                HBaseConfiguration.class, AuthenticationProtos.class, Trace.class);
        HBaseConfiguration.addHbaseResources(getConf());
        Job job = Job.getInstance(getConf(), "HalyardParallelExport " + source + " -> " + target);
        String cp = cmd.getOptionValue('l');
        if (cp != null) {
            String jars[] = cp.split(":");
            for (int i = 0; i < jars.length; i++) {
                File f = new File(jars[i]);
                if (!f.isFile())
                    throw new ExportException("Invalid JDBC driver classpath element: " + jars[i]);
                job.addFileToClassPath(new Path(f.toURI()));
                jars[i] = f.getName();
            }
            job.getConfiguration().setStrings(JDBC_CLASSPATH, jars);
        }
        job.setJarByClass(HalyardParallelExport.class);
        job.setMaxMapAttempts(1);
        job.setMapperClass(ParallelExportMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Void.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(IndexedInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            LOG.info("Parallel Export Completed..");
            return 0;
        }
        return -1;
    } catch (RuntimeException exp) {
        System.out.println(exp.getMessage());
        printHelp(options);
        throw exp;
    }

}

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testMRRSleepJob() throws IOException, InterruptedException, ClassNotFoundException {
    LOG.info("\n\n\nStarting testMRRSleepJob().");

    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(sleepConf);

    Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

    job.setJarByClass(MRRSleepJob.class);
    job.setMaxMapAttempts(1); // speed up failures
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
            trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

    // FIXME once counters and task progress can be obtained properly
    // TODO use dag client to test counters and task progress?
    // what about completed jobs?
}

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testRandomWriter() throws IOException, InterruptedException, ClassNotFoundException {

    LOG.info("\n\n\nStarting testRandomWriter().");
    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    RandomTextWriterJob randomWriterJob = new RandomTextWriterJob();
    mrrTezCluster.getConfig().set(RandomTextWriterJob.TOTAL_BYTES, "3072");
    mrrTezCluster.getConfig().set(RandomTextWriterJob.BYTES_PER_MAP, "1024");
    Job job = randomWriterJob.createJob(mrrTezCluster.getConfig());
    Path outputDir = new Path(OUTPUT_ROOT_DIR, "random-output");
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setSpeculativeExecution(false);
    job.setJarByClass(RandomTextWriterJob.class);
    job.setMaxMapAttempts(1); // speed up failures
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId,
            trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));

    // Make sure there are three files in the output-dir

    RemoteIterator<FileStatus> iterator = FileContext.getFileContext(mrrTezCluster.getConfig())
            .listStatus(outputDir);
    int count = 0;
    while (iterator.hasNext()) {
        FileStatus file = iterator.next();
        if (!file.getPath().getName().equals(FileOutputCommitter.SUCCEEDED_FILE_NAME)) {
            count++;
        }
    }
    Assert.assertEquals("Number of part files is wrong!", 3, count);

}

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testFailingJob() throws IOException, InterruptedException, ClassNotFoundException {

    LOG.info("\n\n\nStarting testFailingJob().");

    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(sleepConf);

    Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

    job.setJarByClass(MRRSleepJob.class);
    job.setMaxMapAttempts(1); // speed up failures
    job.getConfiguration().setBoolean(MRRSleepJob.MAP_FATAL_ERROR, true);
    job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "*");

    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertFalse(succeeded);
    Assert.assertEquals(JobStatus.State.FAILED, job.getJobState());

    // FIXME once counters and task progress can be obtained properly
    // TODO verify failed task diagnostics
}

From source file:org.apache.tez.mapreduce.TestMRRJobs.java

License:Apache License

@Test(timeout = 60000)
public void testFailingAttempt() throws IOException, InterruptedException, ClassNotFoundException {

    LOG.info("\n\n\nStarting testFailingAttempt().");

    if (!(new File(MiniTezCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniTezCluster.APPJAR + " not found. Not running test.");
        return;
    }

    Configuration sleepConf = new Configuration(mrrTezCluster.getConfig());

    MRRSleepJob sleepJob = new MRRSleepJob();
    sleepJob.setConf(sleepConf);

    Job job = sleepJob.createJob(1, 1, 1, 1, 1, 1, 1, 1, 1, 1);

    job.setJarByClass(MRRSleepJob.class);
    job.setMaxMapAttempts(3); // speed up failures
    job.getConfiguration().setBoolean(MRRSleepJob.MAP_THROW_ERROR, true);
    job.getConfiguration().set(MRRSleepJob.MAP_ERROR_TASK_IDS, "0");

    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);
    Assert.assertEquals(JobStatus.State.SUCCEEDED, job.getJobState());

    // FIXME once counters and task progress can be obtained properly
    // TODO verify failed task diagnostics
}