Example usage for org.apache.hadoop.mapred JobConf setWorkingDirectory

Introduction

This page collects example usages of the setWorkingDirectory method of org.apache.hadoop.mapred.JobConf.

Prototype

public void setWorkingDirectory(Path dir)

Source Link

Document

Set the current working directory for the default file system.

Usage

From source file:io.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

/**
 * Configures, submits and waits for the map-only conversion job, then reads back the segment
 * descriptors the tasks wrote under the job directory.
 *
 * <p>The job's working directory is set to {@code converterConfig.getDistributedSuccessCache()};
 * the classpath setup and the job output path are both derived from it. {@code cleanup(job)} is
 * always attempted once submission has been tried, whether or not the job succeeded.
 *
 * @return the converted segments, or {@code null} when the job reports failure or when no success
 *         marker file is found under the job directory
 * @throws IOException if submitting the job, waiting on it, or reading its output fails
 */
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    // Copy the user-supplied Hadoop properties, tagging each with where it came from.
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0);// Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    // Tracks the failure leaving the try block so a cleanup failure can be attached to it.
    Throwable throwable = null;
    // Remembered so the interrupt status is restored only after cleanup has had a chance to run;
    // re-interrupting earlier could make the cleanup's own blocking I/O fail.
    boolean interrupted = false;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            // Surface the per-task diagnostics before giving up.
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            // Counters are informational only; failing to read them does not fail the run.
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        // Walk the job directory recursively; every success marker points at a sibling data file.
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        // Deserialize each data file into a DataSegment; copyOf forces the lazy transform eagerly.
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException e) {
        interrupted = true;
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                // Keep the original failure primary; attach the cleanup failure to it.
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        } finally {
            if (interrupted) {
                // The InterruptedException was wrapped above; restore the flag for callers.
                Thread.currentThread().interrupt();
            }
        }
    }
}

From source file:org.apache.ambari.servicemonitor.unit.BaseLocalClusterTestCase.java

License:Apache License

/**
 * Bond the job configuration to the directory set up for this class for data
 * @param tc test case/* ww w.  jav a  2  s.  c  o  m*/
 * @param testname test name
 * @param jobConf job conf to configure
 */
protected void bondDataOutputDir(BaseLocalClusterTestCase tc, String testname, JobConf jobConf) {
    Path datadir = new Path(getDataDir(tc, testname));
    jobConf.setWorkingDirectory(new Path(datadir, "working"));
    jobConf.set(HadoopKeys.MAPRED_OUTPUT_DIR, new Path(datadir, "output").toString());
}

From source file:org.apache.druid.indexer.updater.HadoopConverterJob.java

License:Apache License

/**
 * Runs the map-only segment conversion job end to end: builds the job configuration, submits
 * the job, waits for it, and loads the resulting segment descriptors from the job directory.
 *
 * <p>{@code converterConfig.getDistributedSuccessCache()} is used as the job's working
 * directory, from which the distributed classpath locations and the job path are computed.
 * {@code cleanup(job)} runs in a {@code finally} block, so it is attempted on success and on
 * failure alike.
 *
 * @return the converted segments, or {@code null} if the job did not succeed or no success
 *         marker was found under the job directory
 * @throws IOException if job submission, completion polling, or output listing fails
 */
public List<DataSegment> run() throws IOException {
    final JobConf jobConf = new JobConf();
    jobConf.setKeepFailedTaskFiles(false);
    // Apply the configured Hadoop properties, recording their origin as the source tag.
    for (Map.Entry<String, String> entry : converterConfig.getHadoopProperties().entrySet()) {
        jobConf.set(entry.getKey(), entry.getValue(), "converterConfig.getHadoopProperties()");
    }
    final List<DataSegment> segments = converterConfig.getSegments();
    if (segments.isEmpty()) {
        throw new IAE("No segments found for datasource [%s]", converterConfig.getDataSource());
    }
    converterConfigIntoConfiguration(converterConfig, segments, jobConf);

    jobConf.setNumReduceTasks(0); // Map only. Number of map tasks determined by input format
    jobConf.setWorkingDirectory(new Path(converterConfig.getDistributedSuccessCache()));

    setJobName(jobConf, segments);

    if (converterConfig.getJobPriority() != null) {
        jobConf.setJobPriority(JobPriority.valueOf(converterConfig.getJobPriority()));
    }

    final Job job = Job.getInstance(jobConf);

    job.setInputFormatClass(ConfigInputFormat.class);
    job.setMapperClass(ConvertingMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setMapSpeculativeExecution(false);
    job.setOutputFormatClass(ConvertingOutputFormat.class);

    JobHelper.setupClasspath(JobHelper.distributedClassPath(jobConf.getWorkingDirectory()),
            JobHelper.distributedClassPath(getJobClassPathDir(job.getJobName(), jobConf.getWorkingDirectory())),
            job);

    // Holds whatever failure escapes the try block, so cleanup errors can be added as suppressed.
    Throwable throwable = null;
    // Set when the wait was interrupted; the flag is re-asserted only after cleanup so that the
    // cleanup's own blocking calls are not aborted by a pending interrupt.
    boolean interrupted = false;
    try {
        job.submit();
        log.info("Job %s submitted, status available at %s", job.getJobName(), job.getTrackingURL());
        final boolean success = job.waitForCompletion(true);
        if (!success) {
            // Log the diagnostics of every map task before reporting failure via null.
            final TaskReport[] reports = job.getTaskReports(TaskType.MAP);
            if (reports != null) {
                for (final TaskReport report : reports) {
                    log.error("Error in task [%s] : %s", report.getTaskId(),
                            Arrays.toString(report.getDiagnostics()));
                }
            }
            return null;
        }
        try {
            loadedBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_LOADED).getValue();
            writtenBytes = job.getCounters().findCounter(COUNTER_GROUP, COUNTER_WRITTEN).getValue();
        } catch (IOException ex) {
            // Byte counters are best-effort; the conversion result is still usable without them.
            log.error(ex, "Could not fetch counters");
        }
        final JobID jobID = job.getJobID();

        final Path jobDir = getJobPath(jobID, job.getWorkingDirectory());
        final FileSystem fs = jobDir.getFileSystem(job.getConfiguration());
        // Recursively list the job directory and collect the data file next to each success marker.
        final RemoteIterator<LocatedFileStatus> it = fs.listFiles(jobDir, true);
        final List<Path> goodPaths = new ArrayList<>();
        while (it.hasNext()) {
            final LocatedFileStatus locatedFileStatus = it.next();
            if (locatedFileStatus.isFile()) {
                final Path myPath = locatedFileStatus.getPath();
                if (ConvertingOutputFormat.DATA_SUCCESS_KEY.equals(myPath.getName())) {
                    goodPaths.add(new Path(myPath.getParent(), ConvertingOutputFormat.DATA_FILE_KEY));
                }
            }
        }
        if (goodPaths.isEmpty()) {
            log.warn("No good data found at [%s]", jobDir);
            return null;
        }
        // Read every data file as a DataSegment; copyOf materializes the lazy view immediately.
        final List<DataSegment> returnList = ImmutableList
                .copyOf(Lists.transform(goodPaths, new Function<Path, DataSegment>() {
                    @Nullable
                    @Override
                    public DataSegment apply(final Path input) {
                        try {
                            if (!fs.exists(input)) {
                                throw new ISE("Somehow [%s] was found but [%s] is missing at [%s]",
                                        ConvertingOutputFormat.DATA_SUCCESS_KEY,
                                        ConvertingOutputFormat.DATA_FILE_KEY, jobDir);
                            }
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                        try (final InputStream stream = fs.open(input)) {
                            return HadoopDruidConverterConfig.jsonMapper.readValue(stream, DataSegment.class);
                        } catch (final IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                }));
        if (returnList.size() == segments.size()) {
            return returnList;
        } else {
            throw new ISE(
                    "Tasks reported success but result length did not match! Expected %d found %d at path [%s]",
                    segments.size(), returnList.size(), jobDir);
        }
    } catch (InterruptedException e) {
        interrupted = true;
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (ClassNotFoundException e) {
        RuntimeException exception = Throwables.propagate(e);
        throwable = exception;
        throw exception;
    } catch (Throwable t) {
        throwable = t;
        throw t;
    } finally {
        try {
            cleanup(job);
        } catch (IOException e) {
            if (throwable != null) {
                // The primary failure wins; the cleanup failure rides along as suppressed.
                throwable.addSuppressed(e);
            } else {
                log.error(e, "Could not clean up job [%s]", job.getJobID());
            }
        } finally {
            if (interrupted) {
                // Wrapping the InterruptedException cleared nothing but hid it; restore the flag.
                Thread.currentThread().interrupt();
            }
        }
    }
}

From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License:Apache License

/**
 * Writes three rows through {@code HBaseBulkOutputFormat} with a map-only old-API job and
 * verifies that every written column can be read back from the table, that the number of
 * scanned rows equals the number of input lines, and that the intermediate output directory
 * no longer exists afterwards.
 */
@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    //create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    // each line is "<row key>,<qualifier>:<value>,<qualifier>:<value>"
    String[] data = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    try {
        for (String line : data) {
            os.write(Bytes.toBytes(line + "\n"));
        }
    } finally {
        // close even when a write fails so the stream is not leaked
        os.close();
    }
    Path interPath = new Path(methodTestDir, "inter");
    //create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    int index = 0;
    HTable table = new HTable(conf, tableName);
    try {
        Scan scan = new Scan();
        scan.addFamily(familyNameBytes);
        ResultScanner scanner = table.getScanner(scan);
        try {
            for (Result result : scanner) {
                // fail with a clear message instead of an ArrayIndexOutOfBoundsException
                assertTrue("table holds more rows than were written", index < data.length);
                String[] vals = data[index].split(",");
                // vals[0] is the row key; the remaining entries are qualifier:value pairs
                for (int i = 1; i < vals.length; i++) {
                    String[] pair = vals[i].split(":");
                    assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
                    assertEquals(pair[1],
                            Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
                }
                index++;
            }
        } finally {
            scanner.close();
        }
    } finally {
        table.close();
    }
    //test if load count is the same
    assertEquals(data.length, index);
    //test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}

From source file:org.apache.hcatalog.hbase.TestHBaseDirectOutputFormat.java

License:Apache License

/**
 * Writes three rows through {@code HBaseDirectOutputFormat} with a map-only old-API job and
 * verifies that every written column can be read back from the table and that the number of
 * scanned rows equals the number of input lines.
 */
@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    //create table
    createTable(tableName, new String[] { familyName });

    // each line is "<row key>,<qualifier>:<value>,<qualifier>:<value>"
    String[] data = { "1,english:ONE,spanish:UNO", "2,english:ONE,spanish:DOS", "3,english:ONE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    try {
        for (String line : data) {
            os.write(Bytes.toBytes(line + "\n"));
        }
    } finally {
        // close even when a write fails so the stream is not leaked
        os.close();
    }

    //create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseDirectOutputFormat.class);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    int index = 0;
    HTable table = new HTable(conf, tableName);
    try {
        Scan scan = new Scan();
        scan.addFamily(familyNameBytes);
        ResultScanner scanner = table.getScanner(scan);
        try {
            for (Result result : scanner) {
                // fail with a clear message instead of an ArrayIndexOutOfBoundsException
                assertTrue("table holds more rows than were written", index < data.length);
                String[] vals = data[index].split(",");
                // vals[0] is the row key; the remaining entries are qualifier:value pairs
                for (int i = 1; i < vals.length; i++) {
                    String[] pair = vals[i].split(":");
                    assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
                    assertEquals(pair[1],
                            Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
                }
                index++;
            }
        } finally {
            scanner.close();
        }
    } finally {
        table.close();
    }
    assertEquals(data.length, index);
}

From source file:org.apache.hive.hcatalog.hbase.TestHiveHBaseTableOutputFormat.java

License:Apache License

/**
 * Writes three rows through {@code HiveHBaseTableOutputFormat} with a map-only old-API job and
 * verifies that every written column can be read back from the table and that the number of
 * scanned rows equals the number of input lines.
 */
@Test
public void directOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "directOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);
    conf.set(HCatConstants.HCAT_KEY_HIVE_CONF, HCatUtil.serialize(allConf.getAllProperties()));

    //create table
    createTable(tableName, new String[] { familyName });

    // each line is "<row key>,<qualifier>:<value>,<qualifier>:<value>"
    String[] data = { "1,english:ONE,spanish:UNO", "2,english:TWO,spanish:DOS",
            "3,english:THREE,spanish:TRES" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    getFileSystem().mkdirs(inputPath);
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    try {
        for (String line : data) {
            os.write(Bytes.toBytes(line + "\n"));
        }
    } finally {
        // close even when a write fails so the stream is not leaked
        os.close();
    }

    //create job
    JobConf job = new JobConf(conf);
    job.setJobName(testName);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWrite.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);
    // why we need to set all the 3 properties??
    job.setOutputFormat(HiveHBaseTableOutputFormat.class);
    job.set(HBaseSerDe.HBASE_TABLE_NAME, tableName);
    job.set(TableOutputFormat.OUTPUT_TABLE, tableName);
    job.set(HCatConstants.HCAT_DEFAULT_TOPIC_PREFIX + ".hbase.mapreduce.outputTableName", tableName);

    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } catch (Exception ex) {
        throw new IOException("Serialization error " + ex.getMessage(), ex);
    }

    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);
    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);
    job.setNumReduceTasks(0);
    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    int index = 0;
    HTable table = new HTable(conf, tableName);
    try {
        Scan scan = new Scan();
        scan.addFamily(familyNameBytes);
        ResultScanner scanner = table.getScanner(scan);
        try {
            for (Result result : scanner) {
                // fail with a clear message instead of an ArrayIndexOutOfBoundsException
                assertTrue("table holds more rows than were written", index < data.length);
                String[] vals = data[index].split(",");
                // vals[0] is the row key; the remaining entries are qualifier:value pairs
                for (int i = 1; i < vals.length; i++) {
                    String[] pair = vals[i].split(":");
                    assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
                    assertEquals(pair[1],
                            Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
                }
                index++;
            }
        } finally {
            scanner.close();
        }
    } finally {
        table.close();
    }
    assertEquals(data.length, index);
}

From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java

License:Apache License

public Result execute(Result result, int arg1) throws KettleException {
    result.setNrErrors(0);//  w w  w. j  a va 2  s .co m

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
    hadoopDistro = environmentSubstitute(hadoopDistro);
    if (Const.isEmpty(hadoopDistro)) {
        hadoopDistro = "generic";
    }

    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        URL resolvedJarUrl = null;
        String jarUrlS = environmentSubstitute(jarUrl);
        if (jarUrlS.indexOf("://") == -1) {
            // default to file://
            File jarFile = new File(jarUrlS);
            resolvedJarUrl = jarFile.toURI().toURL();
        } else {
            resolvedJarUrl = new URL(jarUrlS);
        }

        final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

        if (log.isDetailed())
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                    resolvedJarUrl.toExternalForm()));

        if (isSimple) {
            /*      final AtomicInteger taskCount = new AtomicInteger(0);
                  final AtomicInteger successCount = new AtomicInteger(0);
                  final AtomicInteger failedCount = new AtomicInteger(0); */

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));
            List<Class<?>> classesWithMains = JarUtility
                    .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
            for (final Class<?> clazz : classesWithMains) {
                Runnable r = new Runnable() {
                    public void run() {
                        try {
                            final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                            try {
                                //                  taskCount.incrementAndGet();
                                Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                Object[] args = (cmdLineArgsS != null)
                                        ? new Object[] { cmdLineArgsS.split(" ") }
                                        : new Object[0];
                                mainMethod.invoke(null, args);
                            } finally {
                                Thread.currentThread().setContextClassLoader(cl);
                                //                  successCount.incrementAndGet();
                                //                  taskCount.decrementAndGet();
                            }
                        } catch (Throwable ignored) {
                            // skip, try the next one
                            //                logError(ignored.getMessage());
                            //                failedCount.incrementAndGet();
                            ignored.printStackTrace();
                        }
                    }
                };
                Thread t = new Thread(r);
                t.start();
            }

            // uncomment to implement blocking
            /* if (blocking) {
              while (taskCount.get() > 0 && !parentJob.isStopped()) {
                Thread.sleep(1000);
              }
                    
              if (!parentJob.isStopped()) {
                result.setResult(successCount.get() > 0);
                result.setNrErrors((successCount.get() > 0) ? 0 : 1);
              } else {
                // we can't really know at this stage if 
                // the hadoop job will finish successfully 
                // because we have to stop now
                result.setResult(true); // look on the bright side of life :-)...
                result.setNrErrors(0);
              }
            } else { */
            // non-blocking - just set success equal to no failures arising
            // from invocation
            //          result.setResult(failedCount.get() == 0);
            //          result.setNrErrors(failedCount.get());
            result.setResult(true);
            result.setNrErrors(0);
            /* } */
        } else {
            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

            URL[] urls = new URL[] { resolvedJarUrl };
            URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

            JobConf conf = new JobConf();
            String hadoopJobNameS = environmentSubstitute(hadoopJobName);
            conf.setJobName(hadoopJobNameS);

            String outputKeyClassS = environmentSubstitute(outputKeyClass);
            conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
            String outputValueClassS = environmentSubstitute(outputValueClass);
            conf.setOutputValueClass(loader.loadClass(outputValueClassS));

            if (mapperClass != null) {
                String mapperClassS = environmentSubstitute(mapperClass);
                Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                conf.setMapperClass(mapper);
            }
            if (combinerClass != null) {
                String combinerClassS = environmentSubstitute(combinerClass);
                Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                conf.setCombinerClass(combiner);
            }
            if (reducerClass != null) {
                String reducerClassS = environmentSubstitute(reducerClass);
                Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                conf.setReducerClass(reducer);
            }

            if (inputFormatClass != null) {
                String inputFormatClassS = environmentSubstitute(inputFormatClass);
                Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                        .loadClass(inputFormatClassS);
                conf.setInputFormat(inputFormat);
            }
            if (outputFormatClass != null) {
                String outputFormatClassS = environmentSubstitute(outputFormatClass);
                Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                        .loadClass(outputFormatClassS);
                conf.setOutputFormat(outputFormat);
            }

            String hdfsHostnameS = environmentSubstitute(hdfsHostname);
            String hdfsPortS = environmentSubstitute(hdfsPort);
            String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
            String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

            // See if we can auto detect the distribution first
            HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

            if (configurer == null) {
                // go with what has been selected by the user
                configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                // if the user-specified distribution is detectable, make sure it is still
                // the current distribution!
                if (configurer != null && configurer.isDetectable()) {
                    if (!configurer.isAvailable()) {
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                configurer.distributionName()));
                    }
                }
            }
            if (configurer == null) {
                throw new KettleException(BaseMessages.getString(PKG,
                        "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
            }
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                    configurer.distributionName()));

            List<String> configMessages = new ArrayList<String>();
            configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                    configMessages);
            for (String m : configMessages) {
                logBasic(m);
            }

            String inputPathS = environmentSubstitute(inputPath);
            String[] inputPathParts = inputPathS.split(",");
            List<Path> paths = new ArrayList<Path>();
            for (String path : inputPathParts) {
                paths.add(new Path(configurer.getFilesystemURL() + path));
            }
            Path[] finalPaths = paths.toArray(new Path[paths.size()]);

            //FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
            FileInputFormat.setInputPaths(conf, finalPaths);
            String outputPathS = environmentSubstitute(outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

            // process user defined values
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    String nameS = environmentSubstitute(item.getName());
                    String valueS = environmentSubstitute(item.getValue());
                    conf.set(nameS, valueS);
                }
            }

            String workingDirectoryS = environmentSubstitute(workingDirectory);
            conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
            conf.setJar(jarUrl);

            String numMapTasksS = environmentSubstitute(numMapTasks);
            String numReduceTasksS = environmentSubstitute(numReduceTasks);
            int numM = 1;
            try {
                numM = Integer.parseInt(numMapTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of map tasks '" + numMapTasksS + "'. Setting num"
                        + "map tasks to 1");
            }
            int numR = 1;
            try {
                numR = Integer.parseInt(numReduceTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of reduce tasks '" + numReduceTasksS + "'. Setting num"
                        + "reduce tasks to 1");
            }

            conf.setNumMapTasks(numM);
            conf.setNumReduceTasks(numR);

            JobClient jobClient = new JobClient(conf);
            RunningJob runningJob = jobClient.submitJob(conf);

            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 60;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException e) {
                logError("Can't parse logging interval '" + loggingIntervalS + "'. Setting "
                        + "logging interval to 60");
            }
            if (blocking) {
                try {
                    int taskCompletionEventIndex = 0;
                    while (!parentJob.isStopped() && !runningJob.isComplete()) {
                        if (logIntv >= 1) {
                            printJobStatus(runningJob);
                            taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                            Thread.sleep(logIntv * 1000);
                        } else {
                            Thread.sleep(60000);
                        }
                    }

                    if (parentJob.isStopped() && !runningJob.isComplete()) {
                        // We must stop the job running on Hadoop
                        runningJob.killJob();
                        // Indicate this job entry did not complete
                        result.setResult(false);
                    }

                    printJobStatus(runningJob);
                    // Log any messages we may have missed while polling
                    logTaskMessages(runningJob, taskCompletionEventIndex);
                } catch (InterruptedException ie) {
                    logError(ie.getMessage(), ie);
                }

                // Entry is successful if the MR job is successful overall
                result.setResult(runningJob.isSuccessful());
            }

        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}

From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License:Open Source License

/**
 * Builds the "wordcount" {@link JobConf} used by the tests, wiring in the supplied
 * Kettle transformations.
 *
 * <p>The method initializes the Kettle environment, registers the "MapReduce Input" and
 * "MapReduce Output" step plugins, and then, for every transformation file that is not
 * {@code null}, stores that transformation's configuration XML together with the fixed
 * input/output step names ("Injector" / "Output") in the job configuration.
 *
 * <p>NOTE(review): the input and output paths ("/") are set before {@code fs.default.name}
 * and the working directory. If Hadoop qualifies those paths at the time they are set, they
 * may be bound to the default file system rather than the HDFS instance configured further
 * down - confirm this ordering is intended.
 *
 * @param mapperTransformationFile   mapper transformation file, or {@code null} to configure no mapper
 * @param combinerTransformationFile combiner transformation file, or {@code null} to configure no combiner
 * @param reducerTransformationFile  reducer transformation file, or {@code null} to configure no reducer
 * @param hostname                   host used for both the HDFS URL and the job tracker address
 * @param hdfsPort                   port appended to the HDFS URL
 * @param trackerPort                port appended to the job tracker address
 * @return the populated job configuration
 * @throws IOException     if the plugin jar location cannot be converted to a URL
 * @throws KettleException presumably when Kettle initialization or loading a transformation fails
 */
public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
        String reducerTransformationFile, String hostname, String hdfsPort, String trackerPort)
        throws IOException, KettleException {

    JobConf jobConf = new JobConf();
    jobConf.setJobName("wordcount");

    KettleEnvironment.init();

    // Both step plugins are keyed by the main class type declared on StepPluginType.
    Class<?> stepMainType = StepPluginType.class.getAnnotation(PluginMainClassType.class).value();

    // "MapReduce Input" step plugin
    Map<Class<?>, String> enterClassMap = new HashMap<Class<?>, String>();
    enterClassMap.put(stepMainType, HadoopEnterMeta.class.getName());
    PluginInterface enterPlugin = new Plugin(new String[] { "HadoopEnterPlugin" }, StepPluginType.class,
            stepMainType, "Hadoop", "MapReduce Input", "Enter a Hadoop Mapper or Reducer transformation",
            "MRI.png", false, false, enterClassMap, new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, enterPlugin);

    // "MapReduce Output" step plugin
    Map<Class<?>, String> exitClassMap = new HashMap<Class<?>, String>();
    exitClassMap.put(stepMainType, HadoopExitMeta.class.getName());
    PluginInterface exitPlugin = new Plugin(new String[] { "HadoopExitPlugin" }, StepPluginType.class,
            stepMainType, "Hadoop", "MapReduce Output", "Exit a Hadoop Mapper or Reducer transformation",
            "MRO.png", false, false, exitClassMap, new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, exitPlugin);

    // One execution configuration is shared by every transformation that gets serialized.
    TransExecutionConfiguration execConfig = new TransExecutionConfiguration();

    if (mapperTransformationFile != null) {
        jobConf.setMapRunnerClass(PentahoMapRunnable.class);
        TransMeta mapperMeta = new TransMeta(mapperTransformationFile);
        jobConf.set("transformation-map-xml", new TransConfiguration(mapperMeta, execConfig).getXML());
        jobConf.set("transformation-map-input-stepname", "Injector");
        jobConf.set("transformation-map-output-stepname", "Output");
    }

    if (combinerTransformationFile != null) {
        jobConf.setCombinerClass(GenericTransCombiner.class);
        TransMeta combinerMeta = new TransMeta(combinerTransformationFile);
        jobConf.set("transformation-combiner-xml", new TransConfiguration(combinerMeta, execConfig).getXML());
        jobConf.set("transformation-combiner-input-stepname", "Injector");
        jobConf.set("transformation-combiner-output-stepname", "Output");
    }

    if (reducerTransformationFile != null) {
        jobConf.setReducerClass((Class<? extends Reducer>) GenericTransReduce.class);
        TransMeta reducerMeta = new TransMeta(reducerTransformationFile);
        jobConf.set("transformation-reduce-xml", new TransConfiguration(reducerMeta, execConfig).getXML());
        jobConf.set("transformation-reduce-input-stepname", "Injector");
        jobConf.set("transformation-reduce-output-stepname", "Output");
    }

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(IntWritable.class);

    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(jobConf, new Path("/"));
    FileOutputFormat.setOutputPath(jobConf, new Path("/"));

    // Cluster endpoints
    jobConf.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    jobConf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    File pluginJar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    jobConf.setJar(pluginJar.toURI().toURL().toExternalForm());
    jobConf.setWorkingDirectory(new Path("/tmp/wordcount"));

    return jobConf;
}

From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License:Apache License

/**
 * Creates the "wordcount" {@link JobConf} for the integration tests, configured to run
 * the given Kettle transformations as mapper, combiner and reducer.
 *
 * <p>Steps performed, in order: initialize the Kettle environment; register the
 * "MapReduce Input" and "MapReduce Output" step plugins; for each non-{@code null}
 * transformation file, set the matching runner/combiner/reducer class and store the
 * transformation's configuration XML plus the step names "Injector" and "Output"; finally
 * set key/value classes, text input/output formats, paths, cluster addresses, jar and
 * working directory.
 *
 * <p>NOTE(review): the "/" input and output paths are applied before {@code fs.default.name}
 * and the working directory are set. Whether they end up resolved against HDFS depends on
 * when Hadoop qualifies them - verify that this order is deliberate.
 *
 * @param mapperTransformationFile   mapper transformation file; {@code null} skips mapper setup
 * @param combinerTransformationFile combiner transformation file; {@code null} skips combiner setup
 * @param reducerTransformationFile  reducer transformation file; {@code null} skips reducer setup
 * @param hostname                   host name shared by the HDFS URL and the job tracker address
 * @param hdfsPort                   HDFS port, appended to the {@code hdfs://} URL
 * @param trackerPort                job tracker port
 * @return the configured job
 * @throws IOException     if the plugin jar location cannot be turned into a URL
 * @throws KettleException presumably on Kettle initialization or transformation loading errors
 */
public static JobConf createJobConf(String mapperTransformationFile, String combinerTransformationFile,
        String reducerTransformationFile, String hostname, String hdfsPort, String trackerPort)
        throws IOException, KettleException {

    JobConf job = new JobConf();
    job.setJobName("wordcount");

    KettleEnvironment.init();

    // The main class type declared on StepPluginType keys both plugin class maps below.
    Class<?> mainType = StepPluginType.class.getAnnotation(PluginMainClassType.class).value();

    // Register the Map/Reduce Input step
    Map<Class<?>, String> inputClasses = new HashMap<Class<?>, String>();
    inputClasses.put(mainType, HadoopEnterMeta.class.getName());
    PluginInterface inputPlugin = new Plugin(new String[] { "HadoopEnterPlugin" }, StepPluginType.class,
            mainType, "Hadoop", "MapReduce Input", "Enter a Hadoop Mapper or Reducer transformation",
            "MRI.png", false, false, inputClasses, new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, inputPlugin);

    // Register the Map/Reduce Output step
    Map<Class<?>, String> outputClasses = new HashMap<Class<?>, String>();
    outputClasses.put(mainType, HadoopExitMeta.class.getName());
    PluginInterface outputPlugin = new Plugin(new String[] { "HadoopExitPlugin" }, StepPluginType.class,
            mainType, "Hadoop", "MapReduce Output", "Exit a Hadoop Mapper or Reducer transformation",
            "MRO.png", false, false, outputClasses, new ArrayList<String>(), null, null);
    PluginRegistry.getInstance().registerPlugin(StepPluginType.class, outputPlugin);

    // Shared by all three transformation configurations.
    TransExecutionConfiguration sharedExecConfig = new TransExecutionConfiguration();

    if (mapperTransformationFile != null) {
        job.setMapRunnerClass(PentahoMapRunnable.class);
        TransConfiguration mapConfig = new TransConfiguration(new TransMeta(mapperTransformationFile),
                sharedExecConfig);
        job.set("transformation-map-xml", mapConfig.getXML());
        job.set("transformation-map-input-stepname", "Injector");
        job.set("transformation-map-output-stepname", "Output");
    }

    if (combinerTransformationFile != null) {
        job.setCombinerClass(GenericTransCombiner.class);
        TransConfiguration combinerConfig = new TransConfiguration(new TransMeta(combinerTransformationFile),
                sharedExecConfig);
        job.set("transformation-combiner-xml", combinerConfig.getXML());
        job.set("transformation-combiner-input-stepname", "Injector");
        job.set("transformation-combiner-output-stepname", "Output");
    }

    if (reducerTransformationFile != null) {
        job.setReducerClass(GenericTransReduce.class);
        TransConfiguration reduceConfig = new TransConfiguration(new TransMeta(reducerTransformationFile),
                sharedExecConfig);
        job.set("transformation-reduce-xml", reduceConfig.getXML());
        job.set("transformation-reduce-input-stepname", "Injector");
        job.set("transformation-reduce-output-stepname", "Output");
    }

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setInputFormat(TextInputFormat.class);
    job.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(job, new Path("/"));
    FileOutputFormat.setOutputPath(job, new Path("/"));

    // Point the job at the target cluster.
    job.set("fs.default.name", "hdfs://" + hostname + ":" + hdfsPort);
    job.set("mapred.job.tracker", hostname + ":" + trackerPort);

    File distJar = new File("./dist/pentaho-big-data-plugin-TRUNK-SNAPSHOT.jar");
    job.setJar(distJar.toURI().toURL().toExternalForm());
    job.setWorkingDirectory(new Path("/tmp/wordcount"));

    return job;
}

From source file:org.pentaho.hadoop.mapreduce.test.TestSubmitMapReduceJob.java

License:Open Source License

/**
 * Submits the sample word-count MapReduce job to the cluster addressed by
 * {@code hostname}/{@code hdfsPort}/{@code trackerPort} and checks the produced output.
 *
 * <p>The mapper, combiner and reducer classes are loaded from
 * {@code ./test-res/pentaho-mapreduce-sample.jar} through a dedicated class loader. After
 * submission the test polls every 500 ms until the job reports completion, asserts that the
 * job succeeded, and then compares the content of {@code part-00000} with the expected
 * word counts.
 *
 * @throws Exception if the jar or its classes cannot be loaded, the job cannot be submitted,
 *                   or the output file cannot be read
 */
@Test
public void submitJob() throws Exception {

    String hdfsBaseUrl = "hdfs://" + hostname + ":" + hdfsPort;
    String inputDir = hdfsBaseUrl + "/junit/wordcount/input";
    String outputDir = hdfsBaseUrl + "/junit/wordcount/output";

    JobConf conf = new JobConf();
    conf.setJobName("wordcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    File jar = new File("./test-res/pentaho-mapreduce-sample.jar");

    // The sample job classes are not on the test classpath; they are loaded from the jar.
    URLClassLoader loader = new URLClassLoader(new URL[] { jar.toURI().toURL() });

    Class<?> mapClass = loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Map");
    // The same Reduce class serves as both combiner and reducer, so load it once.
    Class<?> reduceClass = loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount$Reduce");

    conf.setMapperClass((Class<? extends Mapper>) mapClass);
    conf.setCombinerClass((Class<? extends Reducer>) reduceClass);
    conf.setReducerClass((Class<? extends Reducer>) reduceClass);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(inputDir));
    FileOutputFormat.setOutputPath(conf, new Path(outputDir));

    conf.set("fs.default.name", hdfsBaseUrl);
    conf.set("mapred.job.tracker", hostname + ":" + trackerPort);

    conf.setJarByClass(loader.loadClass("org.pentaho.hadoop.mapreduce.sample.MRWordCount"));
    conf.setWorkingDirectory(new Path("/tmp/wordcount"));

    JobClient jobClient = new JobClient(conf);
    ClusterStatus status = jobClient.getClusterStatus();
    assertEquals(State.RUNNING, status.getJobTrackerState());

    RunningJob runningJob = jobClient.submitJob(conf);
    System.out.print("Running " + runningJob.getJobName());
    while (!runningJob.isComplete()) {
        System.out.print(".");
        Thread.sleep(500);
    }
    System.out.println();
    System.out.println("Finished " + runningJob.getJobName() + ".");

    // Fail here if the job itself failed, so stale output from an earlier run
    // cannot make the content check below pass by accident.
    assertEquals("MapReduce job did not complete successfully", Boolean.TRUE,
            Boolean.valueOf(runningJob.isSuccessful()));

    FileObject file = fsManager.resolveFile(buildHDFSURL("/junit/wordcount/output/part-00000"));
    // Always release the stream, and decode with a fixed charset rather than the platform default.
    java.io.InputStream in = file.getContent().getInputStream();
    String output;
    try {
        output = IOUtils.toString(in, "UTF-8");
    } finally {
        in.close();
    }
    assertEquals("Bye\t1\nGoodbye\t1\nHadoop\t2\nHello\t2\nWorld\t2\n", output);
}