List of usage examples for org.apache.hadoop.mapred.JobConf: the no-argument constructor
public JobConf()
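Before the project examples below, here is a minimal, self-contained sketch of the no-argument constructor in use. It is illustrative only: the class name JobConfExample is made up, and it assumes a Hadoop MR1-era classpath; the "mapred.job.tracker" key and the FileSystem.get(JobConf) idiom are the same ones several of the examples below rely on.

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) throws Exception {
        // new JobConf() loads the default Hadoop resources
        // (core-site.xml, mapred-site.xml) found on the classpath
        JobConf jobConf = new JobConf();

        // values can then be overridden programmatically; "local" runs
        // the classic MR1 framework in-process
        jobConf.set("mapred.job.tracker", "local");

        // a freshly constructed JobConf is also a common way to obtain a
        // FileSystem handle, as many of the snippets below do
        FileSystem fs = FileSystem.get(jobConf);
        System.out.println("working directory: " + fs.getWorkingDirectory());
    }
}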
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static JobConf createJobConf(Map<Object, Object> properties, JobConf defaultJobconf) {
    JobConf jobConf = defaultJobconf == null ? new JobConf() : copyJobConf(defaultJobconf);

    if (properties == null)
        return jobConf;

    return copyConfiguration(properties, jobConf);
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static Thread getHDFSShutdownHook() {
    Exception caughtException;

    try {
        // we must init the FS so the finalizer is registered
        FileSystem.getLocal(new JobConf());

        Field field = FileSystem.class.getDeclaredField("clientFinalizer");
        field.setAccessible(true);

        Thread finalizer = (Thread) field.get(null);

        if (finalizer != null)
            Runtime.getRuntime().removeShutdownHook(finalizer);

        return finalizer;
    } catch (NoSuchFieldException exception) {
        caughtException = exception;
    } catch (IllegalAccessException exception) {
        caughtException = exception;
    } catch (IOException exception) {
        caughtException = exception;
    }

    LOG.debug("unable to find and remove client hdfs shutdown hook, received exception: {}",
            caughtException.getClass().getName());

    return null;
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static Configuration removePropertiesFrom(Configuration jobConf, String... keys) {
    Map<Object, Object> properties = createProperties(jobConf);

    for (String key : keys)
        properties.remove(key);

    return copyConfiguration(properties, new JobConf());
}
From source file:cascading.flow.MapReduceFlowStep.java
License:Open Source License
@Override
protected JobConf getJobConf(JobConf parentConf) throws IOException {
    // initializing the sink against a fresh JobConf allows the sink to delete its output
    sink.sinkInit(new JobConf());

    // jobConf is the step's stored JobConf field, not the parentConf argument
    return jobConf;
}
From source file:cascading.hive.HivePartitionDemo.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb", "mytable", columnNames,
            columnTypes, partitionKeys, "\t");
    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());
    Tap partitionTap = new HivePartitionTap(hiveTap);

    Fields allFields = new Fields(columnNames);
    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Pipe pipe = new Each(" import ", allFields, new Echo(allFields), Fields.RESULTS);
    Flow flow = new HadoopFlowConnector().connect(input, partitionTap, pipe);
    flow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();
    ResultSet rs = stmt.executeQuery("select * from mydb.mytable where region = 'ASIA' ");
    String[] names = partitionedDescriptor.getColumnNames();

    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next()) {
        StringBuffer buf = new StringBuffer("JDBC>>> ");
        for (int i = 0; i < names.length; i++) {
            String name = names[i];
            buf.append(name).append("=").append(rs.getObject(i + 1)).append(", ");
        }
        System.out.println(buf.toString());
    }
    System.out.println("---------------------------------------------------------");
    stmt.close();
    con.close();

    // do the same as the JDBC above, but in Cascading.
    class RegionFilter extends BaseOperation implements Filter {
        final String region;

        public RegionFilter(String region) {
            this.region = region;
        }

        @Override
        public boolean isRemove(FlowProcess flowProcess, FilterCall filterCall) {
            if (filterCall.getArguments().getString("region").equals(this.region))
                return false;
            return true;
        }
    }

    Tap requestsInAsiaSink = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/requests-from-asia",
            SinkMode.REPLACE);
    Pipe headPipe = new Each("requests from ASIA", allFields, new RegionFilter("ASIA"));
    Flow headFlow = new HadoopFlowConnector().connect(partitionTap, requestsInAsiaSink, headPipe);
    headFlow.complete();

    TupleEntryIterator tupleEntryIterator = requestsInAsiaSink.openForRead(headFlow.getFlowProcess());
    while (tupleEntryIterator.hasNext()) {
        TupleEntry tupleEntry = tupleEntryIterator.next();
        System.out.println("Cascading>>> " + tupleEntry);
    }
    tupleEntryIterator.close();
}
From source file:cascading.hive.HiveViewDemo.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb2", "mytable2", columnNames,
            columnTypes, partitionKeys, "\t");
    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());
    Tap outputTap = new HivePartitionTap(hiveTap);

    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Fields allFields = new Fields(columnNames);
    Pipe pipe = new Each(" echo ", allFields, new Echo(allFields), Fields.RESULTS);
    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    Flow flow = new HadoopFlowConnector().connect(input, outputTap, pipe);
    flow.complete();

    String viewSelect = "select distinct customer from mydb2.mytable2 where region = 'ASIA'";
    String viewDef = "create or replace view customers_in_asia as " + viewSelect;

    HiveViewAnalyzer analyzer = new HiveViewAnalyzer();
    Collection<Tap> inputs = analyzer.asTaps(viewSelect);
    HiveFlow viewflow = new HiveFlow("create view", viewDef, inputs);
    viewflow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();
    ResultSet rs = stmt.executeQuery("select * from customers_in_asia ");

    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next())
        System.out.println("customer=" + rs.getString(1));
    System.out.println("---------------------------------------------------------");
    stmt.close();
    con.close();
}
From source file:cascading.load.Main.java
License:Open Source License
private void printSummary(CascadeStats stats) throws IOException {
    stats.captureDetail();

    OutputStream outputStream = options.hasStatsRoot() ? new ByteArrayOutputStream() : System.out;
    PrintWriter writer = new PrintWriter(outputStream);

    writer.println(options);

    StatsPrinter.printCascadeStats(writer, stats);

    if (options.hasStatsRoot()) {
        String[] lines = outputStream.toString().split("\n");

        Hfs statsTap = new Hfs(new TextLine(), options.getStatsRoot(), SinkMode.REPLACE);

        TupleEntryCollector tapWriter = statsTap.openForWrite(new JobConf());

        for (String line : lines)
            tapWriter.add(new Tuple(line));

        tapWriter.close();
    }
}
From source file:cascading.platform.hadoop.HadoopPlatform.java
License:Open Source License
@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce the local file system in local mode
        configuration.set("fs.default.name", "file:///");
        configuration.set("mapred.job.tracker", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "cascading-hadoop/build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "cascading-hadoop/build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies

            fileSys = FileSystem.get(configuration);
        } else {
            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();
            mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1, null, null, conf);

            configuration = mr.createJobConf();
        }

        // jobConf.set( "mapred.map.max.attempts", "1" );
        // jobConf.set( "mapred.reduce.max.attempts", "1" );
        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapred.job.reuse.jvm.num.tasks", -1);
        configuration.setInt("jobclient.completion.poll.interval", 50);
        configuration.setInt("jobclient.progress.monitor.poll.interval", 50);
        ((JobConf) configuration).setMapSpeculativeExecution(false);
        ((JobConf) configuration).setReduceSpeculativeExecution(false);
    }

    ((JobConf) configuration).setNumMapTasks(numMappers);
    ((JobConf) configuration).setNumReduceTasks(numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    HadoopPlanner.copyProperties((JobConf) configuration, globalProperties); // copy any external properties

    HadoopPlanner.copyJobConf(properties, (JobConf) configuration); // put all properties on the jobconf
}
From source file:cascading.platform.hadoop2.Hadoop2MR1Platform.java
License:Open Source License
@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce settings to make local mode behave the same across distributions
        configuration.set("fs.defaultFS", "file:///");
        configuration.set("mapreduce.framework.name", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/" + "build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            if (!Util.isEmpty(System.getProperty("fs.defaultFS"))) {
                LOG.info("using {}={}", "fs.defaultFS", System.getProperty("fs.defaultFS"));
                configuration.set("fs.defaultFS", System.getProperty("fs.defaultFS"));
            }

            if (!Util.isEmpty(System.getProperty("yarn.resourcemanager.address"))) {
                LOG.info("using {}={}", "yarn.resourcemanager.address",
                        System.getProperty("yarn.resourcemanager.address"));
                configuration.set("yarn.resourcemanager.address",
                        System.getProperty("yarn.resourcemanager.address"));
            }

            if (!Util.isEmpty(System.getProperty("mapreduce.jobhistory.address"))) {
                LOG.info("using {}={}", "mapreduce.jobhistory.address",
                        System.getProperty("mapreduce.jobhistory.address"));
                configuration.set("mapreduce.jobhistory.address",
                        System.getProperty("mapreduce.jobhistory.address"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies
            configuration.set("mapreduce.framework.name", "yarn");

            fileSys = FileSystem.get(configuration);
        } else {
            conf.setBoolean("yarn.is.minicluster", true);
            // conf.setInt( "yarn.nodemanager.delete.debug-delay-sec", -1 );
            // conf.set( "yarn.scheduler.capacity.root.queues", "default" );
            // conf.set( "yarn.scheduler.capacity.root.default.capacity", "100" );
            // disable blacklisting hosts not to fail localhost during unit tests
            conf.setBoolean("yarn.app.mapreduce.am.job.node-blacklisting.enable", false);

            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();

            FileSystem.setDefaultUri(conf, fileSys.getUri());

            mr = MiniMRClientClusterFactory.create(this.getClass(), 4, conf);

            configuration = mr.getConfig();
        }

        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapreduce.job.jvm.numtasks", -1);
        configuration.setInt("mapreduce.client.completion.pollinterval", 50);
        configuration.setInt("mapreduce.client.progressmonitor.pollinterval", 50);
        configuration.setBoolean("mapreduce.map.speculative", false);
        configuration.setBoolean("mapreduce.reduce.speculative", false);
    }

    configuration.setInt("mapreduce.job.maps", numMappers);
    configuration.setInt("mapreduce.job.reduces", numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    Hadoop2MR1Planner.copyProperties(configuration, globalProperties); // copy any external properties

    Hadoop2MR1Planner.copyConfiguration(properties, configuration); // put all properties on the jobconf
}
From source file:cascading.plumber.grids.AbstractGridTest.java
License:Apache License
@Test
public void shouldCopyJobConfIntoProperties() {
    JobConf jobConf = new JobConf();
    jobConf.set(KEY, VALUE);

    new MockGrid().createFlowConnector(jobConf);
}