List of usage examples for org.apache.hadoop.fs FileSystem get
public static FileSystem get(Configuration conf) throws IOException
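Before the per-source-file examples below, a minimal sketch of the basic pattern: obtain the FileSystem for the configured default URI and use it for a simple path check. The class name FileSystemGetExample and the path /tmp/example are hypothetical placeholders, and constructing the Configuration directly with new Configuration() is an assumption; most of the examples below get it from getConf() or a JobConf instead.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetExample {
    public static void main(String[] args) throws Exception {
        // Build a Configuration; in a Tool this would usually come from getConf().
        Configuration conf = new Configuration();

        // FileSystem.get(conf) returns the FileSystem backing the default URI
        // (fs.defaultFS), e.g. HDFS on a cluster or the local file system otherwise.
        FileSystem fs = FileSystem.get(conf);

        // /tmp/example is a hypothetical path used only for illustration.
        Path path = new Path("/tmp/example");
        System.out.println(path + " exists: " + fs.exists(path));
    }
}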
From source file:ca.uwaterloo.cs.bigdata2017w.assignment4.BuildPersonalizedPageRankRecords.java
License:Apache License
/**
 * Runs this tool.
 */
@SuppressWarnings({ "static-access" })
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT));
    options.addOption(OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT));
    options.addOption(
            OptionBuilder.withArgName("num").hasArg().withDescription("number of nodes").create(NUM_NODES));
    options.addOption(
            OptionBuilder.withArgName("sources").hasArg().withDescription("source nodes").create(SOURCES));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();

    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT) || !cmdline.hasOption(OUTPUT) || !cmdline.hasOption(NUM_NODES)) {
        System.out.println("args: " + Arrays.toString(args));
        HelpFormatter formatter = new HelpFormatter();
        formatter.setWidth(120);
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String inputPath = cmdline.getOptionValue(INPUT);
    String outputPath = cmdline.getOptionValue(OUTPUT);
    int n = Integer.parseInt(cmdline.getOptionValue(NUM_NODES));
    String sourcesString = cmdline.getOptionValue(SOURCES);
    String[] sources = sourcesString.split(",");
    for (int i = 0; i < sources.length; i++) {
        sources[i] = sources[i].trim();
    }

    LOG.info("Tool name: " + BuildPersonalizedPageRankRecords.class.getSimpleName());
    LOG.info(" - inputDir: " + inputPath);
    LOG.info(" - outputDir: " + outputPath);
    LOG.info(" - numNodes: " + n);
    LOG.info(" - use sources: " + sourcesString);

    Configuration conf = getConf();
    conf.setInt(NODE_CNT_FIELD, n);
    conf.setInt("mapred.min.split.size", 1024 * 1024 * 1024);
    conf.setStrings(SOURCES, sources);

    Job job = Job.getInstance(conf);
    job.setJobName(BuildPersonalizedPageRankRecords.class.getSimpleName() + ":" + inputPath);
    job.setJarByClass(BuildPersonalizedPageRankRecords.class);

    job.setNumReduceTasks(0);

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(PageRankNode.class);

    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(PageRankNode.class);

    job.setMapperClass(MyMapper.class);

    // Delete the output directory if it exists already.
    FileSystem.get(conf).delete(new Path(outputPath), true);

    job.waitForCompletion(true);

    return 0;
}
From source file:ca.uwaterloo.iss4e.hadoop.pointperrow.CosineMain.java
License:Open Source License
public int run(String[] args) throws IOException {
    Configuration conf = getConf();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: ca.uwaterloo.iss4e.hadoop.pointperrow.ConsineMain <input> <output>");
        System.exit(2);
    }

    Job job1 = new Job(conf, "ConsineMain");
    job1.setJarByClass(CosineMain.class);

    job1.setMapperClass(AggregateReadingsMapper.class);
    job1.setMapOutputKeyClass(LongWritable.class);
    job1.setMapOutputValueClass(DoubleWritable.class);

    job1.setReducerClass(AggregateReadingsReducer.class);
    job1.setOutputKeyClass(LongWritable.class);
    job1.setOutputValueClass(Text.class);

    FileInputFormat.setInputDirRecursive(job1, true);
    FileInputFormat.setInputPaths(job1, new Path(otherArgs[0]));

    int lastIdx = otherArgs[0].lastIndexOf("/");
    String tempOutput = otherArgs[0].substring(0, lastIdx) + "/temp";
    FileOutputFormat.setOutputPath(job1, new Path(tempOutput));

    System.out.println("\nStarting Job-1 ...");
    final long startTime = System.currentTimeMillis();
    try {
        final long startTimeJob1 = System.currentTimeMillis();
        if (!job1.waitForCompletion(true)) {
            System.out.println("Job-1 failed.");
        } else {
            System.out.println("Duration of Job1 "
                    + ((System.currentTimeMillis() - startTimeJob1) / 1000.0) + " seconds.");

            final Job job2 = new Job(conf, "ConsineMain Aggregate");
            job2.setJarByClass(CosineMain.class);
            job2.setInputFormatClass(CartesianInputFormat.class);
            CartesianInputFormat.setLeftInputInfo(job2, TextInputFormat.class, tempOutput);
            CartesianInputFormat.setRightInputInfo(job2, TextInputFormat.class, tempOutput);
            FileOutputFormat.setOutputPath(job2, new Path(otherArgs[1]));

            job2.setMapperClass(CartesianProductMapper.class);
            job2.setMapOutputKeyClass(DoubleWritable.class);
            job2.setMapOutputValueClass(Text.class);

            job2.setSortComparatorClass(DescendingKeyComparator.class);

            job2.setReducerClass(CartesianProductReducer.class);
            job2.setOutputKeyClass(Text.class);
            job2.setOutputValueClass(DoubleWritable.class);

            job2.setNumReduceTasks(10);

            final long startTimeJob2 = System.currentTimeMillis();
            System.out.println("\nStarting Job-2 ...");
            if (!job2.waitForCompletion(true)) {
                System.out.println("Job-2 failed.");
            } else {
                System.out.println("Duration of Job2: "
                        + ((System.currentTimeMillis() - startTimeJob2) / 1000.0) + " seconds.");
            }
        }

        FileSystem fs = FileSystem.get(conf);
        fs.delete(new Path(tempOutput), true);
    } catch (Exception e) {
        throw new RuntimeException(e);
    } finally {
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Total Duration: " + duration + " seconds.");
    }
    return 0;
}
From source file:cascading.ClusterTestCase.java
License:Open Source License
public FileSystem getFileSystem() throws IOException {
    if (fileSys != null)
        return fileSys;

    return FileSystem.get(jobConf);
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
public static FileSystem getDefaultFS(Configuration config) {
    try {
        return FileSystem.get(config);
    } catch (IOException exception) {
        throw new FlowException("unable to get handle to underlying filesystem", exception);
    }
}
From source file:cascading.flow.hadoop.util.HadoopUtil.java
License:Open Source License
private static Path getWorkingDirectory(Configuration conf) {
    String name = conf.get("mapred.working.dir");

    if (name != null) {
        return new Path(name);
    } else {
        try {
            Path dir = FileSystem.get(conf).getWorkingDirectory();
            conf.set("mapred.working.dir", dir.toString());
            return dir;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }
}
From source file:cascading.flow.tez.Hadoop2TezFlowStep.java
License:Open Source License
private static void setWorkingDirectory(Configuration conf) {
    String name = conf.get(JobContext.WORKING_DIR);

    if (name != null)
        return;

    try {
        Path dir = FileSystem.get(conf).getWorkingDirectory();
        conf.set(JobContext.WORKING_DIR, dir.toString());
    } catch (IOException exception) {
        throw new RuntimeException(exception);
    }
}
From source file:cascading.flow.tez.planner.Hadoop2TezFlowStepJob.java
License:Open Source License
private Path prepareEnsureStagingDir(TezConfiguration workingConf) throws IOException {
    String stepStagingPath = createStepStagingPath();

    workingConf.set(TezConfiguration.TEZ_AM_STAGING_DIR, stepStagingPath);

    Path stagingDir = new Path(stepStagingPath);
    FileSystem fileSystem = FileSystem.get(workingConf);

    stagingDir = fileSystem.makeQualified(stagingDir);

    TokenCache.obtainTokensForNamenodes(new Credentials(), new Path[] { stagingDir }, workingConf);

    TezClientUtils.ensureStagingDirExists(workingConf, stagingDir);

    if (fileSystem.getScheme().startsWith("file:/"))
        new File(stagingDir.toUri()).mkdirs();

    return stagingDir;
}
From source file:cascading.hive.HivePartitionDemo.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb", "mytable", columnNames,
            columnTypes, partitionKeys, "\t");

    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());
    Tap partitionTap = new HivePartitionTap(hiveTap);

    Fields allFields = new Fields(columnNames);
    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Pipe pipe = new Each(" import ", allFields, new Echo(allFields), Fields.RESULTS);

    Flow flow = new HadoopFlowConnector().connect(input, partitionTap, pipe);
    flow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();
    ResultSet rs = stmt.executeQuery("select * from mydb.mytable where region = 'ASIA' ");
    String[] names = partitionedDescriptor.getColumnNames();

    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next()) {
        StringBuffer buf = new StringBuffer("JDBC>>> ");
        for (int i = 0; i < names.length; i++) {
            String name = names[i];
            buf.append(name).append("=").append(rs.getObject(i + 1)).append(", ");
        }
        System.out.println(buf.toString());
    }
    System.out.println("---------------------------------------------------------");
    stmt.close();
    con.close();

    // do the same as the JDBC above, but in Cascading.
    class RegionFilter extends BaseOperation implements Filter {
        final String region;

        public RegionFilter(String region) {
            this.region = region;
        }

        @Override
        public boolean isRemove(FlowProcess flowProcess, FilterCall filterCall) {
            if (filterCall.getArguments().getString("region").equals(this.region))
                return false;
            return true;
        }
    }

    Tap requestsInAsiaSink = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/requests-from-asia",
            SinkMode.REPLACE);
    Pipe headPipe = new Each("requests from ASIA", allFields, new RegionFilter("ASIA"));

    Flow headFlow = new HadoopFlowConnector().connect(partitionTap, requestsInAsiaSink, headPipe);
    headFlow.complete();

    TupleEntryIterator tupleEntryIterator = requestsInAsiaSink.openForRead(headFlow.getFlowProcess());
    while (tupleEntryIterator.hasNext()) {
        TupleEntry tupleEntry = tupleEntryIterator.next();
        System.out.println("Cascading>>> " + tupleEntry);
    }
    tupleEntryIterator.close();
}
From source file:cascading.hive.HiveViewDemo.java
License:Open Source License
public static void main(String[] args) throws Exception {
    Properties properties = new Properties();
    AppProps.setApplicationName(properties, "cascading hive partitioning demo");

    JobConf jobConf = new JobConf();
    FileSystem fs = FileSystem.get(jobConf);
    fs.copyFromLocalFile(false, true, new Path(accessLog), new Path("/tmp/access.log"));

    String[] columnNames = new String[] { "ts", "customer", "bucket", "operation", "key", "region" };
    String[] columnTypes = new String[] { "timestamp", "string", "string", "string", "string", "string" };
    String[] partitionKeys = new String[] { "region" };

    HiveTableDescriptor partitionedDescriptor = new HiveTableDescriptor("mydb2", "mytable2", columnNames,
            columnTypes, partitionKeys, "\t");

    HiveTap hiveTap = new HiveTap(partitionedDescriptor, partitionedDescriptor.toScheme());
    Tap outputTap = new HivePartitionTap(hiveTap);

    class Echo extends BaseOperation implements Function {
        public Echo(Fields fieldDeclaration) {
            super(2, fieldDeclaration);
        }

        @Override
        public void operate(FlowProcess flowProcess, FunctionCall functionCall) {
            TupleEntry argument = functionCall.getArguments();
            functionCall.getOutputCollector().add(argument.getTuple());
        }
    }

    Fields allFields = new Fields(columnNames);
    Pipe pipe = new Each(" echo ", allFields, new Echo(allFields), Fields.RESULTS);
    Tap input = new Hfs(new TextDelimited(allFields), "hdfs:/tmp/access.log");

    Flow flow = new HadoopFlowConnector().connect(input, outputTap, pipe);
    flow.complete();

    String viewSelect = "select distinct customer from mydb2.mytable2 where region = 'ASIA'";
    String viewDef = "create or replace view customers_in_asia as " + viewSelect;

    HiveViewAnalyzer analyzer = new HiveViewAnalyzer();
    Collection<Tap> inputs = analyzer.asTaps(viewSelect);

    HiveFlow viewflow = new HiveFlow("create view", viewDef, inputs);
    viewflow.complete();

    Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
    Connection con = DriverManager.getConnection("jdbc:hive://", "", "");
    Statement stmt = con.createStatement();
    ResultSet rs = stmt.executeQuery("select * from customers_in_asia ");

    System.out.println("----------------------Hive JDBC--------------------------");
    while (rs.next())
        System.out.println("customer=" + rs.getString(1));
    System.out.println("---------------------------------------------------------");

    stmt.close();
    con.close();
}
From source file:cascading.platform.hadoop.HadoopPlatform.java
License:Open Source License
@Override
public synchronized void setUp() throws IOException {
    if (configuration != null)
        return;

    if (!isUseCluster()) {
        LOG.info("not using cluster");
        configuration = new JobConf();

        // enforce the local file system in local mode
        configuration.set("fs.default.name", "file:///");
        configuration.set("mapred.job.tracker", "local");
        configuration.set("mapreduce.jobtracker.staging.root.dir",
                System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        String stagingDir = configuration.get("mapreduce.jobtracker.staging.root.dir");

        if (Util.isEmpty(stagingDir))
            configuration.set("mapreduce.jobtracker.staging.root.dir",
                    System.getProperty("user.dir") + "/build/tmp/cascading/staging");

        fileSys = FileSystem.get(configuration);
    } else {
        LOG.info("using cluster");

        if (Util.isEmpty(System.getProperty("hadoop.log.dir")))
            System.setProperty("hadoop.log.dir", "cascading-hadoop/build/test/log");

        if (Util.isEmpty(System.getProperty("hadoop.tmp.dir")))
            System.setProperty("hadoop.tmp.dir", "cascading-hadoop/build/test/tmp");

        new File(System.getProperty("hadoop.log.dir")).mkdirs(); // ignored

        JobConf conf = new JobConf();

        if (!Util.isEmpty(System.getProperty("mapred.jar"))) {
            LOG.info("using a remote cluster with jar: {}", System.getProperty("mapred.jar"));
            configuration = conf;

            ((JobConf) configuration).setJar(System.getProperty("mapred.jar"));

            if (!Util.isEmpty(System.getProperty("fs.default.name"))) {
                LOG.info("using {}={}", "fs.default.name", System.getProperty("fs.default.name"));
                configuration.set("fs.default.name", System.getProperty("fs.default.name"));
            }

            if (!Util.isEmpty(System.getProperty("mapred.job.tracker"))) {
                LOG.info("using {}={}", "mapred.job.tracker", System.getProperty("mapred.job.tracker"));
                configuration.set("mapred.job.tracker", System.getProperty("mapred.job.tracker"));
            }

            configuration.set("mapreduce.user.classpath.first", "true"); // use test dependencies
            fileSys = FileSystem.get(configuration);
        } else {
            dfs = new MiniDFSCluster(conf, 4, true, null);
            fileSys = dfs.getFileSystem();
            mr = new MiniMRCluster(4, fileSys.getUri().toString(), 1, null, null, conf);

            configuration = mr.createJobConf();
        }

        // jobConf.set( "mapred.map.max.attempts", "1" );
        // jobConf.set( "mapred.reduce.max.attempts", "1" );
        configuration.set("mapred.child.java.opts", "-Xmx512m");
        configuration.setInt("mapred.job.reuse.jvm.num.tasks", -1);
        configuration.setInt("jobclient.completion.poll.interval", 50);
        configuration.setInt("jobclient.progress.monitor.poll.interval", 50);
        ((JobConf) configuration).setMapSpeculativeExecution(false);
        ((JobConf) configuration).setReduceSpeculativeExecution(false);
    }

    ((JobConf) configuration).setNumMapTasks(numMappers);
    ((JobConf) configuration).setNumReduceTasks(numReducers);

    Map<Object, Object> globalProperties = getGlobalProperties();

    if (logger != null)
        globalProperties.put("log4j.logger", logger);

    FlowProps.setJobPollingInterval(globalProperties, 10); // should speed up tests

    HadoopPlanner.copyProperties((JobConf) configuration, globalProperties); // copy any external properties

    HadoopPlanner.copyJobConf(properties, (JobConf) configuration); // put all properties on the jobconf
}