Example usage for org.apache.hadoop.fs FileSystem get

List of usage examples for org.apache.hadoop.fs FileSystem get

Introduction

On this page you can find usage examples for org.apache.hadoop.fs FileSystem get.

Prototype

public static FileSystem get(Configuration conf) throws IOException 

Document

Returns the configured FileSystem implementation.
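A minimal, self-contained sketch of the call is shown below. It assumes only a Hadoop client on the classpath; the class name and the /tmp path are illustrative and are not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileSystemGetSketch {

    public static void main(String[] args) throws Exception {
        // fs.defaultFS is read from core-site.xml if it is on the classpath;
        // otherwise the local file system is used.
        Configuration conf = new Configuration();

        // Returns the configured FileSystem implementation
        // (e.g. DistributedFileSystem for an hdfs:// default).
        FileSystem fs = FileSystem.get(conf);

        // Illustrative path only.
        Path path = new Path("/tmp/filesystem-get-sketch.txt");
        System.out.println(path + " exists: " + fs.exists(path));
    }
}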

Usage

From source file:com.ailk.oci.ocnosql.tools.load.csvbulkload.CsvBulkLoadTool.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    HBaseConfiguration.addHbaseResources(getConf());
    Configuration conf = getConf();
    String quorum = conf.get("hbase.zookeeper.quorum");
    String clientPort = conf.get("hbase.zookeeper.property.clientPort");
    LOG.info("hbase.zookeeper.quorum=" + quorum);
    LOG.info("hbase.zookeeper.property.clientPort=" + clientPort);
    LOG.info("phoenix.query.dateFormat=" + conf.get("phoenix.query.dateFormat"));

    CommandLine cmdLine = null;
    try {
        cmdLine = parseOptions(args);
        LOG.info("JdbcUrl=" + getJdbcUrl(quorum + ":" + clientPort));
    } catch (IllegalStateException e) {
        printHelpAndExit(e.getMessage(), getOptions());
    }
    Class.forName(DriverManager.class.getName());
    Connection conn = DriverManager.getConnection(getJdbcUrl(quorum + ":" + clientPort));
    String tableName = cmdLine.getOptionValue(TABLE_NAME_OPT.getOpt());
    String schemaName = cmdLine.getOptionValue(SCHEMA_NAME_OPT.getOpt());
    String qualifiedTableName = getQualifiedTableName(schemaName, tableName);
    List<ColumnInfo> importColumns = buildImportColumns(conn, cmdLine, qualifiedTableName);

    LOG.info("tableName=" + tableName);
    LOG.info("schemaName=" + schemaName);
    LOG.info("qualifiedTableName=" + qualifiedTableName);

    configureOptions(cmdLine, importColumns, getConf());

    try {
        validateTable(conn, schemaName, tableName);
    } finally {
        conn.close();
    }

    Path inputPath = new Path(cmdLine.getOptionValue(INPUT_PATH_OPT.getOpt()));
    Path outputPath = null;
    if (cmdLine.hasOption(OUTPUT_PATH_OPT.getOpt())) {
        outputPath = new Path(cmdLine.getOptionValue(OUTPUT_PATH_OPT.getOpt()));
    } else {
        outputPath = new Path("/tmp/" + UUID.randomUUID());
    }
    LOG.info("Configuring HFile output path to {}", outputPath);

    Job job = new Job(getConf(),
            "Phoenix MapReduce import for " + getConf().get(PhoenixCsvToKeyValueMapper.TABLE_NAME_CONFKEY));

    // Allow overriding the job jar setting by using a -D system property at startup
    if (job.getJar() == null) {
        job.setJarByClass(PhoenixCsvToKeyValueMapper.class);
    }
    job.setInputFormatClass(TextInputFormat.class);
    FileInputFormat.addInputPath(job, inputPath);

    // The FileSystem.get() call this page documents; the returned FileSystem is not stored in this example.
    FileSystem.get(getConf());
    FileOutputFormat.setOutputPath(job, outputPath);

    job.setMapperClass(PhoenixCsvToKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    HTable htable = new HTable(getConf(), qualifiedTableName);

    // Auto configure partitioner and reducer according to the Main Data table
    HFileOutputFormat.configureIncrementalLoad(job, htable);

    LOG.info("Running MapReduce import job from {} to {}", inputPath, outputPath);
    boolean success = job.waitForCompletion(true);
    if (!success) {
        LOG.error("Import job failed, check JobTracker for details");
        return 1;
    }

    LOG.info("Loading HFiles from {}", outputPath);
    LoadIncrementalHFiles loader = new LoadIncrementalHFiles(getConf());
    loader.doBulkLoad(outputPath, htable);
    htable.close();

    LOG.info("Incremental load complete");

    LOG.info("Removing output directory {}", outputPath);
    if (!FileSystem.get(getConf()).delete(outputPath, true)) {
        LOG.error("Removing output directory {} failed", outputPath);
    }

    return 0;
}

From source file:com.alexholmes.hadooputils.combine.avro.AvroFileGenerator.java

License:Apache License

public int run(final String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println(
                String.format("Usage: %s: <file path> <number of records>", AvroFileGenerator.class.getName()));
        return 1;
    }

    Path file = new Path(args[0]);
    int numRecords = Integer.valueOf(args[1]);

    FileSystem fs = FileSystem.get(super.getConf());

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, super.getConf(), file, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        for (int i = 0; i < numRecords; i++) {
            writer.append(new Text("k" + i), new Text("v" + i));
        }
    } finally {
        writer.close();
    }

    return 0;
}

From source file:com.alexholmes.hadooputils.combine.avro.mapred.CombineAvroInputFormatTest.java

License:Apache License

@SuppressWarnings("deprecation")
public void testProjection() throws Exception {
    JobConf job = new JobConf();

    Integer defaultRank = new Integer(-1);

    String jsonSchema = "{\"type\":\"record\"," + "\"name\":\"org.apache.avro.mapred.Pair\"," + "\"fields\": [ "
            + "{\"name\":\"rank\", \"type\":\"int\", \"default\": -1},"
            + "{\"name\":\"value\", \"type\":\"long\"}" + "]}";

    Schema readerSchema = Schema.parse(jsonSchema);

    AvroJob.setInputSchema(job, readerSchema);

    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path inputPath = new Path(dir + "/out" + "/part-00000" + AvroOutputFormat.EXT);
    FileStatus fileStatus = FileSystem.get(job).getFileStatus(inputPath);
    FileSplit fileSplit = new FileSplit(inputPath, 0, fileStatus.getLen(), job);

    AvroRecordReader<Pair<Integer, Long>> recordReader = new AvroRecordReader<Pair<Integer, Long>>(job,
            fileSplit);

    AvroWrapper<Pair<Integer, Long>> inputPair = new AvroWrapper<Pair<Integer, Long>>(null);
    NullWritable ignore = NullWritable.get();

    long sumOfCounts = 0;
    long numOfCounts = 0;
    while (recordReader.next(inputPair, ignore)) {
        Assert.assertEquals((Integer) inputPair.datum().get(0), defaultRank);
        sumOfCounts += (Long) inputPair.datum().get(1);
        numOfCounts++;
    }

    Assert.assertEquals(numOfCounts, WordCountUtil.COUNTS.size());

    long actualSumOfCounts = 0;
    for (Long count : WordCountUtil.COUNTS.values()) {
        actualSumOfCounts += count;
    }

    Assert.assertEquals(sumOfCounts, actualSumOfCounts);
}

From source file:com.alexholmes.hadooputils.combine.seqfile.mapred.CombineSequenceFileTest.java

License:Apache License

public void writeSequenceFile(Path path) throws IOException {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, path, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        writer.append(key, value);
    } finally {
        writer.close();
    }
}

From source file:com.alexholmes.hadooputils.combine.seqfile.SequenceFileGenerator.java

License:Apache License

public int run(final String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println(String.format("Usage: %s: <file path> <number of records>",
                SequenceFileGenerator.class.getName()));
        return 1;
    }

    Path file = new Path(args[0]);
    int numRecords = Integer.valueOf(args[1]);

    FileSystem fs = FileSystem.get(super.getConf());

    SequenceFile.Writer writer = SequenceFile.createWriter(fs, super.getConf(), file, Text.class, Text.class,
            SequenceFile.CompressionType.BLOCK, new DefaultCodec());
    try {
        for (int i = 0; i < numRecords; i++) {
            writer.append(new Text("k" + i), new Text("v" + i));
        }
    } finally {
        writer.close();
    }

    return 0;
}

From source file:com.alexholmes.hadooputils.sort.Sort.java

License:Apache License

/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }

        // Set user-supplied (possibly default) job configs
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        // JobConf extends Configuration, so it can be passed directly to FileSystem.get()
        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}

From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java

License:Apache License

/**
 * Constructor which instantiates input/output paths.
 *
 * @param config     the Hadoop configuration
 * @param inputPath  the input directory where input files will be created
 * @param outputPath the output directory that the MapReduce job will write to
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder(final Configuration config, final Path inputPath, final Path outputPath)
        throws IOException {
    this(FileSystem.get(config), inputPath, outputPath);
}

From source file:com.alexholmes.hadooputils.test.TextIOJobBuilder.java

License:Apache License

/**
 * Constructor which instantiates input/output paths.
 *
 * @param config the Hadoop configuration
 * @throws IOException if something goes wrong
 */
public TextIOJobBuilder(final Configuration config) throws IOException {
    this(FileSystem.get(config), new Path("/input"), new Path("/output"));
}

From source file:com.alexholmes.json.mapreduce.ExampleJob.java

License:Apache License

/**
 * Writes the contents of {@link #JSON} into a file in the job input directory in HDFS.
 *
 * @param conf     the Hadoop config
 * @param inputDir the HDFS input directory where we'll write a file
 * @throws IOException if something goes wrong
 */
public static void writeInput(Configuration conf, Path inputDir) throws IOException {
    FileSystem fs = FileSystem.get(conf);

    if (fs.exists(inputDir)) {
        throw new IOException(
                String.format("Input directory '%s' exists - please remove and rerun this example", inputDir));
    }

    OutputStreamWriter writer = new OutputStreamWriter(fs.create(new Path(inputDir, "input.txt")));
    writer.write(JSON);
    IOUtils.closeStream(writer);
}

From source file:com.alibaba.jstorm.hdfs.HdfsCache.java

License:Apache License

public HdfsCache(Map conf) {
    this.hadoopConf = new Configuration();
    String hdfsHostName = (String) conf.get(Config.BLOBSTORE_HDFS_HOSTNAME);
    Integer hdfsPort = JStormUtils.parseInt(conf.get(Config.BLOBSTORE_HDFS_PORT));
    LOG.info("hdfs address hdfs://{}:{}", hdfsHostName, hdfsPort);
    hadoopConf.set("fs.defaultFS", String.format("hdfs://%s:%d", hdfsHostName, hdfsPort));
    hadoopConf.setBoolean("fs.hdfs.impl.disable.cache", true);

    try {
        fs = FileSystem.get(hadoopConf);
        String configBaseDir = (String) conf.get("hdfs.base.dir");
        if (configBaseDir != null) {
            this.baseDir = configBaseDir;
        } else {
            String clusterName = ConfigExtension.getClusterName(conf) != null
                    ? ConfigExtension.getClusterName(conf)
                    : "default";
            baseDir = baseDir + "/" + clusterName;
            if (!exist(baseDir))
                mkdir(baseDir);
        }
    } catch (IOException e) {
        LOG.error("Failed to instance hdfs cache", e);
        throw new RuntimeException(e.getMessage());
    }
}