Example usage for org.apache.hadoop.fs Path getFileSystem

Introduction

On this page you can find usage examples for the org.apache.hadoop.fs Path method getFileSystem, collected from open-source projects.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Document

Return the FileSystem that owns this Path.
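Before the project examples below, here is a minimal, self-contained sketch of the call; the file path is hypothetical. getFileSystem is equivalent to FileSystem.get(path.toUri(), conf): the scheme of the Path (hdfs://, file://, s3a://, ...) selects the FileSystem implementation that is returned.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileSystemExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();

        // file:/// resolves to LocalFileSystem; an hdfs:// path would
        // resolve to DistributedFileSystem instead (hypothetical path).
        Path path = new Path("file:///tmp/example.txt");
        FileSystem fs = path.getFileSystem(conf);

        System.out.println(fs.getUri());             // file:///
        System.out.println(fs.getClass().getName()); // org.apache.hadoop.fs.LocalFileSystem

        // FileSystem instances are cached by Hadoop, so callers normally
        // do not close them.
    }
}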

Usage

From source file:ZipFileRecordReader.java

License:Apache License

/**
 * Initialise and open the ZIP file from the FileSystem
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext)
        throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration conf = taskAttemptContext.getConfiguration();
    Path path = split.getPath();
    FileSystem fs = path.getFileSystem(conf);

    // Open the stream
    fsin = fs.open(path);
    zip = new ZipInputStream(fsin);
}
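Note the design choice here: the reader resolves the FileSystem from the split's own Path instead of calling FileSystem.get(conf). The returned implementation matches the split's URI scheme, so the same record reader works whether the ZIP archive lives on HDFS, the local filesystem, or any other store Hadoop supports.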

From source file:FormatStorage2ColumnStorageMR.java

License:Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.out.println("FormatStorage2ColumnStorageMR <input> <output>");
        System.exit(-1);
    }

    JobConf conf = new JobConf(FormatStorageMR.class);

    conf.setJobName("FormatStorage2ColumnStorageMR");

    conf.setNumMapTasks(1);
    conf.setNumReduceTasks(4);

    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(Unit.Record.class);

    conf.setMapperClass(FormatStorageMapper.class);
    conf.setReducerClass(ColumnStorageReducer.class);

    conf.setInputFormat(FormatStorageInputFormat.class);
    conf.set("mapred.output.compress", "flase");

    Head head = new Head();
    initHead(head);

    head.toJobConf(conf);

    FileInputFormat.setInputPaths(conf, args[0]);
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, outputPath);

    FileSystem fs = outputPath.getFileSystem(conf);
    fs.delete(outputPath, true);

    JobClient jc = new JobClient(conf);
    RunningJob rj = null;
    rj = jc.submitJob(conf);

    String lastReport = "";
    SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
    long reportTime = System.currentTimeMillis();
    long maxReportInterval = 3 * 1000;
    while (!rj.isComplete()) {
        try {
            Thread.sleep(1000);
        } catch (InterruptedException e) {
        }

        int mapProgress = Math.round(rj.mapProgress() * 100);
        int reduceProgress = Math.round(rj.reduceProgress() * 100);

        String report = " map = " + mapProgress + "%,  reduce = " + reduceProgress + "%";

        if (!report.equals(lastReport) || System.currentTimeMillis() >= reportTime + maxReportInterval) {

            String output = dateFormat.format(Calendar.getInstance().getTime()) + report;
            System.out.println(output);
            lastReport = report;
            reportTime = System.currentTimeMillis();
        }
    }

    System.exit(0);

}
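The delete-before-submit pattern in this example (outputPath.getFileSystem(conf) followed by fs.delete(outputPath, true)) is common in driver code: resolving the FileSystem from the output path itself ensures the recursive delete runs against the same filesystem the job will write to, and it clears a stale output directory that would otherwise make FileOutputFormat reject the job.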

From source file:acromusashi.stream.bolt.hdfs.HdfsStoreBolt.java

License:Open Source License

/**
 * {@inheritDoc}
 */
@SuppressWarnings("rawtypes")
@Override
public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
    super.prepare(stormConf, context, collector);

    String componentId = context.getThisComponentId();
    int taskId = context.getThisTaskId();

    HdfsStoreConfig config = new HdfsStoreConfig();

    config.setOutputUri((String) stormConf.get("hdfsstorebolt.outputuri"));
    config.setFileNameHeader((String) stormConf.get("hdfsstorebolt.filenameheader"));
    config.setFileSwitchIntarval(((Long) stormConf.get("hdfsstorebolt.interval")).intValue());
    config.setFileNameBody("_" + componentId + "_" + taskId + "_");

    boolean isPreprocess = true;
    Object isPreprocessObj = stormConf.get("hdfsstorebolt.executepreprocess");
    if (isPreprocessObj != null && isPreprocessObj instanceof Boolean) {
        isPreprocess = ((Boolean) isPreprocessObj).booleanValue();
    }

    try {
        // Resolve the FileSystem for the configured HDFS output URI
        Configuration conf = new Configuration();
        Path dstPath = new Path(config.getOutputUri());
        FileSystem fileSystem = dstPath.getFileSystem(conf);

        // Run the HDFS preprocessor if enabled
        if (isPreprocess) {
            HdfsPreProcessor.execute(fileSystem, config.getOutputUri(),
                    config.getFileNameHeader() + config.getFileNameBody(), config.getTmpFileSuffix());
        }

        this.delegate = new HdfsOutputSwitcher();
        this.delegate.initialize(fileSystem, config, System.currentTimeMillis());
    } catch (Exception ex) {
        logger.warn("Failed to initialize HDFS writer.", ex);
        throw new InitFailException(ex);
    }
}

From source file:alluxio.hadoop.mapreduce.KeyValueOutputCommitter.java

License:Apache License

/**
 * @param taskContext MapReduce task configuration
 * @return true if the task output directory exists, otherwise false
 * @throws IOException if fails to determine whether the output directory exists
 */
@Override
public boolean needsTaskCommit(TaskAttemptContext taskContext) throws IOException {
    Path taskOutputPath = new Path(KeyValueOutputFormat.getTaskOutputURI(taskContext).toString());
    FileSystem fs = taskOutputPath.getFileSystem(taskContext.getConfiguration());
    return fs.exists(taskOutputPath);
}
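Here the task output URI comes from KeyValueOutputFormat, so getFileSystem returns whatever implementation backs that URI's scheme, and the existence check runs against that store rather than the cluster's default filesystem.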

From source file:alluxio.underfs.hdfs.HdfsUnderFileSystem.java

License:Apache License

/**
 * Constructs a new HDFS {@link UnderFileSystem}.
 *
 * @param uri the {@link AlluxioURI} for this UFS
 * @param conf the configuration for Hadoop
 */
public HdfsUnderFileSystem(AlluxioURI uri, Object conf) {
    super(uri);
    final String ufsPrefix = uri.toString();
    final org.apache.hadoop.conf.Configuration hadoopConf;
    if (conf != null && conf instanceof org.apache.hadoop.conf.Configuration) {
        hadoopConf = (org.apache.hadoop.conf.Configuration) conf;
    } else {
        hadoopConf = new org.apache.hadoop.conf.Configuration();
    }
    prepareConfiguration(ufsPrefix, hadoopConf);
    hadoopConf.addResource(new Path(hadoopConf.get(PropertyKey.UNDERFS_HDFS_CONFIGURATION.toString())));
    HdfsUnderFileSystemUtils.addS3Credentials(hadoopConf);

    Path path = new Path(ufsPrefix);
    try {
        mFileSystem = path.getFileSystem(hadoopConf);
    } catch (IOException e) {
        LOG.error("Exception thrown when trying to get FileSystem for {}", ufsPrefix, e);
        throw Throwables.propagate(e);
    }
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

/**
 * Returns the column-wise mean of a DistributedRowMatrix
 *
 * @param vectorClass desired class for the column-wise mean vector e.g.
 *          RandomAccessSparseVector, DenseVector
 * @return Vector containing the column-wise mean of this
 */
public Vector columnMeans(String vectorClass) throws IOException {
    Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
    Configuration initialConf = getConf() == null ? new Configuration() : getConf();
    String vectorClassFull = "org.apache.mahout.math." + vectorClass;
    Vector mean = MatrixColumnMeansJob.run(initialConf, rowPath, outputVectorTmpPath, vectorClassFull);
    if (!keepTempFiles) {
        FileSystem fs = outputVectorTmpPath.getFileSystem(initialConf);
        fs.delete(outputVectorTmpPath, true);
    }
    return mean;
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

@Override
public Vector times(Vector v) {
    try {
        Configuration initialConf = getConf() == null ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesJobConf(initialConf, v, numRows, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));
        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

@Override
public Vector timesSquared(Vector v) {
    try {
        Configuration initialConf = getConf() == null ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesSquaredJobConf(initialConf, v, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));
        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

private static void prepareInput(Configuration conf, Path inputPath, Path exampleFile, int n)
        throws IOException {
    FileSystem fs = inputPath.getFileSystem(conf);

    // Create input file writers depending on bspTaskNum
    int bspTaskNum = conf.getInt("bsp.peers.num", 1);
    SequenceFile.Writer[] inputWriters = new SequenceFile.Writer[bspTaskNum];
    for (int i = 0; i < bspTaskNum; i++) {
        Path inputFile = new Path(inputPath, "input" + i + ".seq");
        LOG.info("inputFile: " + inputFile.toString());
        inputWriters[i] = SequenceFile.createWriter(fs, conf, inputFile, IntWritable.class, NullWritable.class,
                CompressionType.NONE);
    }

    // Create example file writer
    SequenceFile.Writer exampleWriter = SequenceFile.createWriter(fs, conf, exampleFile, IntWritable.class,
            NullWritable.class, CompressionType.NONE);

    // Write random values to input files and example
    IntWritable inputKey = new IntWritable();
    NullWritable nullValue = NullWritable.get();
    Random r = new Random();
    for (long i = 0; i < n; i++) {
        inputKey.set(r.nextInt(n));
        for (int j = 0; j < inputWriters.length; j++) {
            inputWriters[j].append(inputKey, nullValue);
        }
        inputKey.set(r.nextInt(n));
        exampleWriter.append(inputKey, nullValue);
    }

    // Close file writers
    for (int j = 0; j < inputWriters.length; j++) {
        inputWriters[j].close();
    }
    exampleWriter.close();
}

From source file:at.illecker.hama.hybrid.examples.hellohybrid.HelloHybridBSP.java

License:Apache License

static void printOutput(BSPJob job, Path path) throws IOException {
    FileSystem fs = path.getFileSystem(job.getConfiguration());
    FileStatus[] files = fs.listStatus(path);
    for (int i = 0; i < files.length; i++) {
        if (files[i].getLen() > 0) {
            System.out.println("File " + files[i].getPath());
            SequenceFile.Reader reader = null;
            try {
                reader = new SequenceFile.Reader(fs, files[i].getPath(), job.getConfiguration());

                IntWritable key = new IntWritable();
                NullWritable value = NullWritable.get();
                while (reader.next(key, value)) {
                    System.out.println("key: '" + key.get() + "' value: '" + value + "'\n");
            }
            } catch (IOException e) {
                FSDataInputStream in = fs.open(files[i].getPath());
                IOUtils.copyBytes(in, System.out, job.getConfiguration(), false);
                in.close();
            } finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
    }
    // fs.delete(FileOutputFormat.getOutputPath(job), true);
}