Example usage for org.apache.hadoop.fs Path getFileSystem

List of usage examples for org.apache.hadoop.fs Path getFileSystem

Introduction

On this page you can find example usages of org.apache.hadoop.fs.Path.getFileSystem.

Prototype

public FileSystem getFileSystem(Configuration conf) throws IOException 

Source Link

Document

Return the FileSystem that owns this Path.

Usage

From source file:azkaban.jobtype.javautils.HadoopUtils.java

License:Apache License

/**
 * Recursively walks {@code path} and adds every non-ignored, non-directory entry found
 * beneath it as an input path of {@code conf}.
 *
 * @param conf job configuration that receives the input paths
 * @param path root of the walk
 * @return the {@code conf} instance that was passed in
 * @throws IllegalArgumentException if {@code shouldPathBeIgnored(path)} is true for the root
 * @throws IOException if the file system cannot be resolved or queried
 */
public static JobConf addAllSubPaths(JobConf conf, Path path) throws IOException {
    if (shouldPathBeIgnored(path)) {
        throw new IllegalArgumentException(String.format("Path[%s] should be ignored.", path));
    }

    final FileSystem fileSystem = path.getFileSystem(conf);
    if (!fileSystem.exists(path)) {
        // Nothing to add for a path that does not exist.
        return conf;
    }

    for (FileStatus entry : fileSystem.listStatus(path)) {
        final Path child = entry.getPath();
        if (shouldPathBeIgnored(child)) {
            continue;
        }
        if (entry.isDir()) {
            // Descend into sub-directories.
            addAllSubPaths(conf, child);
        } else {
            FileInputFormat.addInputPath(conf, child);
        }
    }
    return conf;
}

From source file:azkaban.jobtype.javautils.HadoopUtils.java

License:Apache License

/**
 * Resolves the file system that owns {@code file} using a freshly created
 * {@link Configuration} and delegates to the {@code saveProps(FileSystem, Props, String)}
 * overload.
 *
 * @param props properties to hand to the overload
 * @param file  target location, interpreted as a Hadoop path string
 * @throws IOException if the file system cannot be resolved or the overload fails
 */
public static void saveProps(Props props, String file) throws IOException {
    final FileSystem targetFs = new Path(file).getFileSystem(new Configuration());
    saveProps(targetFs, props, file);
}

From source file:azure.TweetUpload.java

License:Apache License

/**
 * Copies the file {@code /tmp/hive-mapred/<args[0]>/000000_0} from HDFS to a local temporary
 * file named {@code temp_<args[0]>} and uploads it as a block blob named
 * {@code user/rdp_username_here/analysisFiles/<args[0]>.tsv}.
 *
 * <p>The local temporary file is removed whether or not the upload succeeds. On any failure
 * (including a missing argument) a message is written to standard error and the JVM exits
 * with status {@code -1}.
 *
 * @param args {@code args[0]} is the name used to build the HDFS source path, the temporary
 *             file name and the blob name
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.println("Missing required argument: <name>");
        System.exit(-1);
        return;
    }

    int exitCode = 0;
    File localCopy = null;
    try {
        String filePath = "hdfs://localhost.localdomain:8020/tmp/hive-mapred/"
                + args[0] + "/000000_0"; // File location

        Configuration configuration = new Configuration();

        Path path = new Path(filePath);
        Path newFilePath = new Path("temp_" + args[0]);
        FileSystem fs = path.getFileSystem(configuration);

        // Remember the temporary file before copying so that a partially written copy is
        // cleaned up as well.
        localCopy = new File(newFilePath.toString());

        // Copy the HDFS file to the temporary local file
        fs.copyToLocalFile(path, newFilePath);

        CloudStorageAccount account = CloudStorageAccount
                .parse(storageConnectionString);
        CloudBlobClient serviceClient = account.createCloudBlobClient();

        CloudBlobContainer container = serviceClient
                .getContainerReference("container_name_here"); // Container name (must be lower case)
        container.createIfNotExists();

        // Upload file; the stream is closed even when the upload throws
        CloudBlockBlob blob = container
                .getBlockBlobReference("user/rdp_username_here/analysisFiles/"
                        + args[0] + ".tsv");
        try (FileInputStream in = new FileInputStream(localCopy)) {
            blob.upload(in, localCopy.length());
        }
    } catch (Exception e) {
        // Report the cause instead of exiting silently
        System.err.println("Tweet upload failed: " + e);
        exitCode = -1;
    } finally {
        if (localCopy != null) {
            localCopy.delete(); // Delete the temporary file
        }
    }

    // Exit only after the finally block has run; System.exit inside the catch would skip it.
    if (exitCode != 0) {
        System.exit(exitCode);
    }
}

From source file:backup.store.ExternalExtendedBlockSort.java

License:Apache License

/**
 * Manual driver: clears a hard-coded local working directory, feeds ten million randomly
 * generated blocks for a single random block pool id into an
 * {@code ExternalExtendedBlockSort}, then iterates the result, reporting every entry whose
 * stored value differs from its block id and printing the sum of the block ids per pool.
 * Finally prints the elapsed wall-clock time in milliseconds.
 */
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final Path workDir = new Path(
            "file:///home/apm/Development/git-projects/hdfs-backup/hdfs-backup-core/tmp");
    workDir.getFileSystem(conf).delete(workDir, true);

    final long startNanos = System.nanoTime();
    try (ExternalExtendedBlockSort<LongWritable> sorter =
            new ExternalExtendedBlockSort<>(conf, workDir, LongWritable.class)) {
        final Random rng = new Random();
        final int poolCount = 1;
        final int blocksPerPool = 10000000;

        for (int pool = 0; pool < poolCount; pool++) {
            final String poolId = UUID.randomUUID().toString();
            for (int n = 0; n < blocksPerPool; n++) {
                // Draw order matters for reproducing the same sequence: stamp, id, length.
                final long generationStamp = rng.nextInt(20000);
                final long id = rng.nextLong();
                final int length = rng.nextInt(Integer.MAX_VALUE);
                sorter.add(new ExtendedBlock(poolId, id, length, generationStamp),
                        new LongWritable(id));
            }
        }

        System.out.println("finished");
        sorter.finished();
        System.out.println("interate");

        for (String poolId : sorter.getBlockPoolIds()) {
            final ExtendedBlockEnum<LongWritable> blocks = sorter.getBlockEnum(poolId);
            long idSum = 0;
            for (ExtendedBlock current = blocks.next(); current != null; current = blocks.next()) {
                final long id = current.getBlockId();
                idSum += id;
                // Each block was stored with its own id as the value.
                if (blocks.currentValue().get() != id) {
                    System.err.println("Error " + id);
                }
            }
            System.out.println(idSum);
        }
    }
    final long elapsedNanos = System.nanoTime() - startNanos;
    System.out.println("Time [" + elapsedNanos / 1000000.0 + " ms]");
}

From source file:backup.store.ExternalExtendedBlockSort.java

License:Apache License

/**
 * Opens the output file of the given block pool for enumeration.
 *
 * @param blockPoolId block pool whose output file should be read
 * @return an enumeration backed by a reader on the output file, or {@code null} when the
 *         output file does not exist
 * @throws Exception if the file system lookup or opening the reader fails
 */
public ExtendedBlockEnum<T> getBlockEnum(String blockPoolId) throws Exception {
    final Path outputFile = getOutputFilePath(blockPoolId);
    final boolean present = outputFile.getFileSystem(conf).exists(outputFile);
    if (present) {
        return new BlockEnum(blockPoolId, new Reader(conf, Reader.file(outputFile)));
    }
    return null;
}

From source file:backup.store.ExternalExtendedBlockSort.java

License:Apache License

/**
 * For each block pool that has a writer, sorts the pool's input file into its output file,
 * skipping pools whose output already exists or whose input is missing.
 *
 * @throws IOException if a file system check or the sort itself fails
 */
private synchronized void sortIfNeeded() throws IOException {
    for (String poolId : writers.keySet()) {
        final Path sortedFile = getOutputFilePath(poolId);
        final Path unsortedFile = getInputFilePath(poolId);
        final FileSystem fs = sortedFile.getFileSystem(conf);

        if (fs.exists(sortedFile)) {
            // Output already present; nothing to do for this pool.
            continue;
        }
        if (!fs.exists(unsortedFile)) {
            // No input to sort for this pool.
            continue;
        }

        final LocalFileSystem localFs = FileSystem.getLocal(conf);
        final SequenceFile.Sorter poolSorter =
                new Sorter(localFs, ComparableBlock.class, dataClass, conf);
        poolSorter.sort(unsortedFile, sortedFile);
    }
}

From source file:bdss.cmu.edu.Sort.java

License:Apache License

/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 *
 * <p>Recognised options (every other argument is collected as a positional argument, and
 * exactly two of those are required: input and output):
 * <ul>
 *   <li>{@code -r <reduces>} number of reduce tasks</li>
 *   <li>{@code -inFormat <class>} / {@code -outFormat <class>} input / output format</li>
 *   <li>{@code -outKey <class>} / {@code -outValue <class>} output key / value class</li>
 *   <li>{@code -totalOrder <pcnt> <numSamples> <maxSplits>} sample the input with a
 *       {@code RandomSampler} and partition with {@code TotalOrderPartitioner}; a
 *       non-positive {@code maxSplits} means {@code Integer.MAX_VALUE}</li>
 * </ul>
 *
 * @param args command-line arguments as described above
 * @return {@code 0} if the job completed successfully, {@code 1} if it failed, or the value
 *         of {@code printUsage()} when the arguments are invalid
 * @throws Exception if a class named on the command line cannot be loaded, or if
 *                   configuring, sampling or running the job fails
 */
public int run(String[] args) throws Exception {

    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    // Default: 90% of the cluster's reduce capacity, unless a per-host value is configured.
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits) {
                    maxSplits = Integer.MAX_VALUE;
                }
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            // A "++i" ran past the end of args, so args[i - 1] is the last argument present.
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }
    // Set user-supplied (possibly default) job configs
    job = new Job(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);

    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);

    job.setNumReduceTasks(num_reduces);

    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);

    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);

    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(job.getConfiguration(), partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        // Register the cache file on the job's own configuration (as done for the partition
        // file above); the Job works on a copy of "conf", so changes made to "conf" after
        // the Job was created are not seen by the submitted job.
        DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with "
            + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}

From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseReducer.java

License:Apache License

/**
 * Initialises {@code baseDir} from the job's absolute output directory (falling back to
 * {@code "tmp"} when that is empty) and derives {@code aprioriPhase} from the name of the
 * directory configured under {@code mapred.output.dir}: the text before the first
 * {@code '-'}, minus its first two characters.
 *
 * <p>An {@link IOException} is printed to standard error and otherwise ignored.
 */
private void getBaseDirs(Context context) {
    try {
        final String jobOutputDir = getJobAbsoluteOutputDir(context);
        if (jobOutputDir.isEmpty()) {
            baseDir = "tmp";
        } else {
            baseDir = jobOutputDir;
        }

        final Path outputPath = new Path(context.getConfiguration().get("mapred.output.dir"));
        final FileSystem outputFs = outputPath.getFileSystem(context.getConfiguration());

        // NOTE(review): the status is looked up twice (check, then use) - confirm whether a
        // single lookup would do.
        if (outputFs.getFileStatus(outputPath) != null) {
            final String dirName = outputFs.getFileStatus(outputPath).getPath().getName();
            final String leadingToken = dirName.split("-")[0];
            aprioriPhase = leadingToken.substring(2);
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:be.uantwerpen.adrem.bigfim.AprioriPhaseReducer.java

License:Apache License

/**
 * Scans the entries of {@code path} whose names start with {@code prefix} and returns the
 * largest integer found at position {@code index} of the {@code '-'}-separated file name.
 *
 * @param conf   configuration used to resolve the file system of {@code path}
 * @param path   directory to list
 * @param prefix name prefix the listed entries must start with
 * @param index  position of the numeric token within the {@code '-'}-separated name
 * @return the largest value seen before the scan ended, or {@code -1} if none was parsed
 */
private int getLargestIndex(Configuration conf, Path path, String prefix, int index) {
    int best = -1;
    try {
        final FileSystem fileSystem = path.getFileSystem(conf);
        final FileStatus[] candidates = fileSystem.listStatus(path, new NameStartsWithFilter(prefix));
        for (FileStatus candidate : candidates) {
            final String[] tokens = candidate.getPath().getName().split("-");
            final int value = parseInt(tokens[index]);
            best = max(best, value);
        }
    } catch (NumberFormatException e) {
        // NOTE(review): silently ignored; a non-numeric token stops the scan and the best
        // value found so far is returned.
    } catch (IOException e) {
        // NOTE(review): silently ignored; the best value found so far is returned.
    }
    return best;
}

From source file:be.uantwerpen.adrem.bigfim.BigFIMDriver.java

License:Apache License

/**
 * Tells whether prefix generation may start for the given phase: the phase must have reached
 * {@code opt.prefixLength} and the directory {@code <outputDir>/tg<phase>} must exist.
 *
 * @param opt   options providing {@code outputDir} and {@code prefixLength}
 * @param phase current phase number
 * @return {@code true} when both conditions hold
 * @throws IOException if the file system cannot be resolved or queried
 */
private boolean canStartPrefixGeneration(FIMOptions opt, int phase) throws IOException {
    final Path phaseDir = new Path(opt.outputDir + separator + "tg" + phase);
    if (phase < opt.prefixLength) {
        // Too early; the file system is not consulted at all in this case.
        return false;
    }
    return phaseDir.getFileSystem(new Configuration()).exists(phaseDir);
}