Example usage for org.apache.hadoop.fs Path getName

List of usage examples for org.apache.hadoop.fs Path getName

Introduction

This page lists usage examples for org.apache.hadoop.fs Path getName.

Prototype

public String getName() 

Document

Returns the final component of this path.
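
For illustration, here is a minimal, self-contained sketch (the path and class name are hypothetical, chosen only to demonstrate the return value): getName() yields just the final path component, while getParent() and toString() keep the surrounding structure.

import org.apache.hadoop.fs.Path;

public class PathGetNameSketch {
    public static void main(String[] args) {
        // Hypothetical path used only for illustration.
        Path path = new Path("/user/data/2013-12-12/part-r-00000");

        System.out.println(path.getName());             // part-r-00000 (final component only)
        System.out.println(path.getParent().getName()); // 2013-12-12
        System.out.println(path.toString());            // /user/data/2013-12-12/part-r-00000
    }
}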

Usage

From source file:GetRetweetersAndCountPerUser.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        Path dir = new Path(otherArgs[1]);
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };

        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }

        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}

From source file:HadoopUtilsTest.java

License:Apache License

public static void main(String[] args) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/icntv/grade/correlate-result/2013-12-12"),
                new PathFilter() {
                    @Override
                    public boolean accept(Path path) {

                        return path.getName().matches("part-r-\\d*");
                    }
                });
        for (FileStatus f : fileStatuses) {
            IOUtils.copyBytes(fileSystem.open(f.getPath()), System.out, 4096, false);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}

From source file:Script.java

License:Open Source License

/** Deserialize the Javascript function from the distributed cache.
 *  @param conf    The Hadoop configuration
 *  @param file    The name of the cached file holding the function
 *  @return        The Javascript function
 */
public Object deserialize(Configuration conf, String file) throws IOException {
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
    if (null != cacheFiles && cacheFiles.length > 0) {
        for (Path path : cacheFiles) { // loop through cache files
            if (path.getName().equals(file)) { // find this file
                return evalCache(conf, path.toString(), file);
            }
        }
    }
    return null;
}

From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java

License:Apache License

public static List<Path> writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns,
        Path path, int numBspTask, int numGPUBspTask, int GPUPercentage) throws IOException {

    List<Path> splittedFiles = new ArrayList<Path>();

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    int inputVectorsPerGPUTask = 0;
    int inputVectorsPerCPU = 0;
    int inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (rows * GPUPercentage) / 100;
        inputVectorsPerCPU = rows - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = rows;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    for (int part = 0; part < numBspTask; part++) {

        Path partIn = new Path(path, "part" + part + ".seq");
        splittedFiles.add(partIn);
        FileSystem fs = FileSystem.get(conf);
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, IntWritable.class,
                VectorWritable.class, CompressionType.NONE);

        int interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        int start = interval * part;
        int end = start + interval;
        if ((numBspTask - 1) == part) {
            end = rows; // set to totalRows
        }
        LOG.info("Partition " + part + " file " + partIn.getParent().getName() + "/" + partIn.getName()
                + " from " + start + " to " + (end - 1));

        for (int i = start; i < end; i++) {
            DenseDoubleVector rowVector = new DenseDoubleVector(matrix[i]);
            dataWriter.append(new IntWritable(i), new VectorWritable(rowVector));
        }
        dataWriter.close();
    }

    return splittedFiles;
}

From source file:authordetect.input.SingleBookReader.java

/**
 * @param inputSplit the split describing the portion of the input file to read
 * @param context    the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context)
        throws IOException, InterruptedException {

    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();

    // get the option from configuration:
    // 0 for group by author, 1 for group by book
    int option = configuration.getInt("GROUP_OPTION", 0);

    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    //initial start point and end point
    start = split.getStart();
    end = start + split.getLength();

    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }

    start += lineReader.readLine(currentLine);

    prepareToScanBook(option);
}

From source file:azkaban.jobtype.javautils.HadoopUtils.java

License:Apache License

public static boolean shouldPathBeIgnored(Path path) throws IOException {
    return path.getName().startsWith("_");
}

From source file:azkaban.viewer.hdfs.BsonFileViewer.java

License:Apache License

@Override
public Set<Capability> getCapabilities(FileSystem fs, Path path) throws AccessControlException {
    if (path.getName().endsWith(".bson")) {
        return EnumSet.of(Capability.READ);
    }
    return EnumSet.noneOf(Capability.class);
}

From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java

License:Apache License

private void getPathSegments(Path path, List<Path> paths, List<String> segments) {
    Path curr = path;
    while (curr.getParent() != null) {
        paths.add(curr);
        segments.add(curr.getName());
        curr = curr.getParent();
    }
    Collections.reverse(paths);
    Collections.reverse(segments);
}

From source file:azkaban.viewer.hdfs.HdfsImageFileViewer.java

License:Apache License

public boolean canReadFile(FileSystem fs, Path path) {
    String fileName = path.getName();
    int pos = fileName.lastIndexOf('.');
    if (pos >= 0) {
        String suffix = fileName.substring(pos).toLowerCase();
        if (acceptedSuffix.contains(suffix)) {
            long len = 0;
            try {
                len = fs.getFileStatus(path).getLen();
            } catch (IOException e) {
                // Unable to determine the file size; treat the file as unreadable.
                e.printStackTrace();
                return false;
            }
            if (len <= MAX_IMAGE_FILE_SIZE) {
                return true;
            }
        }
    }
    return false;
}

From source file:azkaban.viewer.hdfs.HtmlFileViewer.java

License:Apache License

@Override
public Set<Capability> getCapabilities(final FileSystem fs, final Path path) throws AccessControlException {
    final String fileName = path.getName();
    final int pos = fileName.lastIndexOf('.');
    if (pos < 0) {
        return EnumSet.noneOf(Capability.class);
    }

    final String suffix = fileName.substring(pos).toLowerCase();
    if (this.acceptedSuffix.contains(suffix)) {
        return EnumSet.of(Capability.READ);
    } else {
        return EnumSet.noneOf(Capability.class);
    }
}