List of usage examples for org.apache.hadoop.fs.Path.getName()
public String getName()
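getName() returns the final component of the path: the file or directory name with all parent directories stripped. A minimal sketch of the behavior; the class name is hypothetical and only Hadoop's Path class is assumed on the classpath:

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode/user/data/part-r-00000");
        System.out.println(p.getName());                 // prints "part-r-00000"
        System.out.println(p.getParent().getName());     // prints "data"
        System.out.println(new Path("/").getName());     // prints "" (the root has an empty name)
    }
}

The examples below show the common patterns built on this method: prefix and suffix checks, regex matches, exact-name lookups, and walking a path's parent chain.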
From source file:GetRetweetersAndCountPerUser.java
License:Apache License
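Runs a retweet-count MapReduce job, then uses getName() in a PathFilter to read back only the reducer output files (names starting with "part-r-").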
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 3) {
        System.err.println("Usage: GetRetweetersAndCountPerUser <in> <out> <num_reducers>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(RetweetersPerUser.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    System.out.println(otherArgs[0]);
    job.setMapperClass(TweetMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(Integer.parseInt(otherArgs[2]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    if (job.waitForCompletion(true)) {
        FileSystem hdfs = FileSystem.get(new URI(otherArgs[1]), conf);
        Path dir = new Path(otherArgs[1]);
        // Only accept reducer output files, skipping _SUCCESS and other side files.
        PathFilter filter = new PathFilter() {
            public boolean accept(Path file) {
                return file.getName().startsWith("part-r-");
            }
        };
        HashMap<Integer, Integer> counts_for_user = new HashMap<Integer, Integer>();
        FileStatus[] files = hdfs.listStatus(dir, filter);
        Arrays.sort(files);
        for (int i = 0; i != files.length; i++) {
            Path pt = files[i].getPath();
            BufferedReader br = new BufferedReader(new InputStreamReader(hdfs.open(pt)));
            String line = null;
            while ((line = br.readLine()) != null) {
                String[] columns = line.split("\t");
                int key = Integer.parseInt(columns[0]);
                if (counts_for_user.containsKey(key))
                    counts_for_user.put(key, counts_for_user.get(key) + 1);
                else
                    counts_for_user.put(key, 1);
            }
            br.close();
        }
        FSDataOutputStream fsDataOutputStream = hdfs.create(new Path(otherArgs[1] + "_count"));
        PrintWriter writer = new PrintWriter(fsDataOutputStream);
        for (Entry<Integer, Integer> e : counts_for_user.entrySet()) {
            writer.write(e.getKey() + "\t" + e.getValue() + "\n");
        }
        writer.close();
        fsDataOutputStream.close();
        hdfs.close();
        System.exit(0);
    }
    System.exit(1);
}
From source file:HadoopUtilsTest.java
License:Apache License
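Uses getName() with a regular expression to select reducer output files (part-r- followed by digits) before streaming their contents to stdout.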
public static void main(String[] args) throws IOException {
    Configuration configuration = HBaseConfiguration.create();
    FileSystem fileSystem = null;
    try {
        fileSystem = FileSystem.get(configuration);
        FileStatus[] fileStatuses = fileSystem.listStatus(new Path("/icntv/grade/correlate-result/2013-12-12"),
                new PathFilter() {
                    @Override
                    public boolean accept(Path path) {
                        return path.getName().matches("part-r-\\d*");
                    }
                });
        for (FileStatus f : fileStatuses) {
            IOUtils.copyBytes(fileSystem.open(f.getPath()), System.out, 4096, false);
        }
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        if (null != fileSystem) {
            fileSystem.close();
        }
    }
}
From source file:Script.java
License:Open Source License
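Uses getName() to find a specific file by exact name among the local distributed-cache files.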
/**
 * Deserialize the Javascript function from the distributed cache.
 * @param conf The Hadoop configuration
 * @param file The name of the cached file to look up
 * @return The Javascript function
 */
public Object deserialize(Configuration conf, String file) throws IOException {
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(conf);
    if (null != cacheFiles && cacheFiles.length > 0) {
        for (Path path : cacheFiles) { // loop through cache files
            if (path.getName().equals(file)) { // find this file
                return evalCache(conf, path.toString(), file);
            }
        }
    }
    return null;
}
From source file:at.illecker.hama.hybrid.examples.matrixmultiplication2.DistributedRowMatrix.java
License:Apache License
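Uses getName() (together with getParent().getName()) to build a readable partition-file label for logging while writing a matrix out as SequenceFiles.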
public static List<Path> writeDistributedRowMatrix(Configuration conf, double[][] matrix, int rows, int columns,
        Path path, int numBspTask, int numGPUBspTask, int GPUPercentage) throws IOException {

    List<Path> splittedFiles = new ArrayList<Path>();

    // Compute work distributions
    int cpuTaskNum = numBspTask - numGPUBspTask;
    int inputVectorsPerGPUTask = 0;
    int inputVectorsPerCPU = 0;
    int inputVectorsPerCPUTask = 0;
    if ((numGPUBspTask > 0) && (GPUPercentage > 0) && (GPUPercentage <= 100)) {
        inputVectorsPerGPUTask = (rows * GPUPercentage) / 100;
        inputVectorsPerCPU = rows - inputVectorsPerGPUTask;
    } else {
        inputVectorsPerCPU = rows;
    }
    if (cpuTaskNum > 0) {
        inputVectorsPerCPUTask = inputVectorsPerCPU / cpuTaskNum;
    }

    for (int part = 0; part < numBspTask; part++) {
        Path partIn = new Path(path, "part" + part + ".seq");
        splittedFiles.add(partIn);
        FileSystem fs = FileSystem.get(conf);
        final SequenceFile.Writer dataWriter = SequenceFile.createWriter(fs, conf, partIn, IntWritable.class,
                VectorWritable.class, CompressionType.NONE);

        int interval = 0;
        if (part > cpuTaskNum) {
            interval = inputVectorsPerGPUTask;
        } else {
            interval = inputVectorsPerCPUTask;
        }
        int start = interval * part;
        int end = start + interval;
        if ((numBspTask - 1) == part) {
            end = rows; // set to totalRows
        }
        LOG.info("Partition " + part + " file " + partIn.getParent().getName() + "/" + partIn.getName()
                + " from " + start + " to " + (end - 1));

        for (int i = start; i < end; i++) {
            DenseDoubleVector rowVector = new DenseDoubleVector(matrix[i]);
            dataWriter.append(new IntWritable(i), new VectorWritable(rowVector));
        }
        dataWriter.close();
    }
    return splittedFiles;
}
From source file:authordetect.input.SingleBookReader.java
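Uses getName() inside a RecordReader to record which input file the current split belongs to.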
/**
 * @param inputSplit the split to read
 * @param context the information about the task
 * @throws java.io.IOException
 * @throws InterruptedException
 */
@Override
public void initialize(InputSplit inputSplit, TaskAttemptContext context) throws IOException, InterruptedException {
    FileSplit split = (FileSplit) inputSplit;
    Configuration configuration = context.getConfiguration();

    // Get the option from configuration:
    // 0 for group by author, 1 for group by book
    int option = configuration.getInt("GROUP_OPTION", 0);

    Path path = split.getPath();
    filename = path.getName();
    FileSystem fileSystem = path.getFileSystem(configuration);
    FSDataInputStream inputStream = fileSystem.open(path);
    lineReader = new LineReader(inputStream, configuration);

    // Initial start point and end point
    start = split.getStart();
    end = start + split.getLength();
    inputStream.seek(start);
    if (start != 0) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min(Integer.MAX_VALUE, end - start));
    }
    start += lineReader.readLine(currentLine);
    prepareToScanBook(option);
}
From source file:azkaban.jobtype.javautils.HadoopUtils.java
License:Apache License
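Uses getName() to skip paths whose names start with an underscore, Hadoop's convention for non-data files such as _SUCCESS and _logs.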
public static boolean shouldPathBeIgnored(Path path) throws IOException {
    // Names beginning with "_" are Hadoop side files, not data.
    return path.getName().startsWith("_");
}
From source file:azkaban.viewer.hdfs.BsonFileViewer.java
License:Apache License
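Uses getName() to grant the read capability only for files with a .bson extension.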
@Override
public Set<Capability> getCapabilities(FileSystem fs, Path path) throws AccessControlException {
    if (path.getName().endsWith(".bson")) {
        return EnumSet.of(Capability.READ);
    }
    return EnumSet.noneOf(Capability.class);
}
From source file:azkaban.viewer.hdfs.HdfsBrowserServlet.java
License:Apache License
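Uses getName() while walking up the parent chain to decompose a path into its individual segments.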
private void getPathSegments(Path path, List<Path> paths, List<String> segments) {
    Path curr = path;
    // Walk up to the root, collecting each ancestor path and its name.
    while (curr.getParent() != null) {
        paths.add(curr);
        segments.add(curr.getName());
        curr = curr.getParent();
    }
    // The walk produced root-last order; reverse to get root-first.
    Collections.reverse(paths);
    Collections.reverse(segments);
}
From source file:azkaban.viewer.hdfs.HdfsImageFileViewer.java
License:Apache License
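Uses getName() to extract the file extension and accept only image files below a size limit.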
public boolean canReadFile(FileSystem fs, Path path) {
    String fileName = path.getName();
    int pos = fileName.lastIndexOf('.');
    if (pos >= 0) {
        String suffix = fileName.substring(pos).toLowerCase();
        if (acceptedSuffix.contains(suffix)) {
            long len = 0;
            try {
                len = fs.getFileStatus(path).getLen();
            } catch (IOException e) {
                e.printStackTrace();
                return false;
            }
            if (len <= MAX_IMAGE_FILE_SIZE) {
                return true;
            }
        }
    }
    return false;
}
From source file:azkaban.viewer.hdfs.HtmlFileViewer.java
License:Apache License
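Uses getName() to extract the file extension and grant the read capability only for accepted HTML suffixes.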
@Override
public Set<Capability> getCapabilities(final FileSystem fs, final Path path) throws AccessControlException {
    final String fileName = path.getName();
    final int pos = fileName.lastIndexOf('.');
    if (pos < 0) {
        return EnumSet.noneOf(Capability.class);
    }
    final String suffix = fileName.substring(pos).toLowerCase();
    if (this.acceptedSuffix.contains(suffix)) {
        return EnumSet.of(Capability.READ);
    } else {
        return EnumSet.noneOf(Capability.class);
    }
}