Usage examples for org.apache.hadoop.fs.FileStatus#isDirectory
public boolean isDirectory()
Returns true if this FileStatus represents a directory, false otherwise.
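Before the collected examples, a minimal self-contained sketch of the call in isolation; the default-Configuration filesystem and the /tmp/data path are illustrative assumptions, not taken from any of the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsDirectoryDemo {
    public static void main(String[] args) throws Exception {
        // Bind to whatever filesystem the default Configuration points at (local fs or HDFS).
        FileSystem fs = FileSystem.get(new Configuration());
        // /tmp/data is a hypothetical directory used only for this sketch.
        for (FileStatus status : fs.listStatus(new Path("/tmp/data"))) {
            // isDirectory() is the directory/file discriminator used throughout the examples below.
            System.out.println(status.getPath() + " -> "
                    + (status.isDirectory() ? "directory" : "file of " + status.getLen() + " bytes"));
        }
    }
}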
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByValueWithDelimitedTextInputFormat() throws Exception {
    // Seed two HDFS input directories; "Charles Wood" appears in both, so a
    // merge by value should collapse it and emit 5 of the 6 records.
    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714"
            + "\nTimon Leonard,716 Ac Ave,1-857-935-3882,62240";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "co.nubetech.hiho.dedup.DelimitedTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    // Walk every entry in the output directory; isDirectory() tells files
    // apart from subdirectories before opening them.
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByValueWithTextInputFormat() throws Exception {
    final String inputData1 = "Macon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "Timon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.TextInputFormat",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByKeyWithKeyValueTextInputFormat() throws Exception {
    final String inputData1 = "A\tMacon Kent,6269 Aenean St.,1-247-399-1051,08253"
            + "\nB\tDale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"
            + "\nC\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    final String inputData2 = "A\tTimon Leonard,716 Ac Ave,1-857-935-3882,62240"
            + "\nD\tMacaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"
            + "\nB\tCharles Wood,525-9709 In Rd.,1-370-528-4758,62714";
    createTextFileInHDFS(inputData1, "/input1", "testFile1.txt");
    createTextFileInHDFS(inputData2, "/input2", "testFile2.txt");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "key", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(4, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(4, count);
}
From source file: co.nubetech.hiho.merge.TestMergeJob.java
License: Apache License
@Test
public void testMergeByValueWithSequenceFileAsTextInputFormat() throws Exception {
    HashMap<IntWritable, Text> inputData1 = new HashMap<IntWritable, Text>();
    inputData1.put(new IntWritable(1), new Text("Macon Kent,6269 Aenean St.,1-247-399-1051,08253"));
    inputData1.put(new IntWritable(2), new Text("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510"));
    inputData1.put(new IntWritable(3), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData1, "/input1", "testFile1.seq");
    HashMap<IntWritable, Text> inputData2 = new HashMap<IntWritable, Text>();
    inputData2.put(new IntWritable(1), new Text("Timon Leonard,716 Ac Ave,1-857-935-3882,62240"));
    inputData2.put(new IntWritable(2), new Text("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584"));
    inputData2.put(new IntWritable(4), new Text("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714"));
    createSequenceFileInHdfs(inputData2, "/input2", "testFile2.seq");
    String[] args = new String[] { "-newPath", "/input1", "-oldPath", "/input2",
            "-mergeBy", "value", "-outputPath", "output",
            "-inputFormat", "org.apache.hadoop.mapreduce.lib.input.SequenceFileAsTextInputFormat",
            "-inputKeyClassName", "org.apache.hadoop.io.Text",
            "-inputValueClassName", "org.apache.hadoop.io.Text",
            "-outputFormat", "co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat" };
    MergeJob job = runMergeJobs(args);
    assertEquals(3, job.getTotalRecordsNew());
    assertEquals(3, job.getTotalRecordsOld());
    assertEquals(0, job.getBadRecords());
    assertEquals(5, job.getOutput());
    FileSystem outputFS = getFileSystem();
    Path outputPath = new Path(outputFS.getHomeDirectory(), "output");
    FileStatus[] status = outputFS.listStatus(outputPath, getOutputPathFilter());
    assertTrue(outputFS.exists(outputPath));
    List<String> expectedOutput = new ArrayList<String>();
    expectedOutput.add("Macon Kent,6269 Aenean St.,1-247-399-1051,08253");
    expectedOutput.add("Dale Zamora,521-7792 Mauris Rd.,1-214-625-6970,90510");
    expectedOutput.add("Charles Wood,525-9709 In Rd.,1-370-528-4758,62714");
    expectedOutput.add("Timon Leonard,716 Ac Ave,1-857-935-3882,62240");
    expectedOutput.add("Macaulay Jackson,5435 Dui. Avenue,1-770-395-6446,31584");
    int count = 0;
    for (FileStatus fileStat : status) {
        logger.debug("File status is " + fileStat.getPath() + " and is it a dir? " + fileStat.isDirectory());
        FSDataInputStream in = outputFS.open(fileStat.getPath());
        String line = null;
        while ((line = in.readLine()) != null) {
            logger.debug("Output is " + line);
            assertTrue("Matched output " + line, expectedOutput.contains(line));
            expectedOutput.remove(line);
            count++;
        }
        in.close();
    }
    assertEquals(5, count);
}
From source file: com.aliyun.fs.oss.nat.NativeOssFileSystem.java
License: Apache License
@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize,
        short replication, long blockSize, Progressable progress) throws IOException {
    FileStatus status = null;
    try {
        // get the status or throw an FNFE
        status = getFileStatus(f);
        // if the thread reaches here, there is something at the path
        if (status.isDirectory()) {
            // path references a directory: automatic error
            throw new FileAlreadyExistsException(f + " is a directory");
        }
        if (!overwrite) {
            // path references a file and overwrite is disabled
            throw new FileAlreadyExistsException(f + " already exists");
        }
        LOG.debug("Overwriting file " + f);
    } catch (FileNotFoundException e) {
        // this means the file is not found
    }
    Path absolutePath = makeAbsolute(f);
    String key = pathToKey(absolutePath);
    return new FSDataOutputStream(
            new NativeOssFsOutputStream(getConf(), store, key, false, progress, bufferSize), statistics);
}
From source file: com.aliyun.fs.utils.OssInputUtils.java
License: Apache License
public FileSplit[] getSplits(String file, int numSplits) throws IOException {
    Path path = new Path(file);
    this.fs = FileSystem.get(path.toUri(), conf);
    fs.initialize(path.toUri(), conf);
    FileStatus[] files = fs.listStatus(path);
    long totalSize = 0;
    // Splits are computed over plain files only; a directory entry is an error.
    for (FileStatus file1 : files) {
        if (file1.isDirectory()) {
            throw new IOException("Not a file: " + file1.getPath());
        }
        totalSize += file1.getLen();
    }
    // goalSize spreads the total input evenly across the requested number of splits,
    // bounded below by the configured minimum split size.
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(
            conf.getLong(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.SPLIT_MINSIZE, 1), 1);
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file2 : files) {
        Path fp = file2.getPath();
        long length = file2.getLen();
        if (length != 0) {
            long splitSize = Math.max(minSize, goalSize);
            long bytesRemaining = length;
            // Carve fixed-size splits while the remainder exceeds splitSize by more
            // than the slop factor; the final chunk absorbs the leftover bytes.
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, splitSize, new String[0]);
                splits.add(split);
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                FileSplit split = new FileSplit(fp, length - bytesRemaining, bytesRemaining, new String[0]);
                splits.add(split);
            }
        }
    }
    LOG.info("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
From source file: com.aliyun.odps.fs.VolumeFileSystem.java
License: Apache License
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
    Path absF = fixRelativePart(f);
    String filePath = getPathName(absF);
    if (!exists(absF)) {
        throw new FileNotFoundException(filePath);
    }
    FileStatus fileStatus = getFileStatus(f);
    if (fileStatus.isDirectory()) {
        throw new FileNotFoundException(VolumeFSErrorMessageGenerator.isADirectory(filePath));
    }
    return new FSDataInputStream(
            new VolumeFSInputStream(filePath, volumeClient, fileStatus.getLen(), getConf()));
}
From source file: com.architecting.ch07.MapReduceIndexerTool.java
License: Apache License
/** API for Java clients; visible for testing; may become a public API eventually. */
int run(Options options) throws Exception {
    if (getConf().getBoolean("isMR1", false) && "local".equals(getConf().get("mapred.job.tracker"))) {
        throw new IllegalStateException(
                "Running with LocalJobRunner (i.e. all of Hadoop inside a single JVM) is not supported "
                        + "because LocalJobRunner does not (yet) implement the Hadoop Distributed Cache feature, "
                        + "which is required for passing files via --files and --libjars");
    }
    long programStartTime = System.nanoTime();
    getConf().setInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, options.maxSegments);

    // switch off a false warning about allegedly not implementing Tool,
    // see http://hadoop.6.n7.nabble.com/GenericOptionsParser-warning-td8103.html
    // and https://issues.apache.org/jira/browse/HADOOP-8183
    getConf().setBoolean("mapred.used.genericoptionsparser", true);

    if (options.log4jConfigFile != null) {
        Utils.setLogConfigFile(options.log4jConfigFile, getConf());
        addDistributedCacheFile(options.log4jConfigFile, getConf());
    }

    Configuration config = HBaseConfiguration.create();
    Job job = Job.getInstance(config);
    job.setJarByClass(getClass());

    // To be able to run this example from eclipse, we need to make sure
    // the built jar is distributed to the map-reduce tasks from the
    // local file system.
    job.addCacheArchive(new URI("file:///home/cloudera/ahae/target/ahae.jar"));

    FileSystem fs = options.outputDir.getFileSystem(job.getConfiguration());
    if (fs.exists(options.outputDir) && !delete(options.outputDir, true, fs)) {
        return -1;
    }
    Path outputResultsDir = new Path(options.outputDir, RESULTS_DIR);
    Path outputReduceDir = new Path(options.outputDir, "reducers");

    int reducers = 1;
    Scan scan = new Scan();
    scan.addFamily(CF);
    // tag::SETUP[]
    scan.setCaching(500); // <1>
    scan.setCacheBlocks(false); // <2>
    TableMapReduceUtil.initTableMapperJob( // <3>
            options.inputTable, // Input HBase table name
            scan, // Scan instance to control what to index
            HBaseAvroToSOLRMapper.class, // Mapper to parse cells content.
            Text.class, // Mapper output key
            SolrInputDocumentWritable.class, // Mapper output value
            job);

    FileOutputFormat.setOutputPath(job, outputReduceDir);
    job.setJobName(getClass().getName() + "/" + Utils.getShortClassName(HBaseAvroToSOLRMapper.class));
    job.setReducerClass(SolrReducer.class); // <4>
    job.setPartitionerClass(SolrCloudPartitioner.class); // <5>
    job.getConfiguration().set(SolrCloudPartitioner.ZKHOST, options.zkHost);
    job.getConfiguration().set(SolrCloudPartitioner.COLLECTION, options.collection);
    job.getConfiguration().setInt(SolrCloudPartitioner.SHARDS, options.shards);

    job.setOutputFormatClass(SolrOutputFormat.class);
    SolrOutputFormat.setupSolrHomeCache(options.solrHomeDir, job);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(SolrInputDocumentWritable.class);
    job.setSpeculativeExecution(false);
    // end::SETUP[]
    job.setNumReduceTasks(reducers); // Set the number of reducers based on the number of shards we have.

    if (!waitForCompletion(job, true)) {
        return -1; // job failed
    }

    assert reducers == options.shards;

    // normalize output shard dir prefix, i.e.
    // rename part-r-00000 to part-00000 (stems from zero tree merge iterations)
    // rename part-m-00000 to part-00000 (stems from > 0 tree merge iterations)
    for (FileStatus stats : fs.listStatus(outputReduceDir)) {
        String dirPrefix = SolrOutputFormat.getOutputName(job);
        Path srcPath = stats.getPath();
        if (stats.isDirectory() && srcPath.getName().startsWith(dirPrefix)) {
            String dstName = dirPrefix + srcPath.getName().substring(dirPrefix.length() + "-m".length());
            Path dstPath = new Path(srcPath.getParent(), dstName);
            if (!rename(srcPath, dstPath, fs)) {
                return -1;
            }
        }
    }

    // publish results dir
    if (!rename(outputReduceDir, outputResultsDir, fs)) {
        return -1;
    }

    if (options.goLive && !new GoLive().goLive(options, listSortedOutputShardDirs(job, outputResultsDir, fs))) {
        return -1;
    }

    goodbye(job, programStartTime);
    return 0;
}
From source file: com.architecting.ch07.MapReduceIndexerTool.java
License: Apache License
private FileStatus[] listSortedOutputShardDirs(Job job, Path outputReduceDir, FileSystem fs)
        throws FileNotFoundException, IOException {
    final String dirPrefix = SolrOutputFormat.getOutputName(job);
    FileStatus[] dirs = fs.listStatus(outputReduceDir, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().startsWith(dirPrefix);
        }
    });
    for (FileStatus dir : dirs) {
        if (!dir.isDirectory()) {
            throw new IllegalStateException("Not a directory: " + dir.getPath());
        }
    }
    // use alphanumeric sort (rather than lexicographical sort) to properly
    // handle more than 99999 shards
    Arrays.sort(dirs, new Comparator<FileStatus>() {
        @Override
        public int compare(FileStatus f1, FileStatus f2) {
            return new AlphaNumericComparator().compare(f1.getPath().getName(), f2.getPath().getName());
        }
    });
    return dirs;
}
From source file: com.asakusafw.runtime.compatibility.hadoop2.FileSystemCompatibilityHadoop2.java
License: Apache License
@Override
public boolean isDirectory(FileStatus status) {
    if (status == null) {
        throw new IllegalArgumentException("status must not be null"); //$NON-NLS-1$
    }
    return status.isDirectory();
}