List of usage examples for org.apache.hadoop.fs.FileSystem#concat
public void concat(final Path trg, final Path[] psrcs) throws IOException
From source file:a.TestConcatExample.java
License:Apache License
@Test public void concatIsPermissive() throws IOException, URISyntaxException { MiniDFSCluster cluster = null;/*from w w w .j av a 2 s . com*/ final Configuration conf = WebHdfsTestUtil.createConf(); conf.set("dfs.namenode.fs-limits.min-block-size", "1000"); // Allow tiny blocks for the test try { cluster = new MiniDFSCluster.Builder(conf).numDataNodes(2).build(); cluster.waitActive(); final FileSystem webHdfs = WebHdfsTestUtil.getWebHdfsFileSystem(conf, WebHdfsFileSystem.SCHEME); final FileSystem dfs = cluster.getFileSystem(); final FileSystem fs = dfs; // WebHDFS has a bug in getLocatedBlocks Path root = new Path("/dir"); fs.mkdirs(root); short origRep = 3; short secondRep = (short) (origRep - 1); Path f1 = new Path("/dir/f1"); long size1 = writeFile(fs, f1, /* blocksize */ 4096, origRep, 5); long f1NumBlocks = fs.getFileBlockLocations(f1, 0, size1).length; assertEquals(5, f1NumBlocks); Path f2 = new Path("/dir/f2"); long size2 = writeFile(fs, f2, /* blocksize (must divide 512 for checksum) */ 4096 - 512, secondRep, 4); long f2NumBlocks = fs.getFileBlockLocations(f2, 0, size2).length; assertEquals(5, f2NumBlocks); fs.concat(f1, new Path[] { f2 }); FileStatus[] fileStatuses = fs.listStatus(root); // Only one file should remain assertEquals(1, fileStatuses.length); FileStatus fileStatus = fileStatuses[0]; // And it should be named after the first file assertEquals("f1", fileStatus.getPath().getName()); // The entire file takes the replication of the first argument assertEquals(origRep, fileStatus.getReplication()); // As expected, the new concated file is the length of both the previous files assertEquals(size1 + size2, fileStatus.getLen()); // And we should have the same number of blocks assertEquals(f1NumBlocks + f2NumBlocks, fs.getFileBlockLocations(fileStatus.getPath(), 0, size1 + size2).length); } finally { if (cluster != null) { cluster.shutdown(); } } }
From source file:org.apache.gobblin.data.management.copy.splitter.DistcpFileSplitter.java
License:Apache License
/**
 * Merges all the splits for a given file.
 * Should be called on the target/destination file system (after blocks have been copied to targetFs).
 *
 * <p>The first part is renamed to the destination file name under {@code parentPath} and the
 * remaining parts, ordered by split number, are concatenated onto it.
 *
 * @param fs {@link FileSystem} where file parts exist.
 * @param file {@link CopyableFile} to merge.
 * @param workUnits {@link WorkUnitState}s for all parts of this file; every split number in
 *        {@code [0, workUnits.size())} must appear exactly once.
 * @param parentPath {@link Path} where the parts of the file are located.
 * @return one of the given {@link WorkUnitState}s (the first in iteration order) with its split
 *         property removed, standing in for the work unit of the unsplit file.
 * @throws IOException if {@code workUnits} is empty, a work unit is not a split work unit, a split
 *         number is out of range or repeated, the first part cannot be renamed, or the file system
 *         operations fail.
 */
private static WorkUnitState mergeSplits(FileSystem fs, CopyableFile file, Collection<WorkUnitState> workUnits,
        Path parentPath) throws IOException {
    if (workUnits.isEmpty()) {
        throw new IOException(String.format("No parts to merge for file %s.", file.getDestination()));
    }

    log.info(String.format("File %s was written in %d parts. Merging.", file.getDestination(), workUnits.size()));

    // Slot each part by its split number so the concat order matches the original file order.
    Path[] parts = new Path[workUnits.size()];
    for (WorkUnitState workUnit : workUnits) {
        if (!isSplitWorkUnit(workUnit)) {
            throw new IOException("Not a split work unit.");
        }
        Split split = getSplit(workUnit).get();
        int splitNumber = split.getSplitNumber();
        if (splitNumber < 0 || splitNumber >= parts.length) {
            throw new IOException(String.format("Split number %d of file %s is outside the expected range [0, %d).",
                    splitNumber, file.getDestination(), parts.length));
        }
        // With parts.length work units, in-range and unique split numbers imply no slot stays empty.
        if (parts[splitNumber] != null) {
            throw new IOException(String.format("Split number %d of file %s appears more than once.",
                    splitNumber, file.getDestination()));
        }
        parts[splitNumber] = new Path(parentPath, split.getPartName());
    }

    Path target = new Path(parentPath, file.getDestination().getName());

    // rename reports failure through its return value. Keep going when the target is already
    // there (as the original code did), but fail clearly when there is nothing to concat onto.
    if (!fs.rename(parts[0], target) && !fs.exists(target)) {
        throw new IOException(String.format("Failed to rename %s to %s.", parts[0], target));
    }

    // With a single part the rename already produced the final file; a concat with no
    // sources has nothing to do and may be rejected by the file system.
    if (parts.length > 1) {
        fs.concat(target, Arrays.copyOfRange(parts, 1, parts.length));
    }

    WorkUnitState finalWorkUnit = workUnits.iterator().next();
    finalWorkUnit.removeProp(SPLIT_KEY);
    return finalWorkUnit;
}