List of usage examples for org.apache.hadoop.fs.Path.getParent()
public Path getParent()
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
/**
 * Finishes the part file that the given bucket is currently writing to.
 *
 * <p>
 * The bucket's writer is closed if it is open. If the bucket has a current file, its
 * in-progress file is renamed to the corresponding pending file name, the part path is
 * appended to the bucket's list of pending files, and the current file is reset to
 * {@code null}.
 *
 * @param bucketState state of the bucket whose part file should be closed
 * @throws Exception if closing the writer or accessing the file system fails
 */
private void closeCurrentPartFile(BucketState<T> bucketState) throws Exception {
    if (bucketState.isWriterOpen) {
        bucketState.writer.close();
        bucketState.isWriterOpen = false;
    }

    if (bucketState.currentFile == null) {
        // Nothing is being written for this bucket, so there is nothing to move.
        return;
    }

    final Path partFile = new Path(bucketState.currentFile);
    final Path directory = partFile.getParent();
    final String fileName = partFile.getName();

    // Both names live next to the final part file and differ only in prefix/suffix.
    final Path inProgressFile = new Path(directory, inProgressPrefix + fileName).suffix(inProgressSuffix);
    final Path pendingFile = new Path(directory, pendingPrefix + fileName).suffix(pendingSuffix);

    final FileSystem fileSystem = inProgressFile.getFileSystem(hadoopConf);
    fileSystem.rename(inProgressFile, pendingFile);
    LOG.debug("Moving in-progress bucket {} to pending file {}", inProgressFile, pendingFile);

    bucketState.pendingFiles.add(partFile.toString());
    bucketState.currentFile = null;
}
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override public void notifyCheckpointComplete(long checkpointId) throws Exception { synchronized (state.bucketStates) { Iterator<Map.Entry<String, BucketState<T>>> it = state.bucketStates.entrySet().iterator(); while (it.hasNext()) { BucketState<T> bucketState = it.next().getValue(); synchronized (bucketState.pendingFilesPerCheckpoint) { Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet(); Set<Long> checkpointsToRemove = new HashSet<>(); for (Long pastCheckpointId : pastCheckpointIds) { if (pastCheckpointId <= checkpointId) { LOG.debug("Moving pending files to final location for checkpoint {}", pastCheckpointId); // All the pending files are buckets that have been completed but are waiting to be renamed // to their final name for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) { Path finalPath = new Path(filename); Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName()).suffix(pendingSuffix); FileSystem fs = pendingPath.getFileSystem(hadoopConf); fs.rename(pendingPath, finalPath); LOG.debug(/*w w w . j a va 2 s. c om*/ "Moving pending file {} to final location having completed checkpoint {}.", pendingPath, pastCheckpointId); } checkpointsToRemove.add(pastCheckpointId); } } if (!bucketState.isWriterOpen && bucketState.pendingFiles.isEmpty()) { // We've dealt with all the pending files and the writer for this bucket is not currently open. // Therefore this bucket is currently inactive and we can remove it from our state. it.remove(); } else { for (Long toRemove : checkpointsToRemove) { bucketState.pendingFilesPerCheckpoint.remove(toRemove); } } } } } }
From source file:com.mvad.flink.demo.streaming.lib.sink.bucketing.BucketingSink.java
License:Apache License
@Override public void restoreState(State<T> state) { this.state = state; FileSystem fs;// ww w . j a v a 2s . c o m try { fs = new Path(basePath).getFileSystem(HadoopFileSystem.getHadoopConfiguration()); } catch (IOException e) { LOG.error("Error while creating FileSystem in checkpoint restore.", e); throw new RuntimeException("Error while creating FileSystem in checkpoint restore.", e); } for (BucketState<T> bucketState : state.bucketStates.values()) { // we can clean all the pending files since they where renamed to final files // after this checkpoint was successful bucketState.pendingFiles.clear(); if (bucketState.currentFile != null) { // We were writing to a file when the last checkpoint occured. This file can either // be still in-progress or became a pending file at some point after the checkpoint. // Either way, we have to truncate it back to a valid state (or write a .valid-length) // file that specifies up to which length it is valid and rename it to the final name // before starting a new bucket file. 
Path partPath = new Path(bucketState.currentFile); try { Path partPendingPath = new Path(partPath.getParent(), pendingPrefix + partPath.getName()) .suffix(pendingSuffix); Path partInProgressPath = new Path(partPath.getParent(), inProgressPrefix + partPath.getName()) .suffix(inProgressSuffix); if (fs.exists(partPendingPath)) { LOG.debug( "In-progress file {} has been moved to pending after checkpoint, moving to final location.", partPath); // has been moved to pending in the mean time, rename to final location fs.rename(partPendingPath, partPath); } else if (fs.exists(partInProgressPath)) { LOG.debug("In-progress file {} is still in-progress, moving to final location.", partPath); // it was still in progress, rename to final path fs.rename(partInProgressPath, partPath); } else if (fs.exists(partPath)) { LOG.debug("In-Progress file {} was already moved to final location {}.", bucketState.currentFile, partPath); } else { LOG.debug( "In-Progress file {} was neither moved to pending nor is still in progress. Possibly, " + "it was moved to final location by a previous snapshot restore", bucketState.currentFile); } refTruncate = reflectTruncate(fs); // truncate it or write a ".valid-length" file to specify up to which point it is valid if (refTruncate != null) { LOG.debug("Truncating {} to valid length {}", partPath, bucketState.currentFileValidLength); // some-one else might still hold the lease from a previous try, we are // recovering, after all ... 
if (fs instanceof DistributedFileSystem) { DistributedFileSystem dfs = (DistributedFileSystem) fs; LOG.debug("Trying to recover file lease {}", partPath); dfs.recoverLease(partPath); boolean isclosed = dfs.isFileClosed(partPath); StopWatch sw = new StopWatch(); sw.start(); while (!isclosed) { if (sw.getTime() > asyncTimeout) { break; } try { Thread.sleep(500); } catch (InterruptedException e1) { // ignore it } isclosed = dfs.isFileClosed(partPath); } } Boolean truncated = (Boolean) refTruncate.invoke(fs, partPath, bucketState.currentFileValidLength); if (!truncated) { LOG.debug("Truncate did not immediately complete for {}, waiting...", partPath); // we must wait for the asynchronous truncate operation to complete StopWatch sw = new StopWatch(); sw.start(); long newLen = fs.getFileStatus(partPath).getLen(); while (newLen != bucketState.currentFileValidLength) { if (sw.getTime() > asyncTimeout) { break; } try { Thread.sleep(500); } catch (InterruptedException e1) { // ignore it } newLen = fs.getFileStatus(partPath).getLen(); } if (newLen != bucketState.currentFileValidLength) { throw new RuntimeException("Truncate did not truncate to right length. 
Should be " + bucketState.currentFileValidLength + " is " + newLen + "."); } } } else { LOG.debug("Writing valid-length file for {} to specify valid length {}", partPath, bucketState.currentFileValidLength); Path validLengthFilePath = new Path(partPath.getParent(), validLengthPrefix + partPath.getName()).suffix(validLengthSuffix); if (!fs.exists(validLengthFilePath)) { FSDataOutputStream lengthFileOut = fs.create(validLengthFilePath); lengthFileOut.writeUTF(Long.toString(bucketState.currentFileValidLength)); lengthFileOut.close(); } } // Now that we've restored the bucket to a valid state, reset the current file info bucketState.currentFile = null; bucketState.currentFileValidLength = -1; } catch (IOException e) { LOG.error("Error while restoring BucketingSink state.", e); throw new RuntimeException("Error while restoring BucketingSink state.", e); } catch (InvocationTargetException | IllegalAccessException e) { LOG.error("Cound not invoke truncate.", e); throw new RuntimeException("Could not invoke truncate.", e); } } LOG.debug("Clearing pending/in-progress files."); // Move files that are confirmed by a checkpoint but did not get moved to final location // because the checkpoint notification did not happen before a failure Set<Long> pastCheckpointIds = bucketState.pendingFilesPerCheckpoint.keySet(); LOG.debug("Moving pending files to final location on restore."); for (Long pastCheckpointId : pastCheckpointIds) { // All the pending files are buckets that have been completed but are waiting to be renamed // to their final name for (String filename : bucketState.pendingFilesPerCheckpoint.get(pastCheckpointId)) { Path finalPath = new Path(filename); Path pendingPath = new Path(finalPath.getParent(), pendingPrefix + finalPath.getName()) .suffix(pendingSuffix); try { if (fs.exists(pendingPath)) { LOG.debug( "(RESTORE) Moving pending file {} to final location after complete checkpoint {}.", pendingPath, pastCheckpointId); fs.rename(pendingPath, finalPath); } } catch 
(IOException e) { LOG.error("(RESTORE) Error while renaming pending file {} to final path {}: {}", pendingPath, finalPath, e); throw new RuntimeException( "Error while renaming pending file " + pendingPath + " to final path " + finalPath, e); } } } synchronized (bucketState.pendingFilesPerCheckpoint) { bucketState.pendingFilesPerCheckpoint.clear(); } } // we need to get this here since open() has not yet been called int subtaskIndex = getRuntimeContext().getIndexOfThisSubtask(); // delete pending files try { RemoteIterator<LocatedFileStatus> bucketFiles = fs.listFiles(new Path(basePath), true); while (bucketFiles.hasNext()) { LocatedFileStatus file = bucketFiles.next(); if (file.getPath().toString().endsWith(pendingSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(RESTORE) Deleting pending file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } if (file.getPath().toString().endsWith(inProgressSuffix)) { // only delete files that contain our subtask index if (file.getPath().toString().contains(partPrefix + "-" + subtaskIndex + "-")) { LOG.debug("(RESTORE) Deleting in-progress file {}", file.getPath().toString()); fs.delete(file.getPath(), true); } } } } catch (IOException e) { LOG.error("Error while deleting old pending files: {}", e); throw new RuntimeException("Error while deleting old pending files.", e); } }
From source file:com.netflix.aegisthus.input.AegisthusInputFormat.java
License:Apache License
/** * The main thing that the addSSTableSplit handles is to split SSTables * using their index if available. The general algorithm is that if the file * is large than the blocksize plus some fuzzy factor to *//* w w w. j a v a 2 s. com*/ public void addSSTableSplit(List<InputSplit> splits, JobContext job, FileStatus file) throws IOException { Path path = file.getPath(); FileSystem fs = path.getFileSystem(job.getConfiguration()); long length = file.getLen(); BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length); if (length != 0) { long blockSize = file.getBlockSize(); long maxSplitSize = (long) (blockSize * .99); long fuzzySplit = (long) (blockSize * 1.2); long bytesRemaining = length; Iterator<Long> scanner = null; Path compressionPath = new Path(path.getParent(), path.getName().replaceAll("-Data.db", "-CompressionInfo.db")); if (!fs.exists(compressionPath)) { // Only initialize if we are going to have more than a single // split if (fuzzySplit < length) { Path indexPath = new Path(path.getParent(), path.getName().replaceAll("-Data.db", "-Index.db")); if (!fs.exists(indexPath)) { fuzzySplit = length; } else { FSDataInputStream fileIn = fs.open(indexPath); scanner = new OffsetScanner(new DataInputStream(new BufferedInputStream(fileIn)), indexPath.getName()); } } long splitStart = 0; while (splitStart + fuzzySplit < length && scanner.hasNext()) { long splitSize = 0; // The scanner returns an offset from the start of the file. 
while (splitSize < maxSplitSize && scanner.hasNext()) { splitSize = scanner.next() - splitStart; } int blkIndex = getBlockIndex(blkLocations, splitStart + (splitSize / 2)); LOG.info("split path: " + path.getName() + ":" + splitStart + ":" + splitSize); splits.add(new AegSplit(path, splitStart, splitSize, blkLocations[blkIndex].getHosts(), convertors)); bytesRemaining -= splitSize; splitStart += splitSize; } } if (bytesRemaining != 0) { LOG.info("end path: " + path.getName() + ":" + (length - bytesRemaining) + ":" + bytesRemaining); splits.add(new AegSplit(path, length - bytesRemaining, bytesRemaining, blkLocations[blkLocations.length - 1].getHosts(), convertors, fs.exists(compressionPath), compressionPath)); } } else { LOG.info("skipping zero length file: " + path.toString()); } }
From source file:com.netflix.aegisthus.tools.Utils.java
License:Apache License
public static void copy(Path from, Path to, boolean snappy, TaskAttemptContext ctx) throws IOException { FileSystem fromFs = from.getFileSystem(ctx.getConfiguration()); FileSystem toFs = to.getFileSystem(ctx.getConfiguration()); if (!to.isAbsolute()) { to = new Path(ctx.getConfiguration().get("mapred.working.dir"), to); }// w ww .java 2 s.co m if (!snappy && onSameHdfs(ctx.getConfiguration(), from, to)) { LOG.info(String.format("renaming %s to %s", from, to)); toFs.mkdirs(to.getParent()); toFs.rename(from, to); return; } InputStream in = fromFs.open(from); OutputStream out = toFs.create(to, false); try { if (snappy) { in = new SnappyInputStream2(in); } byte[] buffer = new byte[65536]; int bytesRead; int count = 0; while ((bytesRead = in.read(buffer)) >= 0) { if (bytesRead > 0) { out.write(buffer, 0, bytesRead); } if (count++ % 50 == 0) { ctx.progress(); } } } finally { in.close(); out.close(); } }
From source file:com.netflix.bdp.s3.S3PartitionedOutputCommitter.java
License:Apache License
@Override public void commitJob(JobContext context) throws IOException { List<S3Util.PendingUpload> pending = getPendingUploads(context); FileSystem s3 = getOutputPath(context).getFileSystem(context.getConfiguration()); Set<Path> partitions = Sets.newLinkedHashSet(); LOG.info("The partitions are: " + partitions); for (S3Util.PendingUpload commit : pending) { Path filePath = new Path("s3://" + commit.getBucketName() + "/" + commit.getKey()); partitions.add(filePath.getParent()); }// w ww . j a v a 2 s .c o m // enforce conflict resolution boolean threw = true; try { switch (getMode(context)) { case FAIL: // FAIL checking is done on the task side, so this does nothing break; case APPEND: // no check is needed because the output may exist for appending break; case REPLACE: for (Path partitionPath : partitions) { if (s3.exists(partitionPath)) { LOG.info("Removing partition path to be replaced: " + partitionPath); if (!s3.delete(partitionPath, true /* recursive */)) { throw new IOException("Failed to delete existing " + "partition directory for replace:" + partitionPath); } } } break; default: throw new RuntimeException("Unknown conflict resolution mode: " + getMode(context)); } threw = false; } catch (IOException e) { throw new IOException("Failed to enforce conflict resolution", e); } finally { if (threw) { abortJobInternal(context, pending, threw); } } commitJobInternal(context, pending); }
From source file:com.netflix.bdp.s3mper.listing.BigTableGcsConsistentListingAspectTest.java
License:Apache License
@Test public void testFileCreateMethods() throws Throwable { System.out.println("testFileCreateMethods"); Path file = new Path(testPath + "/create-methods.test"); //create(Path) OutputStream fout = deleteFs.create(file); assertNotNull(fout);/*from w w w . jav a2 s. c om*/ fout.close(); List<FileInfo> files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(Path, Progressable)"); fout = deleteFs.create(file, new Progressable() { @Override public void progress() { } }); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(Path, boolean)"); fout = deleteFs.create(file, true); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(Path, short)"); fout = deleteFs.create(file, (short) 1); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(Path, boolean, int)"); fout = deleteFs.create(file, true, 4096); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(FileSystem, Path, FsPermission)"); fout = deleteFs.create(deleteFs, file, FsPermission.getDefault()); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); 
//janitor.clearPath(testPath); System.out.println("create(FileSystem, short, Progressable)"); fout = deleteFs.create(file, (short) 1, new Progressable() { @Override public void progress() { } }); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(FileSystem, boolean, int, Progressable)"); fout = deleteFs.create(file, true, 4096, new Progressable() { @Override public void progress() { } }); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(FileSystem, boolean, int, short, long)"); fout = deleteFs.create(file, true, 4096, (short) 1, 100000000); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); System.out.println("create(FileSystem, boolean, int, short, long, Progressable)"); fout = deleteFs.create(file, true, 4096, (short) 1, 100000000, new Progressable() { @Override public void progress() { } }); assertNotNull(fout); fout.close(); files = meta.list(Collections.singletonList(file.getParent())); assertEquals(1, files.size()); deleteFs.delete(file.getParent(), true); //janitor.clearPath(testPath); }
From source file:com.netflix.bdp.s3mper.listing.BigTableGcsConsistentListingAspectTest.java
License:Apache License
@Test public void testUpdateMetastore() throws Throwable { System.out.println("updateMetastore"); Path arg1Path = new Path(testPath + "/update.test"); OutputStream fout = deleteFs.create(arg1Path); assertNotNull(fout);/* w w w. ja v a 2 s. c om*/ fout.close(); List<FileInfo> files = meta.list(Collections.singletonList(arg1Path.getParent())); assertEquals(1, files.size()); deleteFs.delete(arg1Path, true); //janitor.clearPath(testPath); }
From source file:com.netflix.bdp.s3mper.listing.BigTableGcsConsistentListingAspectTest.java
License:Apache License
/**
 * Adds a path directly to the metastore and expects a listing of its parent directory to
 * come back empty.
 */
@Test
public void testDarkloading() throws Throwable {
    final Path entry = new Path(testPath + "/test");
    // NOTE(review): the meaning of the boolean flag is not visible here — confirm against meta.add.
    meta.add(entry, false);

    final List<Path> parents = Collections.singletonList(entry.getParent());
    final List<FileInfo> listed = meta.list(parents);

    assertEquals(0, listed.size());
}
From source file:com.netflix.bdp.s3mper.listing.ConsistentListingAspectTest.java
License:Apache License
/**
 * Exercises the {@code create} overloads on {@code deleteFs} and checks after each one that
 * the metastore lists exactly one entry for the file's parent directory.
 */
@Test
public void testFileCreateMethods() throws Throwable {
    System.out.println("testFileCreateMethods");
    Path file = new Path(testPath + "/create-methods.test");

    // One no-op callback is enough for every overload that takes a Progressable.
    Progressable noProgress = new Progressable() {
        @Override
        public void progress() {
        }
    };

    // create(Path)
    verifyCreatedFileAndClear(deleteFs.create(file), file);

    System.out.println("create(Path, Progressable)");
    verifyCreatedFileAndClear(deleteFs.create(file, noProgress), file);

    System.out.println("create(Path, boolean)");
    verifyCreatedFileAndClear(deleteFs.create(file, true), file);

    System.out.println("create(Path, short)");
    verifyCreatedFileAndClear(deleteFs.create(file, (short) 1), file);

    System.out.println("create(Path, boolean, int)");
    verifyCreatedFileAndClear(deleteFs.create(file, true, 4096), file);

    System.out.println("create(FileSystem, Path, FsPermission)");
    verifyCreatedFileAndClear(deleteFs.create(deleteFs, file, FsPermission.getDefault()), file);

    System.out.println("create(Path, short, Progressable)");
    verifyCreatedFileAndClear(deleteFs.create(file, (short) 1, noProgress), file);

    System.out.println("create(Path, boolean, int, Progressable)");
    verifyCreatedFileAndClear(deleteFs.create(file, true, 4096, noProgress), file);

    System.out.println("create(Path, boolean, int, short, long)");
    verifyCreatedFileAndClear(deleteFs.create(file, true, 4096, (short) 1, 100000000), file);

    System.out.println("create(Path, boolean, int, short, long, Progressable)");
    verifyCreatedFileAndClear(deleteFs.create(file, true, 4096, (short) 1, 100000000, noProgress), file);
}

/**
 * Closes the freshly created stream, asserts that the metastore lists exactly one entry for
 * the file's parent directory, deletes that directory recursively, and clears the test path
 * through the janitor.
 */
private void verifyCreatedFileAndClear(OutputStream fout, Path file) throws Throwable {
    assertNotNull(fout);
    fout.close();

    List<FileInfo> files = meta.list(Collections.singletonList(file.getParent()));
    assertEquals(1, files.size());

    deleteFs.delete(file.getParent(), true);
    janitor.clearPath(testPath);
}