List of usage examples for org.apache.hadoop.fs Path suffix
public Path suffix(String suffix)
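Before the individual source files, a minimal self-contained sketch of how suffix behaves (the class name PathSuffixDemo and the example paths are invented for illustration): it returns a new Path whose final component has the given string appended, leaving the receiving Path untouched.

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
    public static void main(String[] args) {
        // suffix(...) appends to the last path component and returns a new
        // Path; the original Path object is immutable and unchanged.
        Path log = new Path("hdfs://namenode/logs/server1.log");
        Path backup = log.suffix(".bak");

        System.out.println(log);    // hdfs://namenode/logs/server1.log
        System.out.println(backup); // hdfs://namenode/logs/server1.log.bak
    }
}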
From source file:com.ngdata.sep.impl.fork.ForkedReplicationSource.java
License:Apache License
/**
 * Open a reader on the current path
 *
 * @param sleepMultiplier by how many times the default sleeping time is augmented
 * @return true if we should continue with that file, false if we are over with it
 */
protected boolean openReader(int sleepMultiplier) {
    try {
        LOG.debug("Opening log for replication " + this.currentPath.getName() + " at "
                + this.repLogReader.getPosition());
        try {
            this.reader = repLogReader.openReader(this.currentPath);
        } catch (FileNotFoundException fnfe) {
            if (this.queueRecovered) {
                // We didn't find the log in the archive directory, look if it still
                // exists in the dead RS folder (there could be a chain of failures
                // to look at)
                LOG.info("NB dead servers : " + deadRegionServers.size());
                for (String curDeadServerName : deadRegionServers) {
                    Path deadRsDirectory = new Path(manager.getLogDir().getParent(), curDeadServerName);
                    Path[] locs = new Path[] { new Path(deadRsDirectory, currentPath.getName()),
                            new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT), currentPath.getName()), };
                    for (Path possibleLogLocation : locs) {
                        LOG.info("Possible location " + possibleLogLocation.toUri().toString());
                        if (this.manager.getFs().exists(possibleLogLocation)) {
                            // We found the right new location
                            LOG.info("Log " + this.currentPath + " still exists at " + possibleLogLocation);
                            // Breaking here will make us sleep since reader is null
                            return true;
                        }
                    }
                }
                // TODO What happens if the log was missing from every single location?
                // Although we need to check a couple of times as the log could have
                // been moved by the master between the checks
                // It can also happen if a recovered queue wasn't properly cleaned,
                // such that the znode pointing to a log exists but the log was
                // deleted a long time ago.
                // For the moment, we'll throw the IO and processEndOfFile
                throw new IOException("File from recovered queue is " + "nowhere to be found", fnfe);
            } else {
                // If the log was archived, continue reading from there
                Path archivedLogLocation = new Path(manager.getOldLogDir(), currentPath.getName());
                if (this.manager.getFs().exists(archivedLogLocation)) {
                    currentPath = archivedLogLocation;
                    LOG.info("Log " + this.currentPath + " was moved to " + archivedLogLocation);
                    // Open the log at the new location
                    this.openReader(sleepMultiplier);
                }
                // TODO What happens if the log is missing in both places?
            }
        }
    } catch (IOException ioe) {
        if (ioe instanceof EOFException && isCurrentLogEmpty())
            return true;
        LOG.warn(peerClusterZnode + " Got: ", ioe);
        this.reader = null;
        if (ioe.getCause() instanceof NullPointerException) {
            // Workaround for race condition in HDFS-4380
            // which throws a NPE if we open a file before any data node has the most recent block
            // Just sleep and retry. Will require re-reading compressed HLogs for compressionContext.
            LOG.warn("Got NPE opening reader, will retry.");
        } else if (sleepMultiplier == this.maxRetriesMultiplier) {
            // TODO Need a better way to determine if a file is really gone but
            // TODO without scanning all logs dir
            LOG.warn("Waited too long for this file, considering dumping");
            return !processEndOfFile();
        }
    }
    return true;
}
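The deadRsDirectory.suffix(HLog.SPLITTING_EXT) call above relies on suffix appending to the directory's own name rather than nesting a child entry. A small hedged sketch of that behavior (the server-name format and the "-splitting" value are assumptions based on HBase conventions, not taken from the source above):

import org.apache.hadoop.fs.Path;

public class SplittingDirDemo {
    public static void main(String[] args) {
        Path deadRsDirectory = new Path("hdfs://nn/hbase/.logs/rs1.example.com,60020,1");
        // suffix(...) modifies the final component instead of adding a child:
        System.out.println(deadRsDirectory.suffix("-splitting"));
        // hdfs://nn/hbase/.logs/rs1.example.com,60020,1-splitting
    }
}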
From source file:com.quixey.hadoop.fs.oss.CloudOSSFileSystemStoreTest.java
License:Apache License
private void writeRenameReadCompare(Path path, long len) throws NoSuchAlgorithmException, IOException {
    // write files of length `len` to `path`
    MessageDigest digest = MessageDigest.getInstance("MD5");
    try (OutputStream out = new BufferedOutputStream(new DigestOutputStream(fs.create(path), digest))) {
        for (long i = 0; i < len; i++)
            out.write(74);
    }

    assertTrue(fs.exists(path));

    // rename - might cause a multipart copy
    Path copyPath = path.suffix(".copy");
    fs.rename(path, copyPath);

    assertTrue(fs.exists(copyPath));

    // download the file
    MessageDigest digest2 = MessageDigest.getInstance("MD5");
    long copyLen = 0;
    try (InputStream in = new BufferedInputStream(new DigestInputStream(fs.open(copyPath), digest2))) {
        while (-1 != in.read())
            copyLen++;
    }

    // compare lengths, digests
    assertEquals(len, copyLen);
    assertArrayEquals(digest.digest(), digest2.digest());
}
From source file:com.splunk.shuttl.archiver.filesystem.HadoopFileSystemArchiveTest.java
License:Apache License
public void deletePathRecursivly_givenADirectoryWithFilesInIt_thePathShouldBeDeleted() throws IOException {
    File testDirectory = TUtilsFile.createDirectory();
    File testFile = TUtilsFile.createFileInParent(testDirectory, "STUFF");
    TUtilsFile.populateFileWithRandomContent(testFile);
    hadoopFileSystemPutter.putFile(testDirectory);
    Path testFilePath = hadoopFileSystemPutter.getPathForFile(testDirectory);

    // Make sure setup was correct
    assertTrue(fileSystem.exists(testFilePath));
    assertTrue(fileSystem.exists(testFilePath.suffix("/STUFF")));

    // Test
    hadoopFileSystemArchive.deletePathRecursivly(testFilePath);

    // Verify
    assertFalse(fileSystem.exists(testFilePath));
    assertFalse(fileSystem.exists(testFilePath.suffix("STUFF")));
}
From source file:com.splunk.shuttl.archiver.util.UtilsPath.java
License:Apache License
/**
 * When appending, the scheme is taken from pathThatWillBeAppended; only the
 * actual path component is taken from pathToAppend.
 *
 * @param pathThatWillBeAppended
 *          This is the base path; the scheme will be taken from this one.
 * @param pathToAppend
 *          The path string is taken from this argument and appended to the
 *          previous one.
 *
 * @return a new Path created by appending 'pathToAppend' to
 *         'pathThatWillBeAppended'
 */
public static Path createPathByAppending(Path pathThatWillBeAppended, Path pathToAppend) {
    return pathThatWillBeAppended.suffix(pathToAppend.toUri().getPath());
}
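A hedged illustration of that scheme handling (the class name AppendPathsDemo and both paths are made up for the example): the second path contributes only its path component, so its file: scheme is dropped while the base path's hdfs: scheme and authority are kept.

import org.apache.hadoop.fs.Path;

public class AppendPathsDemo {
    public static void main(String[] args) {
        Path base = new Path("hdfs://namenode:8020/archive");
        Path toAppend = new Path("file:/var/splunk/bucket1");

        // Same expression createPathByAppending uses: only the path component
        // of toAppend ("/var/splunk/bucket1") is appended to the base.
        Path combined = base.suffix(toAppend.toUri().getPath());

        System.out.println(combined);
        // hdfs://namenode:8020/archive/var/splunk/bucket1
    }
}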
From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java
License:Apache License
public static Path getNonexistantPath(Path parentDir, String name, FileSystem fs) throws IOException {
    Path requestedPath = new Path(parentDir, name);
    Path path = requestedPath;
    int serial = 1;
    while (fs.exists(path)) {
        path = requestedPath.suffix("-" + serial);
        serial++;
    }
    return path;
}
From source file:eu.larkc.RDFPig.pig.PigQueriesGenerator.java
License:Apache License
public TupleSetMetadata evaluate(TupleExpr expr, Executor.Cache cache, Path outputLocation, List<String> list,
        double sample) throws QueryEvaluationException, IOException {
    // Start evaluating the expression
    Context context = new Context();
    context.pigQueries = list;
    context.cache = cache;
    context.desiredSampling = sample;
    TupleSetMetadata meta = evaluate(expr, context);

    if (meta.location == null) {
        Path outputLoc = new Path(outputLocation, Integer.toString(storageCount++));
        String storageType;
        if (expr instanceof QueryRoot) {
            storageType = "PigStorage('\\t')";
            // Note: Path is immutable and suffix() returns a new Path, so this
            // call has no effect on outputLoc itself.
            outputLoc.suffix("-text");
            list.add(String.format("STORE %s INTO '%s' USING %s;\n", meta.name,
                    outputLoc.toString() + "-output", storageType));
        } else {
            if (sample == 1.0) {
                storageType = "org.apache.hadoop.zebra.pig.TableStorer('')";
                list.add(String.format("STORE %s INTO '%s' USING %s;\n", meta.name, outputLoc.toString(),
                        storageType));
            } else {
                list.add(String.format("STORE %s INTO '%s';", meta.name, outputLoc.toString()));
            }
        }
        meta.location = outputLoc;
    }
    return meta;
}
From source file:fi.tkk.ics.hadoop.bam.BAMInputFormat.java
License:Open Source License
private Path getIdxPath(Path path) {
    return path.suffix(".splitting-bai");
}
From source file:fi.tkk.ics.hadoop.bam.cli.plugins.View.java
License:Open Source License
@Override
protected int run(CmdLineParser parser) {
    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("view :: PATH not given.");
        return 3;
    }

    Utils.toStringency(parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()),
            "view");

    final String path = args.get(0);
    final List<String> regions = args.subList(1, args.size());

    final boolean headerOnly = parser.getBoolean(headerOnlyOpt);

    final SAMFileReader reader;
    try {
        final Path p = new Path(path);

        SeekableStream idx;
        try {
            idx = WrapSeekable.openPath(getConf(), p.suffix(".bai"));
        } catch (Exception e) {
            idx = null;
        }

        final SeekableStream sam = WrapSeekable.openPath(getConf(), p);

        reader = idx == null ? new SAMFileReader(sam, false) : new SAMFileReader(sam, idx, false);
    } catch (Exception e) {
        System.err.printf("view :: Could not open '%s': %s\n", path, e.getMessage());
        return 4;
    }

    reader.setValidationStringency(ValidationStringency.SILENT);

    final SAMFileHeader header;
    try {
        header = reader.getFileHeader();
    } catch (SAMFormatException e) {
        System.err.printf("view :: Could not parse '%s': %s\n", path, e.getMessage());
        return 4;
    }

    final String fmt = (String) parser.getOptionValue(formatOpt);
    final SAMFormat format = fmt == null ? SAMFormat.SAM : SAMFormat.valueOf(fmt.toUpperCase(Locale.ENGLISH));

    final SAMFileWriterImpl writer;
    switch (format) {
    case BAM:
        writer = new BAMFileWriter(System.out, new File("<stdout>"));
        break;
    case SAM:
        writer = new SAMTextWriter(System.out);
        break;
    default:
        writer = null;
        assert false;
    }
    writer.setSortOrder(header.getSortOrder(), true);
    writer.setHeader(header);

    if (regions.isEmpty() || headerOnly) {
        if (!headerOnly)
            if (!writeIterator(writer, reader.iterator(), path))
                return 4;

        writer.close();
        return 0;
    }

    if (!reader.isBinary()) {
        System.err.println("view :: Cannot output regions from SAM file");
        return 4;
    }
    if (!reader.hasIndex()) {
        System.err.println("view :: Cannot output regions from BAM file lacking an index");
        return 4;
    }

    reader.enableIndexCaching(true);

    boolean errors = false;

    for (final String region : regions) {
        final StringTokenizer st = new StringTokenizer(region, ":-");
        final String refStr = st.nextToken();
        final int beg, end;

        if (st.hasMoreTokens()) {
            beg = parseCoordinate(st.nextToken());
            end = st.hasMoreTokens() ? parseCoordinate(st.nextToken()) : -1;

            if (beg < 0 || end < 0) {
                errors = true;
                continue;
            }
            if (end < beg) {
                System.err.printf("view :: Invalid range, cannot end before start: '%d-%d'\n", beg, end);
                errors = true;
                continue;
            }
        } else
            beg = end = 0;

        SAMSequenceRecord ref = header.getSequence(refStr);
        if (ref == null)
            try {
                ref = header.getSequence(Integer.parseInt(refStr));
            } catch (NumberFormatException e) {
            }

        if (ref == null) {
            System.err.printf("view :: Not a valid sequence name or index: '%s'\n", refStr);
            errors = true;
            continue;
        }

        final SAMRecordIterator it = reader.queryOverlapping(ref.getSequenceName(), beg, end);

        if (!writeIterator(writer, it, path))
            return 4;
    }
    writer.close();
    return errors ? 5 : 0;
}
From source file:fi.tkk.ics.hadoop.bam.SplittingBAMIndexer.java
License:Open Source License
/**
 * Invoke a new SplittingBAMIndexer object, operating on the supplied
 * {@link org.apache.hadoop.conf.Configuration} object instead of a supplied
 * argument list
 *
 * @throws java.lang.IllegalArgumentException if the "input" property is not
 *         in the Configuration
 */
public static void run(final Configuration conf) throws IOException {
    final String inputString = conf.get("input");
    if (inputString == null)
        throw new IllegalArgumentException("String property \"input\" path not found in given Configuration");

    final FileSystem fs = FileSystem.get(conf);

    // Default to a granularity level of 4096. This is generally sufficient
    // for very large BAM files, relative to a maximum heap size in the
    // gigabyte range.
    final SplittingBAMIndexer indexer = new SplittingBAMIndexer(conf.getInt("granularity", 4096));

    final Path input = new Path(inputString);

    indexer.index(fs.open(input), fs.create(input.suffix(OUTPUT_FILE_EXTENSION)),
            fs.getFileStatus(input).getLen());
}
From source file:fi.tkk.ics.hadoop.bam.util.BGZFSplitFileInputFormat.java
License:Open Source License
private Path getIdxPath(Path path) {
    return path.suffix(".bgzfi");
}