Example usage for org.apache.hadoop.fs Path suffix

Introduction

This page collects usage examples for org.apache.hadoop.fs.Path#suffix.

Prototype

public Path suffix(String suffix) 

Document

Adds a suffix to the final name in the path.
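
Note that the suffix string is appended to the final path component verbatim; Path.suffix does not insert a separator. A minimal sketch of the behavior (the paths in the comments are what Path normalization is expected to produce):

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
    public static void main(String[] args) {
        Path log = new Path("hdfs://namenode/logs/app.log");
        // Appends to the final name; no separator is inserted.
        System.out.println(log.suffix(".1"));         // hdfs://namenode/logs/app.log.1
        // A leading "/" effectively addresses a child of the path.
        System.out.println(log.suffix("/part-0000")); // hdfs://namenode/logs/app.log/part-0000
    }
}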

Usage

From source file:com.ngdata.sep.impl.fork.ForkedReplicationSource.java

License:Apache License

/**
 * Open a reader on the current path.
 *
 * @param sleepMultiplier by how many times the default sleeping time is augmented
 * @return true if we should continue with that file, false if we are over with it
 */
protected boolean openReader(int sleepMultiplier) {
    try {
        LOG.debug("Opening log for replication " + this.currentPath.getName() + " at "
                + this.repLogReader.getPosition());
        try {
            this.reader = repLogReader.openReader(this.currentPath);
        } catch (FileNotFoundException fnfe) {
            if (this.queueRecovered) {
                // We didn't find the log in the archive directory, look if it still
                // exists in the dead RS folder (there could be a chain of failures
                // to look at)
                LOG.info("NB dead servers : " + deadRegionServers.size());
                for (String curDeadServerName : deadRegionServers) {
                    Path deadRsDirectory = new Path(manager.getLogDir().getParent(), curDeadServerName);
                    Path[] locs = new Path[] { new Path(deadRsDirectory, currentPath.getName()),
                            new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT), currentPath.getName()), };
                    for (Path possibleLogLocation : locs) {
                        LOG.info("Possible location " + possibleLogLocation.toUri().toString());
                        if (this.manager.getFs().exists(possibleLogLocation)) {
                            // We found the right new location
                            LOG.info("Log " + this.currentPath + " still exists at " + possibleLogLocation);
                            // Breaking here will make us sleep since reader is null
                            return true;
                        }
                    }
                }
                // TODO What happens if the log was missing from every single location?
                // Although we need to check a couple of times as the log could have
                // been moved by the master between the checks
                // It can also happen if a recovered queue wasn't properly cleaned,
                // such that the znode pointing to a log exists but the log was
                // deleted a long time ago.
                // For the moment, we'll throw the IO and processEndOfFile
                throw new IOException("File from recovered queue is nowhere to be found", fnfe);
            } else {
                // If the log was archived, continue reading from there
                Path archivedLogLocation = new Path(manager.getOldLogDir(), currentPath.getName());
                if (this.manager.getFs().exists(archivedLogLocation)) {
                    // Log the move before switching currentPath, so both paths are shown
                    LOG.info("Log " + this.currentPath + " was moved to " + archivedLogLocation);
                    currentPath = archivedLogLocation;
                    // Open the log at the new location
                    this.openReader(sleepMultiplier);

                }
                // TODO What happens if the log is missing in both places?
            }
        }
    } catch (IOException ioe) {
        if (ioe instanceof EOFException && isCurrentLogEmpty())
            return true;
        LOG.warn(peerClusterZnode + " Got: ", ioe);
        this.reader = null;
        if (ioe.getCause() instanceof NullPointerException) {
            // Workaround for race condition in HDFS-4380
            // which throws a NPE if we open a file before any data node has the most recent block
            // Just sleep and retry.  Will require re-reading compressed HLogs for compressionContext.
            LOG.warn("Got NPE opening reader, will retry.");
        } else if (sleepMultiplier == this.maxRetriesMultiplier) {
            // TODO Need a better way to determine if a file is really gone but
            // TODO without scanning all logs dir
            LOG.warn("Waited too long for this file, considering dumping");
            return !processEndOfFile();
        }
    }
    return true;
}
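
The interesting suffix call above is deadRsDirectory.suffix(HLog.SPLITTING_EXT): it derives the sibling "-splitting" directory that the master renames a dead region server's log directory to while splitting its logs. A hedged sketch of that naming pattern, with an illustrative directory layout ("-splitting" stands in for HLog.SPLITTING_EXT):

import org.apache.hadoop.fs.Path;

public class SplittingDirSketch {
    public static void main(String[] args) {
        Path deadRsDir = new Path("/hbase/.logs/rs1,60020,12345");
        // Candidate locations for a log named "wal.1", as in openReader above.
        Path inDeadDir = new Path(deadRsDir, "wal.1");
        Path inSplittingDir = new Path(deadRsDir.suffix("-splitting"), "wal.1");
        System.out.println(inDeadDir);      // /hbase/.logs/rs1,60020,12345/wal.1
        System.out.println(inSplittingDir); // /hbase/.logs/rs1,60020,12345-splitting/wal.1
    }
}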

From source file:com.quixey.hadoop.fs.oss.CloudOSSFileSystemStoreTest.java

License:Apache License

private void writeRenameReadCompare(Path path, long len) throws NoSuchAlgorithmException, IOException {
    // write a file of length `len` to `path`
    MessageDigest digest = MessageDigest.getInstance("MD5");
    try (OutputStream out = new BufferedOutputStream(new DigestOutputStream(fs.create(path), digest))) {
        for (long i = 0; i < len; i++)
            out.write(74); // write the byte 'J' len times
    }

    assertTrue(fs.exists(path));

    // rename - might cause a multipart copy
    Path copyPath = path.suffix(".copy");
    fs.rename(path, copyPath);
    assertTrue(fs.exists(copyPath));

    // download the file
    MessageDigest digest2 = MessageDigest.getInstance("MD5");
    long copyLen = 0;
    try (InputStream in = new BufferedInputStream(new DigestInputStream(fs.open(copyPath), digest2))) {
        while (-1 != in.read())
            copyLen++;
    }

    // compare lengths, digests
    assertEquals(len, copyLen);
    assertArrayEquals(digest.digest(), digest2.digest());
}
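
Because suffix only rewrites the final name, the ".copy" target above lands in the same directory as the source. The same rename pattern in isolation, as a minimal sketch against a local FileSystem with a hypothetical file name:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameWithSuffix {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path src = new Path("/tmp/data.bin"); // hypothetical file
        fs.create(src).close();
        Path dst = src.suffix(".copy");       // /tmp/data.bin.copy, same directory
        System.out.println(fs.rename(src, dst) && fs.exists(dst)); // true
    }
}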

From source file:com.splunk.shuttl.archiver.filesystem.HadoopFileSystemArchiveTest.java

License:Apache License

public void deletePathRecursivly_givenADirectoryWithFilesInIt_thePathShouldBeDeleted() throws IOException {
    File testDirectory = TUtilsFile.createDirectory();
    File testFile = TUtilsFile.createFileInParent(testDirectory, "STUFF");
    TUtilsFile.populateFileWithRandomContent(testFile);
    hadoopFileSystemPutter.putFile(testDirectory);
    Path testFilePath = hadoopFileSystemPutter.getPathForFile(testDirectory);

    // Make sure setup was correct
    assertTrue(fileSystem.exists(testFilePath));
    assertTrue(fileSystem.exists(testFilePath.suffix("/STUFF")));

    // Test
    hadoopFileSystemArchive.deletePathRecursivly(testFilePath);

    // Verify
    assertFalse(fileSystem.exists(testFilePath));
    assertFalse(fileSystem.exists(testFilePath.suffix("STUFF")));

}
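
The leading slash in suffix("/STUFF") is what makes the result a child of the directory; without it, suffix produces a sibling whose name merely ends in STUFF. A short demonstration:

import org.apache.hadoop.fs.Path;

public class SuffixSlashDemo {
    public static void main(String[] args) {
        Path dir = new Path("/archive/dir-1");
        System.out.println(dir.suffix("/STUFF")); // /archive/dir-1/STUFF (child)
        System.out.println(dir.suffix("STUFF"));  // /archive/dir-1STUFF  (sibling)
    }
}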

From source file:com.splunk.shuttl.archiver.util.UtilsPath.java

License:Apache License

/**
 * When appending, the scheme is taken from pathThatWillBeAppended and only
 * the actual path string is taken from pathToAppend.
 *
 * @param pathThatWillBeAppended
 *          the base path; the scheme of the result is taken from this one
 * @param pathToAppend
 *          the path string is taken from this argument and appended to the
 *          previous one
 *
 * @return a new Path created by appending 'pathToAppend' to
 *         'pathThatWillBeAppended'
 */
public static Path createPathByAppending(Path pathThatWillBeAppended, Path pathToAppend) {
    return pathThatWillBeAppended.suffix(pathToAppend.toUri().getPath());
}
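
A usage sketch with hypothetical paths: the base contributes its scheme and authority, while only the path string of the second argument is appended.

import org.apache.hadoop.fs.Path;

public class AppendDemo {
    public static void main(String[] args) {
        Path base = new Path("hdfs://namenode:8020/archive");
        Path toAppend = new Path("file:/indexes/main");
        // Equivalent to UtilsPath.createPathByAppending(base, toAppend):
        Path result = base.suffix(toAppend.toUri().getPath());
        System.out.println(result); // hdfs://namenode:8020/archive/indexes/main
    }
}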

From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java

License:Apache License

public static Path getNonexistantPath(Path parentDir, String name, FileSystem fs) throws IOException {
    Path requestedPath = new Path(parentDir, name);
    Path path = requestedPath;
    int serial = 1;
    while (fs.exists(path)) {
        path = requestedPath.suffix("-" + serial);
        serial++;
    }
    return path;
}
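
The numbering scheme in isolation: each retry derives a fresh sibling name from the originally requested path, so collisions resolve to name-1, name-2, and so on. A minimal sketch with a hypothetical path:

import org.apache.hadoop.fs.Path;

public class SerialSuffixDemo {
    public static void main(String[] args) {
        Path requested = new Path("/user/alice/jobs/wordcount");
        for (int serial = 1; serial <= 3; serial++)
            System.out.println(requested.suffix("-" + serial));
        // /user/alice/jobs/wordcount-1, -2, -3
    }
}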

From source file:eu.larkc.RDFPig.pig.PigQueriesGenerator.java

License:Apache License

public TupleSetMetadata evaluate(TupleExpr expr, Executor.Cache cache, Path outputLocation, List<String> list,
        double sample) throws QueryEvaluationException, IOException {

    // Start evaluating the expression
    Context context = new Context();
    context.pigQueries = list;
    context.cache = cache;
    context.desiredSampling = sample;
    TupleSetMetadata meta = evaluate(expr, context);

    if (meta.location == null) {
        Path outputLoc = new Path(outputLocation, Integer.toString(storageCount++));
        String storageType;
        if (expr instanceof QueryRoot) {
            storageType = "PigStorage('\\t')";
            // Note: Path is immutable; suffix() returns a new Path, so this
            // call as written discards its result and has no effect.
            outputLoc.suffix("-text");
            list.add(String.format("STORE %s INTO '%s' USING %s;\n", meta.name,
                    outputLoc.toString() + "-output", storageType));
        } else {
            if (sample == 1.0) {
                storageType = "org.apache.hadoop.zebra.pig.TableStorer('')";
                list.add(String.format("STORE %s INTO '%s' USING %s;\n", meta.name, outputLoc.toString(),
                        storageType));
            } else {
                list.add(String.format("STORE %s INTO '%s';", meta.name, outputLoc.toString()));
            }
        }
        meta.location = outputLoc;
    }
    return meta;
}
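
Note the no-op flagged in the comment above: Path is immutable, so suffix returns a new Path rather than modifying the receiver. A minimal demonstration of the pitfall:

import org.apache.hadoop.fs.Path;

public class ImmutablePathDemo {
    public static void main(String[] args) {
        Path p = new Path("/out/42");
        p.suffix("-text");         // result discarded; p is unchanged
        System.out.println(p);     // /out/42
        p = p.suffix("-text");     // reassign to keep the derived path
        System.out.println(p);     // /out/42-text
    }
}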

From source file:fi.tkk.ics.hadoop.bam.BAMInputFormat.java

License:Open Source License

private Path getIdxPath(Path path) {
    return path.suffix(".splitting-bai");
}
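
Deriving an index location purely by name, as here and in the BGZF input format at the end of this page, keeps the index next to the data file it describes. A minimal sketch:

import org.apache.hadoop.fs.Path;

public class IdxPathDemo {
    public static void main(String[] args) {
        Path bam = new Path("hdfs://nn/data/sample.bam");
        System.out.println(bam.suffix(".splitting-bai"));
        // hdfs://nn/data/sample.bam.splitting-bai, beside the BAM file
    }
}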

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.View.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {

    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("view :: PATH not given.");
        return 3;
    }

    Utils.toStringency(parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()),
            "view");

    final String path = args.get(0);
    final List<String> regions = args.subList(1, args.size());

    final boolean headerOnly = parser.getBoolean(headerOnlyOpt);

    final SAMFileReader reader;

    try {
        final Path p = new Path(path);

        SeekableStream idx;
        try {
            idx = WrapSeekable.openPath(getConf(), p.suffix(".bai"));
        } catch (Exception e) {
            idx = null;
        }

        final SeekableStream sam = WrapSeekable.openPath(getConf(), p);

        reader = idx == null ? new SAMFileReader(sam, false) : new SAMFileReader(sam, idx, false);
    } catch (Exception e) {
        System.err.printf("view :: Could not open '%s': %s\n", path, e.getMessage());
        return 4;
    }

    reader.setValidationStringency(ValidationStringency.SILENT);

    final SAMFileHeader header;

    try {
        header = reader.getFileHeader();
    } catch (SAMFormatException e) {
        System.err.printf("view :: Could not parse '%s': %s\n", path, e.getMessage());
        return 4;
    }

    final String fmt = (String) parser.getOptionValue(formatOpt);

    final SAMFormat format = fmt == null ? SAMFormat.SAM : SAMFormat.valueOf(fmt.toUpperCase(Locale.ENGLISH));

    final SAMFileWriterImpl writer;
    switch (format) {
    case BAM:
        writer = new BAMFileWriter(System.out, new File("<stdout>"));
        break;
    case SAM:
        writer = new SAMTextWriter(System.out);
        break;
    default:
        writer = null;
        assert false;
    }

    writer.setSortOrder(header.getSortOrder(), true);
    writer.setHeader(header);

    if (regions.isEmpty() || headerOnly) {
        if (!headerOnly)
            if (!writeIterator(writer, reader.iterator(), path))
                return 4;

        writer.close();
        return 0;
    }

    if (!reader.isBinary()) {
        System.err.println("view :: Cannot output regions from SAM file");
        return 4;
    }

    if (!reader.hasIndex()) {
        System.err.println("view :: Cannot output regions from BAM file lacking an index");
        return 4;
    }

    reader.enableIndexCaching(true);

    boolean errors = false;

    for (final String region : regions) {
        final StringTokenizer st = new StringTokenizer(region, ":-");
        final String refStr = st.nextToken();
        final int beg, end;

        if (st.hasMoreTokens()) {
            beg = parseCoordinate(st.nextToken());
            end = st.hasMoreTokens() ? parseCoordinate(st.nextToken()) : -1;

            if (beg < 0 || end < 0) {
                errors = true;
                continue;
            }
            if (end < beg) {
                System.err.printf("view :: Invalid range, cannot end before start: '%d-%d'\n", beg, end);
                errors = true;
                continue;
            }
        } else
            beg = end = 0;

        SAMSequenceRecord ref = header.getSequence(refStr);
        if (ref == null)
            try {
                ref = header.getSequence(Integer.parseInt(refStr));
            } catch (NumberFormatException e) {
            }

        if (ref == null) {
            System.err.printf("view :: Not a valid sequence name or index: '%s'\n", refStr);
            errors = true;
            continue;
        }

        final SAMRecordIterator it = reader.queryOverlapping(ref.getSequenceName(), beg, end);

        if (!writeIterator(writer, it, path))
            return 4;
    }
    writer.close();
    return errors ? 5 : 0;
}

From source file:fi.tkk.ics.hadoop.bam.SplittingBAMIndexer.java

License:Open Source License

/**
 * Invoke a new SplittingBAMIndexer object, operating on the supplied {@link
 * org.apache.hadoop.conf.Configuration} object instead of a supplied
 * argument list.
 *
 * @throws java.lang.IllegalArgumentException if the "input" property is not
 *                                            in the Configuration
 */
public static void run(final Configuration conf) throws IOException {
    final String inputString = conf.get("input");
    if (inputString == null)
        throw new IllegalArgumentException("String property \"input\" path not found in given Configuration");

    final FileSystem fs = FileSystem.get(conf);

    // Default to a granularity level of 4096. This is generally sufficient
    // for very large BAM files, relative to a maximum heap size in the
    // gigabyte range.
    final SplittingBAMIndexer indexer = new SplittingBAMIndexer(conf.getInt("granularity", 4096));

    final Path input = new Path(inputString);

    indexer.index(fs.open(input), fs.create(input.suffix(OUTPUT_FILE_EXTENSION)),
            fs.getFileStatus(input).getLen());
}

From source file:fi.tkk.ics.hadoop.bam.util.BGZFSplitFileInputFormat.java

License:Open Source License

private Path getIdxPath(Path path) {
    return path.suffix(".bgzfi");
}