Example usage for org.apache.hadoop.fs Path suffix

Introduction

This page collects usage examples for org.apache.hadoop.fs.Path#suffix.

Prototype

public Path suffix(String suffix) 

Document

Adds a suffix to the final name in the path.
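
Note that the suffix string is appended to the final path component verbatim; Path.suffix does not insert a separator. A minimal sketch of the behavior (the paths in the comments are what Path normalization is expected to produce):

import org.apache.hadoop.fs.Path;

public class PathSuffixDemo {
    public static void main(String[] args) {
        Path log = new Path("hdfs://namenode/logs/app.log");
        // Appends to the final name; no separator is inserted.
        System.out.println(log.suffix(".1"));         // hdfs://namenode/logs/app.log.1
        // A leading "/" effectively addresses a child of the path.
        System.out.println(log.suffix("/part-0000")); // hdfs://namenode/logs/app.log/part-0000
    }
}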

Usage

From source file:com.ngdata.sep.impl.fork.ForkedReplicationSource.java

License:Apache License

/**
 * Open a reader on the current path.
 *
 * @param sleepMultiplier by how many times the default sleeping time is augmented
 * @return true if we should continue with that file, false if we are over with it
 */
protected boolean openReader(int sleepMultiplier) {
    try {
        LOG.debug("Opening log for replication " + this.currentPath.getName() + " at "
                + this.repLogReader.getPosition());
        try {
            this.reader = repLogReader.openReader(this.currentPath);
        } catch (FileNotFoundException fnfe) {
            if (this.queueRecovered) {
                // We didn't find the log in the archive directory, look if it still
                // exists in the dead RS folder (there could be a chain of failures
                // to look at)
                LOG.info("NB dead servers : " + deadRegionServers.size());
                for (String curDeadServerName : deadRegionServers) {
                    Path deadRsDirectory = new Path(manager.getLogDir().getParent(), curDeadServerName);
                    Path[] locs = new Path[] { new Path(deadRsDirectory, currentPath.getName()),
                            new Path(deadRsDirectory.suffix(HLog.SPLITTING_EXT), currentPath.getName()), };
                    for (Path possibleLogLocation : locs) {
                        LOG.info("Possible location " + possibleLogLocation.toUri().toString());
                        if (this.manager.getFs().exists(possibleLogLocation)) {
                            // We found the right new location
                            LOG.info("Log " + this.currentPath + " still exists at " + possibleLogLocation);
                            // Breaking here will make us sleep since reader is null
                            return true;
                        }
                    }
                }
                // TODO What happens if the log was missing from every single location?
                // Although we need to check a couple of times as the log could have
                // been moved by the master between the checks
                // It can also happen if a recovered queue wasn't properly cleaned,
                // such that the znode pointing to a log exists but the log was
                // deleted a long time ago.
                // For the moment, we'll throw the IO and processEndOfFile
                throw new IOException("File from recovered queue is nowhere to be found", fnfe);
            } else {
                // If the log was archived, continue reading from there
                Path archivedLogLocation = new Path(manager.getOldLogDir(), currentPath.getName());
                if (this.manager.getFs().exists(archivedLogLocation)) {
                    // Log the move before switching currentPath, so both paths are shown
                    LOG.info("Log " + this.currentPath + " was moved to " + archivedLogLocation);
                    currentPath = archivedLogLocation;
                    // Open the log at the new location
                    this.openReader(sleepMultiplier);

                }
                // TODO What happens if the log is missing in both places?
            }
        }
    } catch (IOException ioe) {
        if (ioe instanceof EOFException && isCurrentLogEmpty())
            return true;
        LOG.warn(peerClusterZnode + " Got: ", ioe);
        this.reader = null;
        if (ioe.getCause() instanceof NullPointerException) {
            // Workaround for race condition in HDFS-4380
            // which throws a NPE if we open a file before any data node has the most recent block
            // Just sleep and retry.  Will require re-reading compressed HLogs for compressionContext.
            LOG.warn("Got NPE opening reader, will retry.");
        } else if (sleepMultiplier == this.maxRetriesMultiplier) {
            // TODO Need a better way to determine if a file is really gone but
            // TODO without scanning all logs dir
            LOG.warn("Waited too long for this file, considering dumping");
            return !processEndOfFile();
        }
    }
    return true;
}
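
The interesting suffix call above is deadRsDirectory.suffix(HLog.SPLITTING_EXT): it derives the sibling "-splitting" directory that the master renames a dead region server's log directory to while splitting its logs. A hedged sketch of that naming pattern, with an illustrative directory layout ("-splitting" stands in for HLog.SPLITTING_EXT):

import org.apache.hadoop.fs.Path;

public class SplittingDirSketch {
    public static void main(String[] args) {
        Path deadRsDir = new Path("/hbase/.logs/rs1,60020,12345");
        // Candidate locations for a log named "wal.1", as in openReader above.
        Path inDeadDir = new Path(deadRsDir, "wal.1");
        Path inSplittingDir = new Path(deadRsDir.suffix("-splitting"), "wal.1");
        System.out.println(inDeadDir);      // /hbase/.logs/rs1,60020,12345/wal.1
        System.out.println(inSplittingDir); // /hbase/.logs/rs1,60020,12345-splitting/wal.1
    }
}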

From source file:com.quixey.hadoop.fs.oss.CloudOSSFileSystemStoreTest.java

License:Apache License

private void writeRenameReadCompare(Path path, long len) throws NoSuchAlgorithmException, IOException {
    // write a file of length `len` to `path`
    MessageDigest digest = MessageDigest.getInstance("MD5");
    try (OutputStream out = new BufferedOutputStream(new DigestOutputStream(fs.create(path), digest))) {
        for (long i = 0; i < len; i++)
            out.write(74); // write the byte 'J' len times
    }

    assertTrue(fs.exists(path));

    // rename - might cause a multipart copy
    Path copyPath = path.suffix(".copy");
    fs.rename(path, copyPath);
    assertTrue(fs.exists(copyPath));

    // download the file
    MessageDigest digest2 = MessageDigest.getInstance("MD5");
    long copyLen = 0;
    try (InputStream in = new BufferedInputStream(new DigestInputStream(fs.open(copyPath), digest2))) {
        while (-1 != in.read())
            copyLen++;
    }

    // compare lengths, digests
    assertEquals(len, copyLen);
    assertArrayEquals(digest.digest(), digest2.digest());
}
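
Because suffix only rewrites the final name, the ".copy" target above lands in the same directory as the source. The same rename pattern in isolation, as a minimal sketch against a local FileSystem with a hypothetical file name:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class RenameWithSuffix {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.getLocal(new Configuration());
        Path src = new Path("/tmp/data.bin"); // hypothetical file
        fs.create(src).close();
        Path dst = src.suffix(".copy");       // /tmp/data.bin.copy, same directory
        System.out.println(fs.rename(src, dst) && fs.exists(dst)); // true
    }
}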

From source file:com.splunk.shuttl.archiver.filesystem.HadoopFileSystemArchiveTest.java

License:Apache License

public void deletePathRecursivly_givenADirectoryWithFilesInIt_thePathShouldBeDeleted() throws IOException {
    File testDirectory = TUtilsFile.createDirectory();
    File testFile = TUtilsFile.createFileInParent(testDirectory, "STUFF");
    TUtilsFile.populateFileWithRandomContent(testFile);
    hadoopFileSystemPutter.putFile(testDirectory);
    Path testFilePath = hadoopFileSystemPutter.getPathForFile(testDirectory);

    // Make sure setup was correct
    assertTrue(fileSystem.exists(testFilePath));
    assertTrue(fileSystem.exists(testFilePath.suffix("/STUFF")));

    // Test
    hadoopFileSystemArchive.deletePathRecursivly(testFilePath);

    // Verify
    assertFalse(fileSystem.exists(testFilePath));
    assertFalse(fileSystem.exists(testFilePath.suffix("STUFF")));

}
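
The leading slash in suffix("/STUFF") is what makes the result a child of the directory; without it, suffix produces a sibling whose name merely ends in STUFF. A short demonstration:

import org.apache.hadoop.fs.Path;

public class SuffixSlashDemo {
    public static void main(String[] args) {
        Path dir = new Path("/archive/dir-1");
        System.out.println(dir.suffix("/STUFF")); // /archive/dir-1/STUFF (child)
        System.out.println(dir.suffix("STUFF"));  // /archive/dir-1STUFF  (sibling)
    }
}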

From source file:com.splunk.shuttl.archiver.util.UtilsPath.java

License:Apache License

/**
 * When appending, the scheme is taken from pathThatWillBeAppended and only
 * the actual path string is taken from pathToAppend.
 *
 * @param pathThatWillBeAppended
 *          the base path; the scheme of the result is taken from this one
 * @param pathToAppend
 *          the path string is taken from this argument and appended to the
 *          previous one
 *
 * @return a new Path created by appending 'pathToAppend' to
 *         'pathThatWillBeAppended'
 */
public static Path createPathByAppending(Path pathThatWillBeAppended, Path pathToAppend) {
    return pathThatWillBeAppended.suffix(pathToAppend.toUri().getPath());
}
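
A usage sketch with hypothetical paths: the base contributes its scheme and authority, while only the path string of the second argument is appended.

import org.apache.hadoop.fs.Path;

public class AppendDemo {
    public static void main(String[] args) {
        Path base = new Path("hdfs://namenode:8020/archive");
        Path toAppend = new Path("file:/indexes/main");
        // Equivalent to UtilsPath.createPathByAppending(base, toAppend):
        Path result = base.suffix(toAppend.toUri().getPath());
        System.out.println(result); // hdfs://namenode:8020/archive/indexes/main
    }
}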

From source file:edu.stolaf.cs.wmrserver.JobServiceHandler.java

License:Apache License

public static Path getNonexistantPath(Path parentDir, String name, FileSystem fs) throws IOException {
    Path requestedPath = new Path(parentDir, name);
    Path path = requestedPath;
    int serial = 1;
    while (fs.exists(path)) {
        path = requestedPath.suffix("-" + serial);
        serial++;
    }
    return path;
}
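
The numbering scheme in isolation: each retry derives a fresh sibling name from the originally requested path, so collisions resolve to name-1, name-2, and so on. A minimal sketch with a hypothetical path:

import org.apache.hadoop.fs.Path;

public class SerialSuffixDemo {
    public static void main(String[] args) {
        Path requested = new Path("/user/alice/jobs/wordcount");
        for (int serial = 1; serial <= 3; serial++)
            System.out.println(requested.suffix("-" + serial));
        // /user/alice/jobs/wordcount-1, -2, -3
    }
}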

From source file:eu.larkc.RDFPig.pig.PigQueriesGenerator.java

License:Apache License

public TupleSetMetadata evaluate(TupleExpr expr, Executor.Cache cache, Path outputLocation, List<String> list,
        double sample) throws QueryEvaluationException, IOException {

    // Start evaluating the expression
    Context context = new Context();
    context.pigQueries = list;
    context.cache = cache;
    context.desiredSampling = sample;
    TupleSetMetadata meta = evaluate(expr, context);

    if (meta.location == null) {
        Path outputLoc = new Path(outputLocation, Integer.toString(storageCount++));
        String storageType;
        if (expr instanceof QueryRoot) {
            storageType = "PigStorage('\\t')";
            // Note: Path is immutable; suffix() returns a new Path, so this
            // call as written discards its result and has no effect.
            outputLoc.suffix("-text");
            list.add(String.format("STORE %s INTO '%s' USING %s;\n", meta.name,
                    outputLoc.toString() + "-output", storageType));
        } else {
            if (sample == 1.0) {
                storageType = "org.apache.hadoop.zebra.pig.TableStorer('')";
                list.add(String.format("STORE %s INTO '%s' USING %s;\n", meta.name, outputLoc.toString(),
                        storageType));
            } else {
                list.add(String.format("STORE %s INTO '%s';", meta.name, outputLoc.toString()));
            }
        }
        meta.location = outputLoc;
    }
    return meta;
}
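
Note the no-op flagged in the comment above: Path is immutable, so suffix returns a new Path rather than modifying the receiver. A minimal demonstration of the pitfall:

import org.apache.hadoop.fs.Path;

public class ImmutablePathDemo {
    public static void main(String[] args) {
        Path p = new Path("/out/42");
        p.suffix("-text");         // result discarded; p is unchanged
        System.out.println(p);     // /out/42
        p = p.suffix("-text");     // reassign to keep the derived path
        System.out.println(p);     // /out/42-text
    }
}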

From source file:fi.tkk.ics.hadoop.bam.BAMInputFormat.java

License:Open Source License

private Path getIdxPath(Path path) {
    return path.suffix(".splitting-bai");
}
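
Deriving an index location purely by name, as here and in the BGZF input format at the end of this page, keeps the index next to the data file it describes. A minimal sketch:

import org.apache.hadoop.fs.Path;

public class IdxPathDemo {
    public static void main(String[] args) {
        Path bam = new Path("hdfs://nn/data/sample.bam");
        System.out.println(bam.suffix(".splitting-bai"));
        // hdfs://nn/data/sample.bam.splitting-bai, beside the BAM file
    }
}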

From source file:fi.tkk.ics.hadoop.bam.cli.plugins.View.java

License:Open Source License

@Override
protected int run(CmdLineParser parser) {

    final List<String> args = parser.getRemainingArgs();
    if (args.isEmpty()) {
        System.err.println("view :: PATH not given.");
        return 3;
    }

    Utils.toStringency(parser.getOptionValue(stringencyOpt, ValidationStringency.DEFAULT_STRINGENCY.toString()),
            "view");

    final String path = args.get(0);
    final List<String> regions = args.subList(1, args.size());

    final boolean headerOnly = parser.getBoolean(headerOnlyOpt);

    final SAMFileReader reader;

    try {
        final Path p = new Path(path);

        SeekableStream idx;
        try {
            idx = WrapSeekable.openPath(getConf(), p.suffix(".bai"));
        } catch (Exception e) {
            idx = null;
        }

        final SeekableStream sam = WrapSeekable.openPath(getConf(), p);

        reader = idx == null ? new SAMFileReader(sam, false) : new SAMFileReader(sam, idx, false);
    } catch (Exception e) {
        System.err.printf("view :: Could not open '%s': %s\n", path, e.getMessage());
        return 4;
    }

    reader.setValidationStringency(ValidationStringency.SILENT);

    final SAMFileHeader header;

    try {
        header = reader.getFileHeader();
    } catch (SAMFormatException e) {
        System.err.printf("view :: Could not parse '%s': %s\n", path, e.getMessage());
        return 4;
    }

    final String fmt = (String) parser.getOptionValue(formatOpt);

    final SAMFormat format = fmt == null ? SAMFormat.SAM : SAMFormat.valueOf(fmt.toUpperCase(Locale.ENGLISH));

    final SAMFileWriterImpl writer;
    switch (format) {
    case BAM:
        writer = new BAMFileWriter(System.out, new File("<stdout>"));
        break;
    case SAM:
        writer = new SAMTextWriter(System.out);
        break;
    default:
        writer = null;
        assert false;
    }

    writer.setSortOrder(header.getSortOrder(), true);
    writer.setHeader(header);

    if (regions.isEmpty() || headerOnly) {
        if (!headerOnly)
            if (!writeIterator(writer, reader.iterator(), path))
                return 4;

        writer.close();
        return 0;
    }

    if (!reader.isBinary()) {
        System.err.println("view :: Cannot output regions from SAM file");
        return 4;
    }

    if (!reader.hasIndex()) {
        System.err.println("view :: Cannot output regions from BAM file lacking an index");
        return 4;
    }

    reader.enableIndexCaching(true);

    boolean errors = false;

    for (final String region : regions) {
        final StringTokenizer st = new StringTokenizer(region, ":-");
        final String refStr = st.nextToken();
        final int beg, end;

        if (st.hasMoreTokens()) {
            beg = parseCoordinate(st.nextToken());
            end = st.hasMoreTokens() ? parseCoordinate(st.nextToken()) : -1;

            if (beg < 0 || end < 0) {
                errors = true;
                continue;
            }
            if (end < beg) {
                System.err.printf("view :: Invalid range, cannot end before start: '%d-%d'\n", beg, end);
                errors = true;
                continue;
            }
        } else
            beg = end = 0;

        SAMSequenceRecord ref = header.getSequence(refStr);
        if (ref == null)
            try {
                ref = header.getSequence(Integer.parseInt(refStr));
            } catch (NumberFormatException e) {
            }

        if (ref == null) {
            System.err.printf("view :: Not a valid sequence name or index: '%s'\n", refStr);
            errors = true;
            continue;
        }

        final SAMRecordIterator it = reader.queryOverlapping(ref.getSequenceName(), beg, end);

        if (!writeIterator(writer, it, path))
            return 4;
    }
    writer.close();
    return errors ? 5 : 0;
}

From source file:fi.tkk.ics.hadoop.bam.SplittingBAMIndexer.java

License:Open Source License

/**
 * Invoke a new SplittingBAMIndexer object, operating on the supplied {@link
 * org.apache.hadoop.conf.Configuration} object instead of a supplied
 * argument list.
 *
 * @throws java.lang.IllegalArgumentException if the "input" property is not
 *                                            in the Configuration
 */
public static void run(final Configuration conf) throws IOException {
    final String inputString = conf.get("input");
    if (inputString == null)
        throw new IllegalArgumentException("String property \"input\" path not found in given Configuration");

    final FileSystem fs = FileSystem.get(conf);

    // Default to a granularity level of 4096. This is generally sufficient
    // for very large BAM files, relative to a maximum heap size in the
    // gigabyte range.
    final SplittingBAMIndexer indexer = new SplittingBAMIndexer(conf.getInt("granularity", 4096));

    final Path input = new Path(inputString);

    indexer.index(fs.open(input), fs.create(input.suffix(OUTPUT_FILE_EXTENSION)),
            fs.getFileStatus(input).getLen());
}

From source file:fi.tkk.ics.hadoop.bam.util.BGZFSplitFileInputFormat.java

License:Open Source License

private Path getIdxPath(Path path) {
    return path.suffix(".bgzfi");
}