Example usage for org.apache.hadoop.fs Path equals

List of usage examples for org.apache.hadoop.fs Path equals

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.Path.equals.

Prototype

@Override
public boolean equals(Object o)
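
Path.equals delegates to the underlying URI, so two Path objects are equal only when scheme, authority, and path all match. Below is a minimal, self-contained sketch of this behavior; the authority hdfs://namenode:8020 is a placeholder chosen for illustration, not taken from the examples that follow.

import org.apache.hadoop.fs.Path;

public class PathEqualsDemo {
    public static void main(String[] args) {
        // A bare path and a fully qualified path to the same file are NOT equal,
        // because Path.equals compares the whole URI, not just the path component.
        Path relative = new Path("/user/oozie/action/output.properties");
        Path qualified = new Path("hdfs://namenode:8020/user/oozie/action/output.properties"); // placeholder authority

        System.out.println(relative.equals(qualified));  // false: scheme and authority differ
        System.out.println(relative.equals(new Path("/user/oozie/action/output.properties")));  // true: identical URIs

        // Building a child path from a parent, as several examples below do with
        // new Path(actionDir, "file"), keeps the parent's scheme and authority,
        // so the comparison behaves consistently.
        Path actionDir = new Path("hdfs://namenode:8020/user/oozie/action");
        Path child = new Path(actionDir, "output.properties");
        System.out.println(child.equals(qualified));  // true
    }
}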

Usage

From source file: org.apache.oozie.action.hadoop.LauncherMapperHelper.java

License: Apache License

/**
 * Utility function to load the contents of the action data sequence file into an
 * in-memory map.
 *
 * @param fs Action Filesystem
 * @param actionDir Path
 * @param conf Configuration
 * @return Map action data
 * @throws IOException
 * @throws InterruptedException
 */
public static Map<String, String> getActionData(final FileSystem fs, final Path actionDir,
        final Configuration conf) throws IOException, InterruptedException {
    UserGroupInformationService ugiService = Services.get().get(UserGroupInformationService.class);
    UserGroupInformation ugi = ugiService.getProxyUser(conf.get(OozieClient.USER_NAME));

    return ugi.doAs(new PrivilegedExceptionAction<Map<String, String>>() {
        @Override
        public Map<String, String> run() throws IOException {
            Map<String, String> ret = new HashMap<String, String>();
            Path seqFilePath = getActionDataSequenceFilePath(actionDir);
            if (fs.exists(seqFilePath)) {
                SequenceFile.Reader seqFile = new SequenceFile.Reader(fs, seqFilePath, conf);
                Text key = new Text(), value = new Text();
                while (seqFile.next(key, value)) {
                    ret.put(key.toString(), value.toString());
                }
                seqFile.close();
            } else { // maintain backward-compatibility. to be deprecated
                org.apache.hadoop.fs.FileStatus[] files = fs.listStatus(actionDir);
                InputStream is;
                BufferedReader reader = null;
                Properties props;
                if (files != null && files.length > 0) {
                    for (int x = 0; x < files.length; x++) {
                        Path file = files[x].getPath();
                        if (file.equals(new Path(actionDir, "externalChildIds.properties"))) {
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_EXTERNAL_CHILD_IDS,
                                    IOUtils.getReaderAsString(reader, -1));
                        } else if (file.equals(new Path(actionDir, "newId.properties"))) {
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            props = PropertiesUtils.readProperties(reader, -1);
                            ret.put(LauncherMapper.ACTION_DATA_NEW_ID, props.getProperty("id"));
                        } else if (file.equals(new Path(actionDir, LauncherMapper.ACTION_DATA_OUTPUT_PROPS))) {
                            int maxOutputData = conf.getInt(LauncherMapper.CONF_OOZIE_ACTION_MAX_OUTPUT_DATA,
                                    2 * 1024);
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_OUTPUT_PROPS, PropertiesUtils
                                    .propertiesToString(PropertiesUtils.readProperties(reader, maxOutputData)));
                        } else if (file.equals(new Path(actionDir, LauncherMapper.ACTION_DATA_STATS))) {
                            int statsMaxOutputData = conf.getInt(
                                    LauncherMapper.CONF_OOZIE_EXTERNAL_STATS_MAX_SIZE, Integer.MAX_VALUE);
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_STATS, PropertiesUtils.propertiesToString(
                                    PropertiesUtils.readProperties(reader, statsMaxOutputData)));
                        } else if (file.equals(new Path(actionDir, LauncherMapper.ACTION_DATA_ERROR_PROPS))) {
                            is = fs.open(file);
                            reader = new BufferedReader(new InputStreamReader(is));
                            ret.put(LauncherMapper.ACTION_DATA_ERROR_PROPS,
                                    IOUtils.getReaderAsString(reader, -1));
                        }
                    }
                }
            }
            return ret;
        }
    });
}

From source file: org.apache.oozie.action.hadoop.TestJavaActionExecutor.java

License: Apache License

public void testAddToCache() throws Exception {
    JavaActionExecutor ae = new JavaActionExecutor();
    Configuration conf = new XConfiguration();

    Path appPath = new Path(getFsTestCaseDir(), "wf");
    URI appUri = appPath.toUri();

    // test archive without fragment
    Path archivePath = new Path("test.jar");
    Path archiveFullPath = new Path(appPath, archivePath);
    ae.addToCache(conf, appPath, archiveFullPath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(archiveFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test archive with fragment
    Path archiveFragmentPath = new Path("test.jar#a.jar");
    Path archiveFragmentFullPath = new Path(appPath, archiveFragmentPath);
    conf.clear();
    ae.addToCache(conf, appPath, archiveFragmentFullPath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(archiveFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .so without fragment
    Path appSoPath = new Path("lib/a.so");
    Path appSoFullPath = new Path(appPath, appSoPath);
    conf.clear();
    ae.addToCache(conf, appPath, appSoFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appSoFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .so with fragment
    Path appSoFragmentPath = new Path("lib/a.so#a.so");
    Path appSoFragmentFullPath = new Path(appPath, appSoFragmentPath);
    conf.clear();
    ae.addToCache(conf, appPath, appSoFragmentFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appSoFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .jar without fragment where app path is on same cluster as jar path
    Path appJarPath = new Path("lib/a.jar");
    Path appJarFullPath = new Path(appPath, appJarPath);
    conf = new Configuration();
    conf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    ae.addToCache(conf, appPath, appJarFullPath.toString(), false);
    // assert that mapred.cache.files contains jar URI path (full on Hadoop-2)
    Path jarPath = HadoopShims.isYARN() ? new Path(appJarFullPath.toUri())
            : new Path(appJarFullPath.toUri().getPath());
    assertTrue(conf.get("mapred.cache.files").contains(jarPath.toString()));
    // assert that dist cache classpath contains jar URI path
    Path[] paths = DistributedCache.getFileClassPaths(conf);
    boolean pathFound = false;
    for (Path path : paths) {
        if (path.equals(jarPath)) {
            pathFound = true;
            break;
        }
    }
    assertTrue(pathFound);
    assertTrue(DistributedCache.getSymlink(conf));

    // test .jar without fragment where app path is on a different cluster than jar path
    appJarPath = new Path("lib/a.jar");
    appJarFullPath = new Path(appPath, appJarPath);
    Path appDifferentClusterPath = new Path(new URI(appUri.getScheme(), null, appUri.getHost() + "x",
            appUri.getPort(), appUri.getPath(), appUri.getQuery(), appUri.getFragment()));
    conf.clear();
    conf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    ae.addToCache(conf, appDifferentClusterPath, appJarFullPath.toString(), false);
    // assert that mapred.cache.files contains absolute jar URI
    assertTrue(conf.get("mapred.cache.files").contains(appJarFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test .jar with fragment
    Path appJarFragmentPath = new Path("lib/a.jar#a.jar");
    Path appJarFragmentFullPath = new Path(appPath, appJarFragmentPath);
    conf.clear();
    conf.set(WorkflowAppService.HADOOP_USER, getTestUser());
    ae.addToCache(conf, appPath, appJarFragmentFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appJarFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test regular file without fragment
    Path appFilePath = new Path("lib/a.txt");
    Path appFileFullPath = new Path(appPath, appFilePath);
    conf.clear();
    ae.addToCache(conf, appPath, appFileFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appFileFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test regular file with fragment
    Path appFileFragmentPath = new Path("lib/a.txt#a.txt");
    Path appFileFragmentFullPath = new Path(appPath, appFileFragmentPath);
    conf.clear();
    ae.addToCache(conf, appPath, appFileFragmentFullPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(appFileFragmentFullPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test path starting with "/" for archive
    Path testPath = new Path("/tmp/testpath/a.jar#a.jar");
    conf.clear();
    ae.addToCache(conf, appPath, testPath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(testPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test path starting with "/" for cache.file
    conf.clear();
    ae.addToCache(conf, appPath, testPath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(testPath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test absolute path for archive
    Path testAbsolutePath = new Path("hftp://namenode.test.com:8020/tmp/testpath/a.jar#a.jar");
    conf.clear();
    ae.addToCache(conf, appPath, testAbsolutePath.toString(), true);
    assertTrue(conf.get("mapred.cache.archives").contains(testAbsolutePath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test absolute path for cache files
    conf.clear();
    ae.addToCache(conf, appPath, testAbsolutePath.toString(), false);
    assertTrue(conf.get("mapred.cache.files").contains(testAbsolutePath.toString()));
    assertTrue(DistributedCache.getSymlink(conf));

    // test relative path for archive
    conf.clear();
    ae.addToCache(conf, appPath, "lib/a.jar#a.jar", true);
    assertTrue(conf.get("mapred.cache.archives").contains(appUri.getPath() + "/lib/a.jar#a.jar"));
    assertTrue(DistributedCache.getSymlink(conf));

    // test relative path for cache files
    conf.clear();
    ae.addToCache(conf, appPath, "lib/a.jar#a.jar", false);
    assertTrue(conf.get("mapred.cache.files").contains(appUri.getPath() + "/lib/a.jar#a.jar"));
    assertTrue(DistributedCache.getSymlink(conf));
}

From source file: org.apache.reef.runtime.yarn.driver.restart.DFSEvaluatorLogOverwriteReaderWriter.java

License: Apache License

/**
 * Gets the alternative path. Returns one of changeLogPath and changeLogAltPath.
 */
private synchronized Path getAlternativePath(final Path path) {
    if (path.equals(changeLogPath)) {
        return changeLogAltPath;
    }

    return changeLogPath;
}

From source file: org.apache.spark.network.yarn.YarnShuffleService.java

License: Apache License

/**
 * Figure out the recovery path and handle moving the DB if YARN NM recovery is enabled
 * and a DB created by an older version of the shuffle service exists in the NM's local dirs.
 */
protected File initRecoveryDb(String dbName) {
    Preconditions.checkNotNull(_recoveryPath, "recovery path should not be null if NM recovery is enabled");

    File recoveryFile = new File(_recoveryPath.toUri().getPath(), dbName);
    if (recoveryFile.exists()) {
        return recoveryFile;
    }

    // db doesn't exist in recovery path go check local dirs for it
    String[] localDirs = _conf.getTrimmedStrings("yarn.nodemanager.local-dirs");
    for (String dir : localDirs) {
        File f = new File(new Path(dir).toUri().getPath(), dbName);
        if (f.exists()) {
            // If the recovery path is set then either NM recovery is enabled or another recovery
            // DB has been initialized. If NM recovery is enabled and had set the recovery path
            // make sure to move all DBs to the recovery path from the old NM local dirs.
            // If another DB was initialized first just make sure all the DBs are in the same
            // location.
            Path newLoc = new Path(_recoveryPath, dbName);
            Path copyFrom = new Path(f.toURI());
            if (!newLoc.equals(copyFrom)) {
                logger.info("Moving " + copyFrom + " to: " + newLoc);
                try {
                    // The move here needs to handle moving non-empty directories across NFS mounts
                    FileSystem fs = FileSystem.getLocal(_conf);
                    fs.rename(copyFrom, newLoc);
                } catch (Exception e) {
                    // Fail to move recovery file to new path, just continue on with new DB location
                    logger.error("Failed to move recovery file {} to the path {}", dbName,
                            _recoveryPath.toString(), e);
                }
            }
            return new File(newLoc.toUri().getPath());
        }
    }

    return new File(_recoveryPath.toUri().getPath(), dbName);
}

From source file: org.apache.sysml.runtime.matrix.mapred.MRJobConfiguration.java

License: Apache License

/**
 * Specific method because we need to set the input converter class according to the 
 * input infos. Note that any mapper instruction before reblock can work on binary block
 * if it can work on binary cell as well.
 *
 * @param job job configuration
 * @param inputIndexes array of byte indexes
 * @param inputs array of input string
 * @param inputInfos array of input infos
 * @param brlens array of block row lengths
 * @param bclens array of block column lengths
 * @throws Exception if Exception occurs
 */
public static void setUpMultipleInputsReblock(JobConf job, byte[] inputIndexes, String[] inputs,
        InputInfo[] inputInfos, int[] brlens, int[] bclens) throws Exception {
    if (inputs.length != inputInfos.length)
        throw new Exception("number of inputs and inputInfos does not match");

    //set up names of the input matrices and their inputformat information
    job.setStrings(INPUT_MATRICIES_DIRS_CONFIG, inputs);
    MRJobConfiguration.setMapFunctionInputMatrixIndexes(job, inputIndexes);

    for (int i = 0; i < inputs.length; i++) {
        ConvertTarget target = ConvertTarget.CELL;
        if (inputInfos[i] == InputInfo.BinaryBlockInputInfo)
            target = ConvertTarget.BLOCK;
        setInputInfo(job, inputIndexes[i], inputInfos[i], brlens[i], bclens[i], target);
    }

    //remove redundant input files
    ArrayList<Path> paths = new ArrayList<>();
    for (int i = 0; i < inputs.length; i++) {
        String name = inputs[i];
        Path p = new Path(name);
        boolean redundant = false;
        for (Path ep : paths)
            if (ep.equals(p)) {
                redundant = true;
                break;
            }
        if (redundant)
            continue;
        MultipleInputs.addInputPath(job, p, inputInfos[i].inputFormatClass);
        paths.add(p);
    }
}

From source file: org.apache.tajo.storage.s3.InMemoryFileSystemStore.java

License: Apache License

@Override
public Set<Path> listSubPaths(Path path) throws IOException {
    Path normalizedPath = normalize(path);
    // This is inefficient but more than adequate for testing purposes.
    Set<Path> subPaths = new LinkedHashSet<Path>();
    for (Path p : inodes.tailMap(normalizedPath).keySet()) {
        if (normalizedPath.equals(p.getParent())) {
            subPaths.add(p);
        }
    }
    return subPaths;
}

From source file: org.apache.tajo.storage.s3.S3TableSpace.java

License: Apache License

/**
 * Calculate the total size of all objects in the indicated bucket
 *
 * @param path to use
 * @return calculated size
 * @throws IOException
 */
@Override
public long calculateSize(Path path) throws IOException {
    long totalBucketSize = 0L;

    if (s3Enabled) {
        String key = pathToKey(path);

        final FileStatus fileStatus = fs.getFileStatus(path);

        if (fileStatus.isDirectory()) {
            if (!key.isEmpty()) {
                key = key + "/";
            }

            ListObjectsRequest request = new ListObjectsRequest();
            request.setBucketName(uri.getHost());
            request.setPrefix(key);
            request.setMaxKeys(maxKeys);

            if (LOG.isDebugEnabled()) {
                LOG.debug("listStatus: doing listObjects for directory " + key);
            }

            ObjectListing objects = s3.listObjects(request);

            while (true) {
                for (S3ObjectSummary summary : objects.getObjectSummaries()) {
                    Path keyPath = keyToPath(summary.getKey()).makeQualified(uri, fs.getWorkingDirectory());

                    // Skip over keys that are ourselves and old S3N _$folder$ files
                    if (keyPath.equals(path) || summary.getKey().endsWith(S3N_FOLDER_SUFFIX)) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Ignoring: " + keyPath);
                        }
                        continue;
                    }

                    if (!objectRepresentsDirectory(summary.getKey(), summary.getSize())) {
                        totalBucketSize += summary.getSize();
                    }
                }

                if (objects.isTruncated()) {
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("listStatus: list truncated - getting next batch");
                    }
                    objects = s3.listNextBatchOfObjects(objects);
                } else {
                    break;
                }
            }
        } else {
            return fileStatus.getLen();
        }
    } else {
        totalBucketSize = fs.getContentSummary(path).getLength();
    }

    return totalBucketSize;
}

From source file: org.bgi.flexlab.gaea.data.mapreduce.input.bam.GaeaBamInputFormat.java

License: Open Source License

private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    Path file = ((FileSplit) splits.get(i)).getPath();

    SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    int splitsEnd = splits.size();
    for (int j = i; j < splitsEnd; j++) {
        if (!file.equals(((FileSplit) splits.get(j)).getPath()))
            splitsEnd = j;
    }
    for (int j = i; j < splitsEnd; j++) {
        FileSplit fileSplit = (FileSplit) splits.get(j);

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        Long blockStart = idx.nextAlignment(start);

        Long blockEnd = Long.valueOf(j == splitsEnd - 1 ? idx.prevAlignment(end).longValue() | 0xFFFF
                : idx.nextAlignment(end).longValue());

        if (blockStart == null) {
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);
        }
        if (blockEnd == null) {
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);
        }
        newSplits.add(new FileVirtualSplit(file, blockStart.longValue(), blockEnd.longValue(),
                fileSplit.getLocations()));
    }
    return splitsEnd;
}

From source file: org.broadinstitute.sting.gatk.hadoop.BAMInputFormat.java

License: Open Source License

private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    final Path file = ((FileSplit) splits.get(i)).getPath();

    final SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    int splitsEnd = splits.size();
    for (int j = i; j < splitsEnd; ++j)
        if (!file.equals(((FileSplit) splits.get(j)).getPath()))
            splitsEnd = j;

    for (int j = i; j < splitsEnd; ++j) {
        final FileSplit fileSplit = (FileSplit) splits.get(j);

        final long start = fileSplit.getStart();
        final long end = start + fileSplit.getLength();

        final Long blockStart = idx.nextAlignment(start);

        // The last split needs to end where the last alignment ends, but
        // the index doesn't store that data (whoops); we only know where the
        // last alignment begins. Fortunately there's no need to change the index
        // format for this: we can just set the end to the maximal length of
        // the final BGZF block (0xffff), and then read until BAMRecordCodec
        // hits EOF.
        final Long blockEnd = j == splitsEnd - 1 ? idx.prevAlignment(end) | 0xffff : idx.nextAlignment(end);

        if (blockStart == null)
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);

        if (blockEnd == null)
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);

        newSplits.add(new FileVirtualSplit(file, blockStart, blockEnd, fileSplit.getLocations()));
    }
    return splitsEnd;
}

From source file: org.broadinstitute.sting.gatk.hadoop.LociInputFormat.java

License: Open Source License

private int addIndexedSplits(List<InputSplit> splits, int i, List<InputSplit> newSplits, Configuration cfg)
        throws IOException {
    final Path file = ((FileSplit) splits.get(i)).getPath();

    final SplittingBAMIndex idx = new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file)));

    int splitsEnd = splits.size();
    for (int j = i; j < splitsEnd; ++j)
        if (!file.equals(((FileSplit) splits.get(j)).getPath()))
            splitsEnd = j;

    for (int j = i; j < splitsEnd; ++j) {
        final FileSplit fileSplit = (FileSplit) splits.get(j);

        final long start = fileSplit.getStart();
        final long end = start + fileSplit.getLength();

        final Long blockStart = idx.nextAlignment(start);

        // The last split needs to end where the last alignment ends, but
        // the index doesn't store that data (whoops); we only know where the
        // last alignment begins. Fortunately there's no need to change the index
        // format for this: we can just set the end to the maximal length of
        // the final BGZF block (0xffff), and then read until BAMRecordCodec
        // hits EOF.
        Long blockEnd =
                // j == splitsEnd-1 ? idx.prevAlignment(end) | 0xffff
                j == splitsEnd - 1 ? idx.prevAlignment(end) : idx.nextAlignment(end);

        if (blockStart == null)
            throw new RuntimeException("Internal error or invalid index: no block start for " + start);

        if (blockEnd == null)
            throw new RuntimeException("Internal error or invalid index: no block end for " + end);
        newSplits.add(new FileVirtualSplit(file, blockStart, blockEnd, fileSplit.getLocations()));
    }
    return splitsEnd;
}