Example usage for org.apache.hadoop.fs Path equals

List of usage examples for org.apache.hadoop.fs Path equals

Introduction

On this page you can find usage examples for org.apache.hadoop.fs.Path#equals, drawn from open-source projects.

Prototype

@Override
public boolean equals(Object o)
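
Path equality in Hadoop is URI-based: two Path objects are equal only when their underlying URIs (scheme, authority and path) match after normalization. Below is a minimal illustrative sketch; the class and path names are made up for demonstration and do not come from the examples on this page.

import org.apache.hadoop.fs.Path;

public class PathEqualsDemo {
    public static void main(String[] args) {
        Path a = new Path("/data/input");
        Path b = new Path("/data", "input");                   // normalizes to the same URI as a
        Path c = new Path("hdfs://namenode:8020/data/input");  // fully qualified form

        System.out.println(a.equals(b)); // true: both URIs are "/data/input"
        System.out.println(a.equals(c)); // false: the comparison covers the whole URI,
                                         // including scheme and authority
    }
}

As the examples below show, callers typically either qualify both paths against the same FileSystem before comparing them (for example with makeQualified) or walk one path up with getParent() until the two can be meaningfully compared.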


Usage

From source file:com.facebook.presto.hive.metastore.SemiTransactionalHiveMetastore.java

License:Apache License

private static boolean isSameOrParent(Path parent, Path child) {
    int parentDepth = parent.depth();
    int childDepth = child.depth();
    if (parentDepth > childDepth) {
        return false;
    }
    for (int i = childDepth; i > parentDepth; i--) {
        child = child.getParent();
    }
    return parent.equals(child);
}
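
A brief, hypothetical usage sketch for a helper like this (the table and partition paths are illustrative, not taken from the Presto source):

Path table = new Path("hdfs://nn:8020/warehouse/db/orders");
Path partition = new Path("hdfs://nn:8020/warehouse/db/orders/ds=2020-01-01");

isSameOrParent(table, partition);   // true: the deeper path is walked up to the parent's depth, then compared with equals
isSameOrParent(partition, table);   // false: a "parent" that is deeper than the child can never contain it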

From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystem.java

License:Open Source License

@Override
public URI getGcsPath(Path hadoopPath) {
    LOG.debug("GHFS.getGcsPath: {}", hadoopPath);

    // Convert to a fully qualified absolute path; the Path object will call back to get our current
    // workingDirectory as part of fully resolving the path.
    Path resolvedPath = hadoopPath.makeQualified(this);

    // Handle root.
    if (resolvedPath.equals(getFileSystemRoot())) {
        return GoogleCloudStorageFileSystem.GCS_ROOT;
    }

    // Need to convert scheme to GCS scheme and possibly move bucket into authority
    String authorityString = null;
    if (!Strings.isNullOrEmpty(resolvedPath.toUri().getAuthority())) {
        authorityString = "/" + resolvedPath.toUri().getAuthority();
    } else {
        authorityString = "";
    }
    // Construct GCS path uri.
    String path = GoogleCloudStorageFileSystem.SCHEME + ":/" + authorityString + resolvedPath.toUri().getPath();
    URI gcsPath = null;
    try {
        gcsPath = new URI(path);
    } catch (URISyntaxException e) {
        String msg = String.format("Invalid path: %s", hadoopPath);
        throw new IllegalArgumentException(msg, e);
    }

    LOG.debug("GHFS.getGcsPath: {} -> {}", hadoopPath, gcsPath);
    return gcsPath;
}

From source file:com.hadoop.mapreduce.FourMcInputFormat.java

License:BSD License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);

    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();

    Path prevFile = null;
    FourMcBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMcBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMcBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMcBlockIndex.NOT_FOUND && fourMcEnd != FourMcBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mc split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }

    }

    return result;
}

From source file:com.hadoop.mapreduce.FourMzInputFormat.java

License:BSD License

@Override
public List<InputSplit> getSplits(JobContext job) throws IOException {
    Configuration conf = HadoopUtils.getConfiguration(job);

    List<InputSplit> defaultSplits = super.getSplits(job);
    List<InputSplit> result = new ArrayList<InputSplit>();

    Path prevFile = null;
    FourMzBlockIndex prevIndex = null;

    for (InputSplit genericSplit : defaultSplits) {
        // Load the index.
        FileSplit fileSplit = (FileSplit) genericSplit;
        Path file = fileSplit.getPath();
        FileSystem fs = file.getFileSystem(conf);

        FourMzBlockIndex index;
        if (file.equals(prevFile)) {
            index = prevIndex;
        } else {
            index = FourMzBlockIndex.readIndex(fs, file);
            prevFile = file;
            prevIndex = index;
        }

        if (index == null) {
            throw new IOException("BlockIndex unreadable for " + file);
        }

        if (index.isEmpty()) { // leave the default split for empty block index
            result.add(fileSplit);
            continue;
        }

        long start = fileSplit.getStart();
        long end = start + fileSplit.getLength();

        long fourMcStart = index.alignSliceStartToIndex(start, end);
        long fourMcEnd = index.alignSliceEndToIndex(end, fs.getFileStatus(file).getLen());

        if (fourMcStart != FourMzBlockIndex.NOT_FOUND && fourMcEnd != FourMzBlockIndex.NOT_FOUND) {
            result.add(new FileSplit(file, fourMcStart, fourMcEnd - fourMcStart, fileSplit.getLocations()));
            LOG.debug("Added 4mz split for " + file + "[start=" + fourMcStart + ", length="
                    + (fourMcEnd - fourMcStart) + "]");
        }

    }

    return result;
}

From source file:com.hdfs.concat.clean.Clean.java

License:Apache License

public void warnOrDelete(Path p) throws IOException {
    if (conf.getBoolean(WARN_MODE, false)) {
        System.out.println("DELETE " + p);
    } else if (!p.equals(new Path(conf.get(TARGET_DIR)))) {
        // delete everything except the target directory itself
        fs.delete(p);
    }
}

From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License:Open Source License

/**
 * Specific method because we need to set the input converter class according to the
 * input infos. Note that any mapper instruction before reblock can work on binary block
 * if it can work on binary cell as well.
 *
 * @param job
 * @param inputIndexes
 * @param inputs
 * @param inputInfos
 * @param brlens
 * @param bclens
 * @throws Exception
 */
public static void setUpMultipleInputsReblock(JobConf job, byte[] inputIndexes, String[] inputs,
        InputInfo[] inputInfos, int[] brlens, int[] bclens) throws Exception {
    if (inputs.length != inputInfos.length)
        throw new Exception("number of inputs and inputInfos does not match");

    //set up names of the input matrices and their inputformat information
    job.setStrings(INPUT_MATRICIES_DIRS_CONFIG, inputs);
    MRJobConfiguration.setMapFunctionInputMatrixIndexes(job, inputIndexes);

    for (int i = 0; i < inputs.length; i++) {
        ConvertTarget target = ConvertTarget.CELL;
        if (inputInfos[i] == InputInfo.BinaryBlockInputInfo)
            target = ConvertTarget.BLOCK;
        setInputInfo(job, inputIndexes[i], inputInfos[i], brlens[i], bclens[i], target);
    }

    //remove redundant input files
    ArrayList<Path> paths = new ArrayList<Path>();
    for (int i = 0; i < inputs.length; i++) {
        String name = inputs[i];
        Path p = new Path(name);
        boolean redundant = false;
        for (Path ep : paths)
            if (ep.equals(p)) {
                redundant = true;
                break;
            }
        if (redundant)
            continue;
        MultipleInputs.addInputPath(job, p, inputInfos[i].inputFormatClass);
        paths.add(p);
    }
}
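
Because Path overrides both equals and hashCode consistently (both delegate to the underlying URI), the redundancy check above could also be sketched with a set instead of the nested linear scan. This is an alternative illustration under that assumption, not the SystemML code:

// Alternative sketch: de-duplicate the input paths with a LinkedHashSet
// (requires java.util.LinkedHashSet and java.util.Set).
Set<Path> uniquePaths = new LinkedHashSet<Path>();
for (int i = 0; i < inputs.length; i++) {
    Path p = new Path(inputs[i]);
    if (uniquePaths.add(p)) { // add() returns false for a Path that was already seen
        MultipleInputs.addInputPath(job, p, inputInfos[i].inputFormatClass);
    }
}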

From source file:com.inmobi.conduit.distcp.tools.mapred.CopyCommitter.java

License:Apache License

private void preserveFileAttributes(Configuration conf) throws IOException {
    String attrSymbols = conf.get(DistCpConstants.CONF_LABEL_PRESERVE_STATUS);
    LOG.info("About to preserve attributes: " + attrSymbols);

    EnumSet<FileAttribute> attributes = DistCpUtils.unpackAttributes(attrSymbols);

    Path sourceListing = new Path(conf.get(DistCpConstants.CONF_LABEL_LISTING_FILE_PATH));
    FileSystem clusterFS = sourceListing.getFileSystem(conf);
    SequenceFile.Reader sourceReader = new SequenceFile.Reader(clusterFS, sourceListing, conf);
    long totalLen = clusterFS.getFileStatus(sourceListing).getLen();

    Path targetRoot = new Path(conf.get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    long preservedEntries = 0;
    try {
        FileStatus srcFileStatus = new FileStatus();
        Text srcRelPath = new Text();

        while (sourceReader.next(srcRelPath, srcFileStatus)) {
            if (!srcFileStatus.isDir())
                continue;

            Path targetFile = new Path(targetRoot.toString() + "/" + srcRelPath);

            //Skip the root folder, preserve the status after atomic commit is complete
            //If it is changed any earlier, then atomic commit may fail
            if (targetRoot.equals(targetFile))
                continue;

            FileSystem targetFS = targetFile.getFileSystem(conf);
            DistCpUtils.preserve(targetFS, targetFile, srcFileStatus, attributes);
            preservedEntries++; // count the directory entries whose status was preserved

            HadoopCompat.progress(taskAttemptContext);
            HadoopCompat.setStatus(taskAttemptContext, "Preserving status on directory entries. ["
                    + sourceReader.getPosition() * 100 / totalLen + "%]");
        }
    } finally {
        IOUtils.closeStream(sourceReader);
    }
    LOG.info("Preserved status on " + preservedEntries + " dir entries on target");
}

From source file:com.inmobi.conduit.distcp.tools.mapred.RetriableFileCopyCommand.java

License:Apache License

private Path getTmpFile(Path target, Mapper.Context context) {
    Path targetWorkPath = new Path(
            HadoopCompat.getTaskConfiguration(context).get(DistCpConstants.CONF_LABEL_TARGET_WORK_PATH));

    Path root = target.equals(targetWorkPath) ? targetWorkPath.getParent() : targetWorkPath;
    Path tmpFile = new Path(root, ".distcp.tmp." + context.getTaskAttemptID().toString());
    LOG.info("Creating temp file: " + tmpFile);
    return tmpFile;
}

From source file:com.inmobi.databus.readers.CollectorStreamReader.java

License:Apache License

public Message readLine() throws IOException, InterruptedException {
    if (closed) {
        LOG.info("Stream closed");
        return null;
    }
    Message line = readNextLine();
    while (line == null) { // reached end of file?
        LOG.info("Read " + getCurrentFile() + " with lines:" + currentLineNum);
        if (closed) {
            LOG.info("Stream closed");
            break;
        }
        Path lastFile = getLastFile();
        // rebuild file list only if local stream is available because some files
        // may move to local stream
        if (isLocalStreamAvailable || !hasNextFile()) {
            build(); // rebuild file list
        }
        if (!hasNextFile()) { // there is no next file
            // stop reading if it read till stopTime
            if (hasReadFully()) {
                LOG.info("read all files till stop date");
                break;
            }
            if (!setIterator()) {
                LOG.info("Could not find current file in the stream");
                if (isWithinStream(getCurrentFile().getName()) || !isLocalStreamAvailable) {
                    LOG.info("Staying in collector stream as earlier files still exist");
                    startFromNextHigherAndOpen(getCurrentFile().getName());
                    updateLatestMinuteAlreadyReadForCollectorReader();
                    LOG.info("Reading from the next higher file");
                } else {
                    LOG.info("Current file would have been moved to Local Stream");
                    return null;
                }
            } else {
                waitForFlushAndReOpen();
                LOG.info("Reading from the same file after reopen");
            }
        } else {
            // reopen a file only if the file is last file on the stream
            // and local stream is not available
            if (moveToNext || (lastFile != null && !(lastFile.equals(getCurrentFile())))) {
                setNextFile();
                updateLatestMinuteAlreadyReadForCollectorReader();
                LOG.info("Reading from next file: " + getCurrentFile());
            } else {
                LOG.info("Reading from same file before moving to next");
                // open the same file
                reOpen();
                moveToNext = true;
            }
        }
        line = readNextLine();
    }
    return line;
}

From source file:com.inmobi.databus.readers.DatabusStreamWaitingReader.java

License:Apache License

/**
 * @return Zero if no checkpoint is present for that minute, or if the
 *         checkpointed file and the current file are not the same;
 *         otherwise, the line number from the checkpoint.
 */
@Override
protected long getLineNumberForFirstFile(FileStatus firstFile) {
    int minute = getMinuteFromFile(firstFile);
    PartitionCheckpoint partitionChkPoint = pChkpoints.get(Integer.valueOf(minute)).pck;
    if (partitionChkPoint != null) {
        Path checkPointedFileName = new Path(streamDir, partitionChkPoint.getFileName());
        // check whether current file and checkpoint file are same
        if (checkPointedFileName.equals(firstFile.getPath())) {
            return partitionChkPoint.getLineNum();
        }
    }
    return 0;
}