Example usage for org.apache.hadoop.fs Path toString

List of usage examples for org.apache.hadoop.fs Path toString

Introduction

In this page you can find the example usage for org.apache.hadoop.fs Path toString.

Prototype

@Override
    public String toString() 

Source Link

Usage

From source file:com.ibm.stocator.fs.common.StocatorPath.java

License:Open Source License

/**
 * Get object name with data root/*from w ww  . ja  v a2s .  c  o m*/
 *
 * @param fullPath the path
 * @param addTaskIdCompositeName add task id composite
 * @param dataRoot the data root
 * @param hostNameScheme hostname
 * @return composite of data root and object name
 * @throws IOException if object name is missing
 */
public String getObjectNameRoot(Path fullPath, boolean addTaskIdCompositeName, String dataRoot,
        String hostNameScheme) throws IOException {
    if (tempFileOriginator.equals(DEFAULT_FOUTPUTCOMMITTER_V1)) {
        return dataRoot + "/" + parseHadoopFOutputCommitterV1(fullPath, addTaskIdCompositeName, hostNameScheme);
    }
    return fullPath.toString();
}

From source file:com.ibm.stocator.fs.common.StocatorPath.java

License:Open Source License

/**
 * Extract object name from path. If addTaskIdCompositeName=true then
 * schema://tone1.lvm/aa/bb/cc/one3.txt/_temporary/0/_temporary/
 * attempt_201610052038_0001_m_000007_15/part-00007 will extract get
 * aa/bb/cc/201610052038_0001_m_000007_15-one3.txt
 * otherwise object name will be aa/bb/cc/one3.txt
 *
 * @param path path to extract from/*from w  w  w .  j a v  a  2s.co m*/
 * @param addTaskIdCompositeName if true will add task-id to the object name
 * @param hostNameScheme the host name
 * @return new object name
 * @throws IOException if object name is missing
 */
private String parseHadoopFOutputCommitterV1(Path fullPath, boolean addTaskIdCompositeName,
        String hostNameScheme) throws IOException {
    String boundary = HADOOP_TEMPORARY;
    String path = fullPath.toString();
    String noPrefix = path.substring(hostNameScheme.length());
    int npIdx = noPrefix.indexOf(boundary);
    String objectName = "";
    if (npIdx >= 0) {
        if (npIdx == 0 || npIdx == 1 && noPrefix.startsWith("/")) {
            //no object name present
            //schema://tone1.lvm/_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007
            //schema://tone1.lvm_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007
            throw new IOException("Object name is missing");
        } else {
            //path matches pattern in javadoc
            objectName = noPrefix.substring(0, npIdx - 1);
            if (addTaskIdCompositeName) {
                String taskAttempt = Utils.extractTaskID(path);
                String objName = fullPath.getName();
                if (taskAttempt != null && !objName.startsWith(HADOOP_ATTEMPT)) {
                    objName = fullPath.getName() + "-" + taskAttempt;
                }
                objectName = objectName + "/" + objName;
            }
        }
        return objectName;
    }
    return noPrefix;
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * Resolves the {@link FileStatus} for {@code path}.
 *
 * Lookup order: the in-memory cache; the "path is the container itself"
 * shortcut; the object stored under the key; the object stored under
 * {@code key + "/"}; and finally a one-entry listing with the key as prefix
 * to detect a pseudo directory. Every successful lookup is stored in the
 * in-memory cache under {@code path.toString()}.
 *
 * Paths containing {@code HADOOP_TEMPORARY} are always reported as not found.
 *
 * @param hostName host name, compared against the path to detect the container
 * @param path path to resolve
 * @param msg not used by this method
 * @return the status of the object or (pseudo) directory
 * @throws FileNotFoundException if nothing was found for the path
 * @throws IOException declared by the interface; see the review note on the
 *         outer catch block below
 */
@Override
public FileStatus getFileStatus(String hostName, Path path, String msg)
        throws IOException, FileNotFoundException {
    FileStatus res = null;
    FileStatus cached = memoryCache.getFileStatus(path.toString());
    if (cached != null) {
        return cached;
    }
    LOG.trace("getFileStatus(start) for {}, hostname: {}", path, hostName);
    /*
     * The requested path is equal to hostName. HostName is equal to
     * hostNameScheme, thus the container. Therefore we have no object to look
     * for and we return the FileStatus as a directory. The modification time
     * of the container is reported as 0.
     */
    if (path.toString().equals(hostName) || (path.toString().length() + 1 == hostName.length())) {
        LOG.trace("getFileStatus(completed) {}", path);
        res = new FileStatus(0L, true, 1, mBlockSize, 0L, path);
        memoryCache.putFileStatus(path.toString(), res);
        return res;
    }
    if (path.toString().contains(HADOOP_TEMPORARY)) {
        LOG.debug("getFileStatus on temp object {}. Return not found", path.toString());
        throw new FileNotFoundException("Not found " + path.toString());
    }
    String key = pathToKey(hostName, path);
    LOG.debug("getFileStatus: on original key {}", key);
    try {
        FileStatus fileStatus = null;
        try {
            fileStatus = getFileStatusKeyBased(key, path);
        } catch (AmazonS3Exception e) {
            // 404 just means "try the next strategy"; anything else is an error.
            if (e.getStatusCode() != 404) {
                throw new IOException(e);
            }
        }
        if (fileStatus != null) {
            LOG.trace("getFileStatus(completed) {}", path);
            memoryCache.putFileStatus(path.toString(), fileStatus);
            return fileStatus;
        }
        // means key returned not found. Trying to call get file status on key/
        // probably not needed this call
        if (!key.endsWith("/")) {
            String newKey = key + "/";
            try {
                // Log the key that is actually being probed (the original logged `key`).
                LOG.debug("getFileStatus: original key not found. Alternative key {}", newKey);
                fileStatus = getFileStatusKeyBased(newKey, path);
            } catch (AmazonS3Exception e) {
                if (e.getStatusCode() != 404) {
                    throw new IOException(e);
                }
            }

            if (fileStatus != null) {
                LOG.trace("getFileStatus(completed) {}", path);
                memoryCache.putFileStatus(path.toString(), fileStatus);
                return fileStatus;
            } else {
                // if here: both key and key/ returned not found.
                // trying to see if pseudo directory of the form
                // a/b/key/d/e (a/b/key/ doesn't exists by itself)
                // perform listing on the key
                LOG.debug("getFileStatus: Modifined key {} not found. Trying to lisr", key);
                key = maybeAddTrailingSlash(key);
                ListObjectsRequest request = new ListObjectsRequest();
                request.setBucketName(mBucket);
                request.setPrefix(key);
                request.setDelimiter("/");
                // One entry is enough to prove that something lives under the prefix.
                request.setMaxKeys(1);

                ObjectListing objects = mClient.listObjects(request);
                if (!objects.getCommonPrefixes().isEmpty() || !objects.getObjectSummaries().isEmpty()) {
                    LOG.trace("getFileStatus(completed) {}", path);
                    res = new FileStatus(0, true, 1, 0, 0, path);
                    memoryCache.putFileStatus(path.toString(), res);
                    return res;
                } else if (key.isEmpty()) {
                    LOG.debug("Found root directory");
                    LOG.trace("getFileStatus(completed) {}", path);
                    res = new FileStatus(0, true, 1, 0, 0, path);
                    memoryCache.putFileStatus(path.toString(), res);
                    return res;
                }
            }
        }
    } catch (AmazonS3Exception e) {
        // Only 403 is escalated; other status codes fall through to "not found" below.
        if (e.getStatusCode() == 403) {
            throw new IOException(e);
        }
    } catch (Exception e) {
        // NOTE(review): this also catches the IOExceptions thrown above for
        // non-404 responses and reports them as "not found" - confirm that
        // this is intended before relying on IOException reaching the caller.
        LOG.debug("Not found {}", path.toString());
        LOG.warn(e.getMessage());
        throw new FileNotFoundException("Not found " + path.toString());
    }
    throw new FileNotFoundException("Not found " + path.toString());
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * Looks up the status of the object stored under {@code key}.
 *
 * A cached status for {@code path} is returned without contacting the
 * store. Otherwise the object metadata is fetched, the key is recorded in
 * {@code mCachedSparkOriginated} according to whether its
 * {@code data-origin} user metadata equals {@code "stocator"}, and the
 * resulting status is cached under {@code path.toString()}.
 *
 * @param key object key to query
 * @param path path used as cache key and as the path of the returned status
 * @return the cached or newly created status
 * @throws AmazonS3Exception propagated from the metadata request
 */
private FileStatus getFileStatusKeyBased(String key, Path path) throws AmazonS3Exception {
    LOG.trace("internal method - get file status by key {}, path {}", key, path);
    final String cacheKey = path.toString();
    final FileStatus hit = memoryCache.getFileStatus(cacheKey);
    if (hit != null) {
        return hit;
    }

    final ObjectMetadata metadata = mClient.getObjectMetadata(mBucket, key);
    // Constant-first equals covers the "metadata entry absent" case as false.
    final boolean createdByStocator = "stocator".equals(metadata.getUserMetaDataOf("data-origin"));
    mCachedSparkOriginated.put(key, Boolean.valueOf(createdByStocator));

    final FileStatus status = createFileStatus(metadata.getContentLength(), key,
            metadata.getLastModified(), path);
    memoryCache.putFileStatus(cacheKey, status);
    return status;
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * Creates a {@link FileStatus} for an object that was found under {@code key}.
 *
 * If {@code objectRepresentsDirectory(key, contentlength)} holds, a directory
 * status with zero length, block size and modification time is returned.
 * Otherwise a file status is returned that carries the content length,
 * {@code mBlockSize} and the modification time (0 when {@code lastModified}
 * is null).
 *
 * @param contentlength object size
 * @param key object key
 * @param lastModified last modification date, may be null
 * @param path path to attach to the status
 * @return the new status
 */
private FileStatus createFileStatus(long contentlength, String key, Date lastModified, Path path) {
    if (!objectRepresentsDirectory(key, contentlength)) {
        LOG.debug("Found exact file: normal file {}", path.toString());
        final long modifiedAt = (lastModified == null) ? 0L : lastModified.getTime();
        return new FileStatus(contentlength, false, 1, mBlockSize, modifiedAt, path);
    }
    LOG.debug("Found exact file: fake directory {}", path.toString());
    return new FileStatus(0, true, 1, 0, 0, path);
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * Tells whether an object or directory exists for {@code path}.
 *
 * Names containing {@code HADOOP_TEMPORARY} (checked after stripping a
 * leading {@code hostName}) are reported as non-existent without a lookup.
 * Otherwise the answer is whether {@code getFileStatus} returns a status;
 * a {@link FileNotFoundException} from it is mapped to {@code false}.
 *
 * @param hostName host name
 * @param path path to check
 * @return true if a status could be resolved for the path
 * @throws IOException if {@code getFileStatus} fails with anything other
 *         than {@link FileNotFoundException}
 */
@Override
public boolean exists(String hostName, Path path) throws IOException, FileNotFoundException {
    LOG.trace("Object exists: {}", path);
    final String fullName = path.toString();
    final String objName = fullName.startsWith(hostName)
            ? fullName.substring(hostName.length())
            : fullName;
    if (objName.contains(HADOOP_TEMPORARY)) {
        LOG.debug("Exists on temp object {}. Return false", objName);
        return false;
    }
    try {
        return getFileStatus(hostName, path, "exists") != null;
    } catch (FileNotFoundException notFound) {
        return false;
    }
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * Opens the object behind {@code path} for reading.
 *
 * The status is taken from the in-memory cache when available and resolved
 * through {@code getFileStatus} otherwise; its length is handed to the
 * input stream.
 *
 * @param hostName host name
 * @param path path of the object to open
 * @return a stream over the object's content
 * @throws FileNotFoundException if the path denotes a directory
 * @throws IOException if the status lookup fails
 */
@Override
public FSDataInputStream getObject(String hostName, Path path) throws IOException {
    LOG.debug("Opening '{}' for reading.", path);
    final String objectKey = pathToKey(hostName, path);

    FileStatus status = memoryCache.getFileStatus(path.toString());
    if (status == null) {
        status = getFileStatus(hostName, path, "getObject");
    }
    if (status.isDirectory()) {
        throw new FileNotFoundException("Can't open " + path + " because it is a directory");
    }

    return new FSDataInputStream(
            new COSInputStream(mBucket, objectKey, status.getLen(), mClient, readAhead, inputPolicy));
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * Deletes the object behind {@code path} and drops its cached status.
 *
 * The object name is the path with a leading {@code hostName} removed, or
 * the whole path string when it does not start with {@code hostName}.
 *
 * @param hostName host name
 * @param path path of the object to delete
 * @param recursive not used by this method
 * @return true if the delete request succeeded, false if the store answered 404
 * @throws IOException if the store reports any error other than 404
 */
@Override
public boolean delete(String hostName, Path path, boolean recursive) throws IOException {
    String obj = path.toString();
    if (path.toString().startsWith(hostName)) {
        obj = path.toString().substring(hostName.length());
    }
    LOG.debug("Object name to delete {}. Path {}", obj, path.toString());
    try {
        mClient.deleteObject(new DeleteObjectRequest(mBucket, obj));
        memoryCache.removeFileStatus(path.toString());
        return true;
    } catch (AmazonServiceException e) {
        if (e.getStatusCode() != 404) {
            throw new IOException(e);
        }
    }
    // The original call had a "{}" placeholder but no argument, so the
    // path never appeared in the log.
    LOG.warn("Delete on {} not found. Nothing to delete", path);
    return false;
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * {@inheritDoc}
 *
 * Prefix based
 * Return everything that starts with the prefix
 * Fill listing
 * Return all objects, even zero size
 * If fileStatus is null means the path is part of some name, neither object
 * or pseudo directory. Was called by Globber
 *
 * @param hostName hostName
 * @param path path
 * @param fullListing Return all objects, even zero size
 * @param prefixBased Return everything that starts with the prefix
 * @return list
 * @throws IOException if error
 */
/*
public FileStatus[] list(String hostName, Path path, boolean fullListing,
    boolean prefixBased) throws IOException {
  String key = pathToKey(hostName, path);
  ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
  ListObjectsRequest request = new ListObjectsRequest().withBucketName(mBucket).withPrefix(key);
        
  String curObj;
  if (path.toString().equals(mBucket)) {
    curObj = "";
  } else if (path.toString().startsWith(mBucket + "/")) {
    curObj = path.toString().substring(mBucket.length() + 1);
  } else if (path.toString().startsWith(hostName)) {
    curObj = path.toString().substring(hostName.length());
  } else {
    curObj = path.toString();
  }
        
  ObjectListing objectList = mClient.listObjects(request);
  List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
  if (objectSummaries.size() == 0) {
    FileStatus[] emptyRes = {};
    LOG.debug("List for bucket {} is empty", mBucket);
    return emptyRes;
  }
  boolean objectScanContinue = true;
  S3ObjectSummary prevObj = null;
  while (objectScanContinue) {
    for (S3ObjectSummary obj : objectSummaries) {
if (prevObj == null) {
  prevObj = obj;
  continue;
}
String objKey = obj.getKey();
String unifiedObjectName = extractUnifiedObjectName(objKey);
if (!prefixBased && !curObj.equals("") && !path.toString().endsWith("/")
    && !unifiedObjectName.equals(curObj) && !unifiedObjectName.startsWith(curObj + "/")) {
  LOG.trace("{} does not match {}. Skipped", unifiedObjectName, curObj);
  continue;
}
if (isSparkOrigin(unifiedObjectName) && !fullListing) {
  LOG.trace("{} created by Spark", unifiedObjectName);
  if (!isJobSuccessful(unifiedObjectName)) {
    LOG.trace("{} created by failed Spark job. Skipped", unifiedObjectName);
    if (fModeAutomaticDelete) {
      delete(hostName, new Path(objKey), true);
    }
    continue;
  } else {
    // if we here - data created by spark and job completed
    // successfully
    // however there be might parts of failed tasks that
    // were not aborted
    // we need to make sure there are no failed attempts
    if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
      // found failed that was not aborted.
      LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey);
      if (prevObj.getSize() < obj.getSize()) {
        LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
        prevObj = obj;
      }
      continue;
    }
  }
}
if (prevObj.getSize() > 0 || fullListing) {
  FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path);
  tmpResult.add(fs);
}
prevObj = obj;
    }
    boolean isTruncated = objectList.isTruncated();
    if (isTruncated) {
objectList = mClient.listNextBatchOfObjects(objectList);
objectSummaries = objectList.getObjectSummaries();
    } else {
objectScanContinue = false;
    }
  }
  if (prevObj != null && (prevObj.getSize() > 0 || fullListing)) {
    FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path);
    tmpResult.add(fs);
  }
  if (LOG.isTraceEnabled()) {
    LOG.trace("COS List to return length {}", tmpResult.size());
    for (FileStatus fs: tmpResult) {
LOG.trace("{}", fs.getPath());
    }
  }
  return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
*/
/**
 * Lists the objects found under {@code path}.
 *
 * The bucket is listed page by page with the key of {@code path} as prefix.
 * Objects are emitted one step behind the iteration ({@code prevObj}) so
 * that, for Stocator-originated data, two consecutive keys that are equal
 * once the task id is removed can be collapsed into the larger one.
 * Zero-length objects are only returned when {@code fullListing} is set;
 * otherwise they are remembered and used to decide which common prefixes
 * are returned as directories. Common prefixes are collected from every
 * page of the listing.
 *
 * @param hostName host name
 * @param path path to list
 * @param fullListing return all objects, even zero size, and skip the
 *        task-id collision handling
 * @param prefixBased not used by this method
 * @param isDirectory if non-null and true, a trailing {@code "/"} is added to the key
 * @param flatListing if false, {@code "/"} is used as listing delimiter
 * @param filter optional path filter; null accepts everything
 * @return the matching statuses; empty if the data was written by a job
 *         that {@code isJobSuccessful} does not confirm
 * @throws FileNotFoundException declared by the interface
 * @throws IOException declared by the interface
 */
@Override
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased,
        Boolean isDirectory, boolean flatListing, PathFilter filter) throws FileNotFoundException, IOException {
    LOG.debug("Native direct list status for {}", path);
    ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
    String key = pathToKey(hostName, path);
    if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/")) {
        key = key + "/";
        LOG.debug("listNativeDirect modify key to {}", key);
    }

    Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>();
    ListObjectsRequest request = new ListObjectsRequest();
    request.setBucketName(mBucket);
    request.setMaxKeys(5000);
    request.setPrefix(key);
    if (!flatListing) {
        request.setDelimiter("/");
    }

    ObjectListing objectList = mClient.listObjects(request);

    List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
    // Private, growable copy: the prefixes of every following page are
    // appended below. The original kept only the first page's prefixes, so
    // directories reported on later pages of a truncated listing were lost.
    List<String> commonPrefixes = new ArrayList<String>(objectList.getCommonPrefixes());

    boolean objectScanContinue = true;
    S3ObjectSummary prevObj = null;
    // start FTA logic
    boolean stocatorOrigin = isSparkOrigin(key, path.toString());
    if (stocatorOrigin) {
        LOG.debug("Stocator origin is true for {}", key);
        if (!isJobSuccessful(key)) {
            LOG.debug("{} created by failed Spark job. Skipped", key);
            if (fModeAutomaticDelete) {
                delete(hostName, new Path(key), true);
            }
            return new FileStatus[0];
        }
    }
    while (objectScanContinue) {
        for (S3ObjectSummary obj : objectSummaries) {
            // The very first object only primes prevObj; it is emitted on a later step.
            if (prevObj == null) {
                prevObj = obj;
                continue;
            }
            String objKey = obj.getKey();
            String unifiedObjectName = extractUnifiedObjectName(objKey);
            LOG.debug("list candidate {}, unified name {}", objKey, unifiedObjectName);
            if (stocatorOrigin && !fullListing) {
                LOG.trace("{} created by Spark", unifiedObjectName);
                // if we here - data created by spark and job completed
                // successfully
                // however there be might parts of failed tasks that
                // were not aborted
                // we need to make sure there are no failed attempts
                if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
                    // found failed that was not aborted.
                    LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey);
                    if (prevObj.getSize() < obj.getSize()) {
                        LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
                        prevObj = obj;
                    }
                    continue;
                }
            }
            FileStatus fs = createFileStatus(prevObj, hostName, path);
            if (fs.getLen() > 0 || fullListing) {
                LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
                if (filter == null || filter.accept(fs.getPath())) {
                    tmpResult.add(fs);
                } else {
                    LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
                }
            } else {
                emptyObjects.put(fs.getPath().toString(), fs);
            }
            prevObj = obj;
        }
        boolean isTruncated = objectList.isTruncated();
        if (isTruncated) {
            objectList = mClient.listNextBatchOfObjects(objectList);
            objectSummaries = objectList.getObjectSummaries();
            commonPrefixes.addAll(objectList.getCommonPrefixes());
        } else {
            objectScanContinue = false;
        }
    }

    // The loop emits one step behind, so the final candidate is still pending.
    if (prevObj != null) {
        FileStatus fs = createFileStatus(prevObj, hostName, path);
        LOG.debug("Adding the last object from the list {}", fs.getPath());
        if (fs.getLen() > 0 || fullListing) {
            LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
            if (filter == null || filter.accept(fs.getPath())) {
                memoryCache.putFileStatus(fs.getPath().toString(), fs);
                tmpResult.add(fs);
            } else {
                LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter);
            }
        } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) {
            emptyObjects.put(fs.getPath().toString(), fs);
        }
    }

    // get common prefixes
    for (String comPrefix : commonPrefixes) {
        LOG.debug("Common prefix is {}", comPrefix);
        if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString())
                || emptyObjects.isEmpty()) {
            FileStatus status = new COSFileStatus(true, false, keyToQualifiedPath(hostName, comPrefix));
            LOG.debug("Match between common prefix and empty object {}. Adding to result", comPrefix);
            if (filter == null || filter.accept(status.getPath())) {
                memoryCache.putFileStatus(status.getPath().toString(), status);
                tmpResult.add(status);
            } else {
                LOG.trace("Common prefix {} rejected by path filter during list. Filter {}", status.getPath(),
                        filter);
            }
        }
    }
    return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}

From source file:com.ibm.stocator.fs.cos.COSAPIClient.java

License:Apache License

/**
 * Merges a path with an object key.
 *
 * The path itself is returned only when its parent equals {@code hostName}
 * and its last component equals {@code objectKey}; in every other case the
 * result is {@code hostName + objectKey}.
 *
 * @param hostName host name
 * @param p path
 * @param objectKey object key
 * @return merged path
 */
private String getMergedPath(String hostName, Path p, String objectKey) {
    final Path parent = p.getParent();
    final boolean directChildOfHost = parent != null
            && p.getName() != null
            && parent.toString().equals(hostName);
    if (directChildOfHost && objectKey.equals(p.getName())) {
        return p.toString();
    }
    return hostName + objectKey;
}