List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
From source file:com.ibm.stocator.fs.common.StocatorPath.java
License:Open Source License
/** * Get object name with data root/*from w ww . ja v a2s . c o m*/ * * @param fullPath the path * @param addTaskIdCompositeName add task id composite * @param dataRoot the data root * @param hostNameScheme hostname * @return composite of data root and object name * @throws IOException if object name is missing */ public String getObjectNameRoot(Path fullPath, boolean addTaskIdCompositeName, String dataRoot, String hostNameScheme) throws IOException { if (tempFileOriginator.equals(DEFAULT_FOUTPUTCOMMITTER_V1)) { return dataRoot + "/" + parseHadoopFOutputCommitterV1(fullPath, addTaskIdCompositeName, hostNameScheme); } return fullPath.toString(); }
From source file:com.ibm.stocator.fs.common.StocatorPath.java
License:Open Source License
/** * Extract object name from path. If addTaskIdCompositeName=true then * schema://tone1.lvm/aa/bb/cc/one3.txt/_temporary/0/_temporary/ * attempt_201610052038_0001_m_000007_15/part-00007 will extract get * aa/bb/cc/201610052038_0001_m_000007_15-one3.txt * otherwise object name will be aa/bb/cc/one3.txt * * @param path path to extract from/*from w w w . j a v a 2s.co m*/ * @param addTaskIdCompositeName if true will add task-id to the object name * @param hostNameScheme the host name * @return new object name * @throws IOException if object name is missing */ private String parseHadoopFOutputCommitterV1(Path fullPath, boolean addTaskIdCompositeName, String hostNameScheme) throws IOException { String boundary = HADOOP_TEMPORARY; String path = fullPath.toString(); String noPrefix = path.substring(hostNameScheme.length()); int npIdx = noPrefix.indexOf(boundary); String objectName = ""; if (npIdx >= 0) { if (npIdx == 0 || npIdx == 1 && noPrefix.startsWith("/")) { //no object name present //schema://tone1.lvm/_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007 //schema://tone1.lvm_temporary/0/_temporary/attempt_201610038_0001_m_000007_15/part-0007 throw new IOException("Object name is missing"); } else { //path matches pattern in javadoc objectName = noPrefix.substring(0, npIdx - 1); if (addTaskIdCompositeName) { String taskAttempt = Utils.extractTaskID(path); String objName = fullPath.getName(); if (taskAttempt != null && !objName.startsWith(HADOOP_ATTEMPT)) { objName = fullPath.getName() + "-" + taskAttempt; } objectName = objectName + "/" + objName; } } return objectName; } return noPrefix; }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
@Override public FileStatus getFileStatus(String hostName, Path path, String msg) throws IOException, FileNotFoundException { FileStatus res = null;// w ww .ja v a2 s. co m FileStatus cached = memoryCache.getFileStatus(path.toString()); if (cached != null) { return cached; } LOG.trace("getFileStatus(start) for {}, hostname: {}", path, hostName); /* * The requested path is equal to hostName. HostName is equal to * hostNameScheme, thus the container. Therefore we have no object to look * for and we return the FileStatus as a directory. Containers have to * lastModified. */ if (path.toString().equals(hostName) || (path.toString().length() + 1 == hostName.length())) { LOG.trace("getFileStatus(completed) {}", path); res = new FileStatus(0L, true, 1, mBlockSize, 0L, path); memoryCache.putFileStatus(path.toString(), res); return res; } if (path.toString().contains(HADOOP_TEMPORARY)) { LOG.debug("getFileStatus on temp object {}. Return not found", path.toString()); throw new FileNotFoundException("Not found " + path.toString()); } String key = pathToKey(hostName, path); LOG.debug("getFileStatus: on original key {}", key); try { FileStatus fileStatus = null; try { fileStatus = getFileStatusKeyBased(key, path); } catch (AmazonS3Exception e) { if (e.getStatusCode() != 404) { throw new IOException(e); } } if (fileStatus != null) { LOG.trace("getFileStatus(completed) {}", path); memoryCache.putFileStatus(path.toString(), fileStatus); return fileStatus; } // means key returned not found. Trying to call get file status on key/ // probably not needed this call if (!key.endsWith("/")) { String newKey = key + "/"; try { LOG.debug("getFileStatus: original key not found. Alternative key {}", key); fileStatus = getFileStatusKeyBased(newKey, path); } catch (AmazonS3Exception e) { if (e.getStatusCode() != 404) { throw new IOException(e); } } if (fileStatus != null) { LOG.trace("getFileStatus(completed) {}", path); memoryCache.putFileStatus(path.toString(), fileStatus); return fileStatus; } else { // if here: both key and key/ returned not found. // trying to see if pseudo directory of the form // a/b/key/d/e (a/b/key/ doesn't exists by itself) // perform listing on the key LOG.debug("getFileStatus: Modifined key {} not found. Trying to lisr", key); key = maybeAddTrailingSlash(key); ListObjectsRequest request = new ListObjectsRequest(); request.setBucketName(mBucket); request.setPrefix(key); request.setDelimiter("/"); request.setMaxKeys(1); ObjectListing objects = mClient.listObjects(request); if (!objects.getCommonPrefixes().isEmpty() || !objects.getObjectSummaries().isEmpty()) { LOG.trace("getFileStatus(completed) {}", path); res = new FileStatus(0, true, 1, 0, 0, path); memoryCache.putFileStatus(path.toString(), res); return res; } else if (key.isEmpty()) { LOG.debug("Found root directory"); LOG.trace("getFileStatus(completed) {}", path); res = new FileStatus(0, true, 1, 0, 0, path); memoryCache.putFileStatus(path.toString(), res); return res; } } } } catch (AmazonS3Exception e) { if (e.getStatusCode() == 403) { throw new IOException(e); } } catch (Exception e) { LOG.debug("Not found {}", path.toString()); LOG.warn(e.getMessage()); throw new FileNotFoundException("Not found " + path.toString()); } throw new FileNotFoundException("Not found " + path.toString()); }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
private FileStatus getFileStatusKeyBased(String key, Path path) throws AmazonS3Exception { LOG.trace("internal method - get file status by key {}, path {}", key, path); FileStatus cachedFS = memoryCache.getFileStatus(path.toString()); if (cachedFS != null) { return cachedFS; }/*ww w .j av a2 s . co m*/ ObjectMetadata meta = mClient.getObjectMetadata(mBucket, key); String sparkOrigin = meta.getUserMetaDataOf("data-origin"); boolean stocatorCreated = false; if (sparkOrigin != null) { String tmp = (String) sparkOrigin; if (tmp.equals("stocator")) { stocatorCreated = true; } } mCachedSparkOriginated.put(key, Boolean.valueOf(stocatorCreated)); FileStatus fs = createFileStatus(meta.getContentLength(), key, meta.getLastModified(), path); memoryCache.putFileStatus(path.toString(), fs); return fs; }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
private FileStatus createFileStatus(long contentlength, String key, Date lastModified, Path path) { if (objectRepresentsDirectory(key, contentlength)) { LOG.debug("Found exact file: fake directory {}", path.toString()); return new FileStatus(0, true, 1, 0, 0, path); } else {/* w w w .ja v a2s . c om*/ LOG.debug("Found exact file: normal file {}", path.toString()); long fileModificationTime = 0L; if (lastModified != null) { fileModificationTime = lastModified.getTime(); } return new FileStatus(contentlength, false, 1, mBlockSize, fileModificationTime, path); } }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
@Override public boolean exists(String hostName, Path path) throws IOException, FileNotFoundException { LOG.trace("Object exists: {}", path); String objName = path.toString(); if (path.toString().startsWith(hostName)) { objName = path.toString().substring(hostName.length()); }//from ww w. j a v a 2s . co m if (objName.contains(HADOOP_TEMPORARY)) { LOG.debug("Exists on temp object {}. Return false", objName); return false; } try { if (getFileStatus(hostName, path, "exists") != null) { return true; } } catch (FileNotFoundException e) { return false; } return false; }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
@Override public FSDataInputStream getObject(String hostName, Path path) throws IOException { LOG.debug("Opening '{}' for reading.", path); String key = pathToKey(hostName, path); FileStatus fileStatus = memoryCache.getFileStatus(path.toString()); if (fileStatus == null) { fileStatus = getFileStatus(hostName, path, "getObject"); }//from w w w. j av a 2 s . co m if (fileStatus.isDirectory()) { throw new FileNotFoundException("Can't open " + path + " because it is a directory"); } COSInputStream inputStream = new COSInputStream(mBucket, key, fileStatus.getLen(), mClient, readAhead, inputPolicy); return new FSDataInputStream(inputStream); }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
@Override public boolean delete(String hostName, Path path, boolean recursive) throws IOException { String obj = path.toString(); if (path.toString().startsWith(hostName)) { obj = path.toString().substring(hostName.length()); }/*from w w w .j av a 2 s .co m*/ LOG.debug("Object name to delete {}. Path {}", obj, path.toString()); try { mClient.deleteObject(new DeleteObjectRequest(mBucket, obj)); memoryCache.removeFileStatus(path.toString()); return true; } catch (AmazonServiceException e) { if (e.getStatusCode() != 404) { throw new IOException(e); } } LOG.warn("Delete on {} not found. Nothing to delete"); return false; }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
/** * {@inheritDoc}//from w w w . j a va2 s . co m * * Prefix based * Return everything that starts with the prefix * Fill listing * Return all objects, even zero size * If fileStatus is null means the path is part of some name, neither object * or pseudo directory. Was called by Globber * * @param hostName hostName * @param path path * @param fullListing Return all objects, even zero size * @param prefixBased Return everything that starts with the prefix * @return list * @throws IOException if error */ /* public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased) throws IOException { String key = pathToKey(hostName, path); ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>(); ListObjectsRequest request = new ListObjectsRequest().withBucketName(mBucket).withPrefix(key); String curObj; if (path.toString().equals(mBucket)) { curObj = ""; } else if (path.toString().startsWith(mBucket + "/")) { curObj = path.toString().substring(mBucket.length() + 1); } else if (path.toString().startsWith(hostName)) { curObj = path.toString().substring(hostName.length()); } else { curObj = path.toString(); } ObjectListing objectList = mClient.listObjects(request); List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries(); if (objectSummaries.size() == 0) { FileStatus[] emptyRes = {}; LOG.debug("List for bucket {} is empty", mBucket); return emptyRes; } boolean objectScanContinue = true; S3ObjectSummary prevObj = null; while (objectScanContinue) { for (S3ObjectSummary obj : objectSummaries) { if (prevObj == null) { prevObj = obj; continue; } String objKey = obj.getKey(); String unifiedObjectName = extractUnifiedObjectName(objKey); if (!prefixBased && !curObj.equals("") && !path.toString().endsWith("/") && !unifiedObjectName.equals(curObj) && !unifiedObjectName.startsWith(curObj + "/")) { LOG.trace("{} does not match {}. Skipped", unifiedObjectName, curObj); continue; } if (isSparkOrigin(unifiedObjectName) && !fullListing) { LOG.trace("{} created by Spark", unifiedObjectName); if (!isJobSuccessful(unifiedObjectName)) { LOG.trace("{} created by failed Spark job. Skipped", unifiedObjectName); if (fModeAutomaticDelete) { delete(hostName, new Path(objKey), true); } continue; } else { // if we here - data created by spark and job completed // successfully // however there be might parts of failed tasks that // were not aborted // we need to make sure there are no failed attempts if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) { // found failed that was not aborted. LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey); if (prevObj.getSize() < obj.getSize()) { LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey()); prevObj = obj; } continue; } } } if (prevObj.getSize() > 0 || fullListing) { FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path); tmpResult.add(fs); } prevObj = obj; } boolean isTruncated = objectList.isTruncated(); if (isTruncated) { objectList = mClient.listNextBatchOfObjects(objectList); objectSummaries = objectList.getObjectSummaries(); } else { objectScanContinue = false; } } if (prevObj != null && (prevObj.getSize() > 0 || fullListing)) { FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path); tmpResult.add(fs); } if (LOG.isTraceEnabled()) { LOG.trace("COS List to return length {}", tmpResult.size()); for (FileStatus fs: tmpResult) { LOG.trace("{}", fs.getPath()); } } return tmpResult.toArray(new FileStatus[tmpResult.size()]); } */ @Override public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased, Boolean isDirectory, boolean flatListing, PathFilter filter) throws FileNotFoundException, IOException { LOG.debug("Native direct list status for {}", path); ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>(); String key = pathToKey(hostName, path); if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/")) { key = key + "/"; LOG.debug("listNativeDirect modify key to {}", key); } Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>(); ListObjectsRequest request = new ListObjectsRequest(); request.setBucketName(mBucket); request.setMaxKeys(5000); request.setPrefix(key); if (!flatListing) { request.setDelimiter("/"); } ObjectListing objectList = mClient.listObjects(request); List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries(); List<String> commonPrefixes = objectList.getCommonPrefixes(); boolean objectScanContinue = true; S3ObjectSummary prevObj = null; // start FTA logic boolean stocatorOrigin = isSparkOrigin(key, path.toString()); if (stocatorOrigin) { LOG.debug("Stocator origin is true for {}", key); if (!isJobSuccessful(key)) { LOG.debug("{} created by failed Spark job. Skipped", key); if (fModeAutomaticDelete) { delete(hostName, new Path(key), true); } return new FileStatus[0]; } } while (objectScanContinue) { for (S3ObjectSummary obj : objectSummaries) { if (prevObj == null) { prevObj = obj; continue; } String objKey = obj.getKey(); String unifiedObjectName = extractUnifiedObjectName(objKey); LOG.debug("list candidate {}, unified name {}", objKey, unifiedObjectName); if (stocatorOrigin && !fullListing) { LOG.trace("{} created by Spark", unifiedObjectName); // if we here - data created by spark and job completed // successfully // however there be might parts of failed tasks that // were not aborted // we need to make sure there are no failed attempts if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) { // found failed that was not aborted. LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey); if (prevObj.getSize() < obj.getSize()) { LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey()); prevObj = obj; } continue; } } FileStatus fs = createFileStatus(prevObj, hostName, path); if (fs.getLen() > 0 || fullListing) { LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen()); if (filter == null) { tmpResult.add(fs); } else if (filter != null && filter.accept(fs.getPath())) { tmpResult.add(fs); } else { LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter); } } else { emptyObjects.put(fs.getPath().toString(), fs); } prevObj = obj; } boolean isTruncated = objectList.isTruncated(); if (isTruncated) { objectList = mClient.listNextBatchOfObjects(objectList); objectSummaries = objectList.getObjectSummaries(); } else { objectScanContinue = false; } } if (prevObj != null) { FileStatus fs = createFileStatus(prevObj, hostName, path); LOG.debug("Adding the last object from the list {}", fs.getPath()); if (fs.getLen() > 0 || fullListing) { LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen()); if (filter == null) { memoryCache.putFileStatus(fs.getPath().toString(), fs); tmpResult.add(fs); } else if (filter != null && filter.accept(fs.getPath())) { memoryCache.putFileStatus(fs.getPath().toString(), fs); tmpResult.add(fs); } else { LOG.trace("{} rejected by path filter during list. Filter {}", fs.getPath(), filter); } } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) { emptyObjects.put(fs.getPath().toString(), fs); } } // get common prefixes for (String comPrefix : commonPrefixes) { LOG.debug("Common prefix is {}", comPrefix); if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString()) || emptyObjects.isEmpty()) { FileStatus status = new COSFileStatus(true, false, keyToQualifiedPath(hostName, comPrefix)); LOG.debug("Match between common prefix and empty object {}. Adding to result", comPrefix); if (filter == null) { memoryCache.putFileStatus(status.getPath().toString(), status); tmpResult.add(status); } else if (filter != null && filter.accept(status.getPath())) { memoryCache.putFileStatus(status.getPath().toString(), status); tmpResult.add(status); } else { LOG.trace("Common prefix {} rejected by path filter during list. Filter {}", status.getPath(), filter); } } } return tmpResult.toArray(new FileStatus[tmpResult.size()]); }
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
/** * Merge between two paths//w ww.j av a 2 s .c o m * * @param hostName * @param p path * @param objectKey * @return merged path */ private String getMergedPath(String hostName, Path p, String objectKey) { if ((p.getParent() != null) && (p.getName() != null) && (p.getParent().toString().equals(hostName))) { if (objectKey.equals(p.getName())) { return p.toString(); } return hostName + objectKey; } return hostName + objectKey; }