List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/**
 * Reads binary cells from every sequence file found in a directory.
 * <p>
 * Each record is a ({@code MatrixIndexes}, {@code MatrixCell}) pair. The matrix
 * dimensions are taken from the largest row and column index encountered, and
 * the indexes are treated as 1-based when the dense value array is filled.
 * </p>
 *
 * @param directory
 *            directory containing the matrix
 * @return matrix characteristics holding the dense values, the dimensions and
 *         the number of non-zero cells; {@code null} only if {@code fail}
 *         returns normally after an I/O error
 */
@SuppressWarnings("deprecation")
public static BinaryMatrixCharacteristics readCellsFromSequenceFile(String directory) {
    try {
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] files = fs.listStatus(new Path(directory));

        HashMap<MatrixIndexes, Double> valueMap = new HashMap<MatrixIndexes, Double>();
        int rows = 0;
        int cols = 0;
        // Both writables are reused by the reader for every record.
        MatrixIndexes indexes = new MatrixIndexes();
        MatrixCell value = new MatrixCell();

        for (FileStatus file : files) {
            SequenceFile.Reader reader = new SequenceFile.Reader(fs, file.getPath(), conf);
            try {
                while (reader.next(indexes, value)) {
                    if (rows < indexes.getRowIndex()) {
                        rows = (int) indexes.getRowIndex();
                    }
                    if (cols < indexes.getColumnIndex()) {
                        cols = (int) indexes.getColumnIndex();
                    }
                    // Copy the key: 'indexes' is overwritten by the next read.
                    valueMap.put(new MatrixIndexes(indexes), value.getValue());
                }
            } finally {
                // Close even when next() throws so the underlying stream is not leaked.
                reader.close();
            }
        }

        double[][] values = new double[rows][cols];
        long nonZeros = 0;
        for (MatrixIndexes index : valueMap.keySet()) {
            double cellValue = valueMap.get(index);
            values[(int) index.getRowIndex() - 1][(int) index.getColumnIndex() - 1] = cellValue;
            if (cellValue != 0) {
                nonZeros++;
            }
        }

        return new BinaryMatrixCharacteristics(values, rows, cols, 0, 0, 0, 0, nonZeros);
    } catch (IOException e) {
        e.printStackTrace();
        fail("unable to read sequence file in " + directory);
    }

    return null;
}
From source file:com.ibm.bi.dml.test.utils.TestUtils.java
License:Open Source License
/** * <p>/*www . j ava2 s. c o m*/ * Reads binary blocks from a file. A matrix characteristic is created which * contains the characteristics of the matrix read from the file and the * values. * </p> * * @param directory * directory containing the matrix * @param rowsInBlock * rows in block * @param colsInBlock * columns in block * @return matrix characteristics */ @SuppressWarnings("deprecation") public static BinaryMatrixCharacteristics readBlocksFromSequenceFile(String directory, int rowsInBlock, int colsInBlock) { try { FileSystem fs = FileSystem.get(conf); FileStatus[] files = fs.listStatus(new Path(directory)); HashMap<MatrixIndexes, Double> valueMap = new HashMap<MatrixIndexes, Double>(); int rowsInLastBlock = -1; int colsInLastBlock = -1; int rows = 0; int cols = 0; MatrixIndexes indexes = new MatrixIndexes(); MatrixBlock value = new MatrixBlock(); for (FileStatus file : files) { SequenceFile.Reader reader = new SequenceFile.Reader(FileSystem.get(conf), file.getPath(), conf); while (reader.next(indexes, value)) { if (value.getNumRows() < rowsInBlock) { if (rowsInLastBlock == -1) rowsInLastBlock = value.getNumRows(); else if (rowsInLastBlock != value.getNumRows()) fail("invalid block sizes"); rows = (int) ((indexes.getRowIndex() - 1) * rowsInBlock + value.getNumRows()); } else if (value.getNumRows() == rowsInBlock) { if (rows <= (indexes.getRowIndex() * rowsInBlock + value.getNumRows())) { if (rowsInLastBlock == -1) rows = (int) ((indexes.getRowIndex() - 1) * rowsInBlock + value.getNumRows()); else fail("invalid block sizes"); } } else { fail("invalid block sizes"); } if (value.getNumColumns() < colsInBlock) { if (colsInLastBlock == -1) colsInLastBlock = value.getNumColumns(); else if (colsInLastBlock != value.getNumColumns()) fail("invalid block sizes"); cols = (int) ((indexes.getColumnIndex() - 1) * colsInBlock + value.getNumColumns()); } else if (value.getNumColumns() == colsInBlock) { if (cols <= (indexes.getColumnIndex() * colsInBlock + 
value.getNumColumns())) { if (colsInLastBlock == -1) cols = (int) ((indexes.getColumnIndex() - 1) * colsInBlock + value.getNumColumns()); else fail("invalid block sizes"); } } else { fail("invalid block sizes"); } if (value.isInSparseFormat()) { SparseRowsIterator iter = value.getSparseRowsIterator(); while (iter.hasNext()) { IJV cell = iter.next(); valueMap.put(new MatrixIndexes(((indexes.getRowIndex() - 1) * rowsInBlock + cell.i), (int) ((indexes.getColumnIndex() - 1) * colsInBlock + cell.j)), cell.v); } } else { double[] valuesInBlock = value.getDenseArray(); for (int i = 0; i < value.getNumRows(); i++) { for (int j = 0; j < value.getNumColumns(); j++) { valueMap.put( new MatrixIndexes(((indexes.getRowIndex() - 1) * rowsInBlock + i), (int) ((indexes.getColumnIndex() - 1) * colsInBlock + j)), valuesInBlock[i * value.getNumColumns() + j]); } } } } reader.close(); } long nonZeros = 0; double[][] values = new double[rows][cols]; for (MatrixIndexes index : valueMap.keySet()) { values[(int) index.getRowIndex()][(int) index.getColumnIndex()] = valueMap.get(index); if (valueMap.get(index) != 0) nonZeros++; } return new BinaryMatrixCharacteristics(values, rows, cols, rowsInBlock, rowsInLastBlock, colsInBlock, colsInLastBlock, nonZeros); } catch (IOException e) { e.printStackTrace(); fail("unable to read sequence file in " + directory); } return null; }
From source file:com.ibm.crail.hdfs.CrailHadoopFileSystem.java
License:Apache License
@Override public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException { try {//from w w w . j a v a2s .c om CrailBlockLocation[] _locations = dfs.lookup(file.getPath().toUri().getRawPath()).get().asFile() .getBlockLocations(start, len); BlockLocation[] locations = new BlockLocation[_locations.length]; for (int i = 0; i < locations.length; i++) { locations[i] = new BlockLocation(); locations[i].setOffset(_locations[i].getOffset()); locations[i].setLength(_locations[i].getLength()); locations[i].setNames(_locations[i].getNames()); locations[i].setHosts(_locations[i].getHosts()); locations[i].setTopologyPaths(_locations[i].getTopology()); } return locations; } catch (Exception e) { throw new IOException(e); } }
From source file:com.ibm.jaql.io.hadoop.DefaultHadoopOutputAdapter.java
License:Apache License
@Override public JsonValue expand() throws Exception { BufferedJsonRecord jr = (BufferedJsonRecord) this.options.getCopy(null); BufferedJsonArray ja = new BufferedJsonArray(); String namenode = null;//from w ww .j a v a 2 s. c o m String dfsport = null; if (this.location != null) { Configuration conf = new Configuration(); namenode = conf.get("fs.default.name"); dfsport = conf.get("dfs.namenode.http-address"); FileSystem fs = FileSystem.get(conf); FileStatus files = fs.getFileStatus(new Path(this.location)); if (files.isDir()) { StringBuilder sb = new StringBuilder(); FileStatus[] dirContent = fs.listStatus(new Path(this.location)); for (FileStatus file : dirContent) { if (!file.isDir()) { ja.add(new JsonString(file.getPath().toString())); } } } else { ja.add(new JsonString(files.getPath().toString())); } } jr.add(Adapter.LOCATION_NAME, ja); jr.add(Adapter.TYPE_NAME, args.get(Adapter.TYPE_NAME)); jr.add(new JsonString("expanded"), JsonBool.make(true)); if (namenode != null) jr.add(new JsonString("fs.default.name"), new JsonString(namenode)); if (dfsport != null) jr.add(new JsonString("dfs.namenode.http-address"), new JsonString(dfsport)); return jr; }
From source file:com.ibm.jaql.lang.expr.system.LsFn.java
License:Apache License
@Override public JsonIterator iter(final Context context) throws Exception { JsonString glob = (JsonString) exprs[0].eval(context); // Configuration conf = context.getConfiguration(); Configuration conf = new Configuration(); // TODO: get from context, incl options //URI uri;//from www . j av a2s . c o m //FileSystem fs = FileSystem.get(uri, conf); Path inpath = new Path(glob.toString()); FileSystem fs = inpath.getFileSystem(conf); //final FileStatus[] stats = fs.listStatus(path, filter); final FileStatus[] stats = fs.globStatus(inpath); if (stats == null || stats.length == 0) { return JsonIterator.EMPTY; } final MutableJsonDate accessTime = new MutableJsonDate(); final MutableJsonDate modifyTime = new MutableJsonDate(); final MutableJsonLong length = new MutableJsonLong(); final MutableJsonLong blockSize = new MutableJsonLong(); final MutableJsonLong replication = new MutableJsonLong(); final MutableJsonString path = new MutableJsonString(); final MutableJsonString owner = new MutableJsonString(); final MutableJsonString group = new MutableJsonString(); final MutableJsonString permission = new MutableJsonString(); final JsonValue[] values = new JsonValue[] { accessTime, modifyTime, length, blockSize, replication, path, owner, group, permission }; final BufferedJsonRecord rec = new BufferedJsonRecord(); rec.set(LsField.names, values, values.length, false); return new JsonIterator(rec) { int i = 0; @Override public boolean moveNext() throws Exception { if (i >= stats.length) { return false; } FileStatus stat = stats[i++]; // fs.getUri().toString(); long x = HadoopShim.getAccessTime(stat); if (x <= 0) { values[LsField.ACCESS_TIME.ordinal()] = null; } else { accessTime.set(x); values[LsField.ACCESS_TIME.ordinal()] = accessTime; } modifyTime.set(stat.getModificationTime()); length.set(stat.getLen()); blockSize.set(stat.getBlockSize()); replication.set(stat.getReplication()); path.setCopy(stat.getPath().toString()); owner.setCopy(stat.getOwner()); 
group.setCopy(stat.getGroup()); permission.setCopy(stat.getPermission().toString()); return true; } }; }
From source file:com.ibm.stocator.fs.common.ObjectStoreGlobber.java
License:Open Source License
/**
 * Expands {@code pathPattern} into concrete path patterns and matches each of
 * them against the store.
 *
 * @return the statuses accepted by {@code filter}; {@code null} when no
 *         wildcard was seen, nothing matched and the pattern expanded to at
 *         most one flattened pattern
 * @throws IOException declared for the listing and status lookups performed
 *         while matching
 */
public FileStatus[] glob() throws IOException {
  // First we get the scheme and authority of the pattern that was passed
  // in.
  LOG.debug("Welcome to glob : " + pathPattern.toString());
  String scheme = schemeFromPath(pathPattern);
  String authority = authorityFromPath(pathPattern);

  // Next we strip off everything except the pathname itself, and expand all
  // globs. Expansion is a process which turns "grouping" clauses,
  // expressed as brackets, into separate path patterns.
  String pathPatternString = pathPattern.toUri().getPath();
  List<String> flattenedPatterns = ObjectStoreGlobExpander.expand(pathPatternString);
  LOG.debug("expanded : " + pathPatternString);

  // Now loop over all flattened patterns. In every case, we'll be trying to
  // match them to entries in the filesystem.
  ArrayList<FileStatus> results = new ArrayList<FileStatus>(flattenedPatterns.size());
  // NOTE(review): this flag is shared by all flattened patterns and never reset,
  // so once one pattern contained a wildcard the component loop below is skipped
  // for every later pattern - confirm this is intended.
  boolean sawWildcard = false;
  for (String flatPattern : flattenedPatterns) {
    LOG.debug("pattern from list: " + flatPattern);
    Path absPattern = new Path(flatPattern.isEmpty() ? Path.CUR_DIR : flatPattern);
    List<String> components = getPathComponents(absPattern.toUri().getPath());
    ArrayList<FileStatus> candidates = new ArrayList<FileStatus>(1);
    // Matching starts from a synthetic directory entry for the root of scheme/authority.
    FileStatus rootPlaceholder = new FileStatus(0, true, 0, 0, 0,
        new Path(scheme, authority, Path.SEPARATOR));
    LOG.debug("Going to add candidate: " + rootPlaceholder.getPath().toString());
    candidates.add(rootPlaceholder);
    // Concatenate the leading components that contain no glob pattern; stop at
    // the first component that does. globFilter is left on the last component examined.
    String cmpCombined = "";
    ObjectStoreGlobFilter globFilter = null;
    for (int componentIdx = 0; componentIdx < components.size() && !sawWildcard; componentIdx++) {
      globFilter = new ObjectStoreGlobFilter(components.get(componentIdx));
      if (globFilter.hasPattern()) {
        sawWildcard = true;
      } else {
        cmpCombined = cmpCombined + "/" + components.get(componentIdx);
      }
    }
    String component = unescapePathComponent(cmpCombined);
    if (component != null && component.length() > 0) {
      // Move every candidate below the literal (wildcard-free) prefix.
      for (FileStatus candidate : candidates) {
        candidate.setPath(new Path(candidate.getPath(), component));
      }
    } else {
      // No literal prefix: filter on the first component.
      // NOTE(review): get(0) throws if components is empty - verify that
      // getPathComponents never returns an empty list here.
      globFilter = new ObjectStoreGlobFilter(components.get(0));
    }
    ArrayList<FileStatus> newCandidates = new ArrayList<FileStatus>(candidates.size());
    for (FileStatus candidate : candidates) {
      if (globFilter.hasPattern()) {
        FileStatus[] children = listStatus(candidate.getPath());
        // A single-entry listing of something that is not a directory is skipped.
        if (children.length == 1) {
          if (!getFileStatus(candidate.getPath()).isDirectory()) {
            continue;
          }
        }
        for (FileStatus child : children) {
          if (globFilter.accept(child.getPath())) {
            newCandidates.add(child);
          }
        }
      } else {
        // NOTE(review): component was already applied to the candidate path above
        // and is applied again here; presumably harmless because component starts
        // with "/" - confirm against Path(parent, child) semantics.
        FileStatus childStatus = null;
        childStatus = getFileStatus(new Path(candidate.getPath(), component));
        if (childStatus != null) {
          newCandidates.add(childStatus);
        }
      }
    }
    candidates = newCandidates;
    for (FileStatus status : candidates) {
      // The placeholder is synthetic; replace it with the real status before filtering.
      if (status == rootPlaceholder) {
        status = getFileStatus(rootPlaceholder.getPath());
        if (status == null) {
          continue;
        }
      }
      if (filter.accept(status.getPath())) {
        results.add(status);
      }
    }
  }
  // null (rather than an empty array) signals "literal path that matched nothing".
  if (!sawWildcard && results.isEmpty() && (flattenedPatterns.size() <= 1)) {
    return null;
  }
  return results.toArray(new FileStatus[0]);
}
From source file:com.ibm.stocator.fs.cos.COSAPIClient.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * Prefix based
 * Return everything that starts with the prefix
 * Fill listing
 * Return all objects, even zero size
 * If fileStatus is null means the path is part of some name, neither object
 * or pseudo directory. Was called by Globber
 *
 * @param hostName hostName
 * @param path path
 * @param fullListing Return all objects, even zero size
 * @param prefixBased Return everything that starts with the prefix
 * @return list
 * @throws IOException if error
 */
/*
 * Legacy four-argument implementation, kept commented out for reference.
 *
public FileStatus[] list(String hostName, Path path, boolean fullListing,
    boolean prefixBased) throws IOException {
  String key = pathToKey(hostName, path);
  ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
  ListObjectsRequest request = new ListObjectsRequest().withBucketName(mBucket).withPrefix(key);

  String curObj;
  if (path.toString().equals(mBucket)) {
    curObj = "";
  } else if (path.toString().startsWith(mBucket + "/")) {
    curObj = path.toString().substring(mBucket.length() + 1);
  } else if (path.toString().startsWith(hostName)) {
    curObj = path.toString().substring(hostName.length());
  } else {
    curObj = path.toString();
  }

  ObjectListing objectList = mClient.listObjects(request);
  List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
  if (objectSummaries.size() == 0) {
    FileStatus[] emptyRes = {};
    LOG.debug("List for bucket {} is empty", mBucket);
    return emptyRes;
  }
  boolean objectScanContinue = true;
  S3ObjectSummary prevObj = null;
  while (objectScanContinue) {
    for (S3ObjectSummary obj : objectSummaries) {
      if (prevObj == null) {
        prevObj = obj;
        continue;
      }
      String objKey = obj.getKey();
      String unifiedObjectName = extractUnifiedObjectName(objKey);
      if (!prefixBased && !curObj.equals("") && !path.toString().endsWith("/")
          && !unifiedObjectName.equals(curObj) && !unifiedObjectName.startsWith(curObj + "/")) {
        LOG.trace("{} does not match {}. Skipped", unifiedObjectName, curObj);
        continue;
      }
      if (isSparkOrigin(unifiedObjectName) && !fullListing) {
        LOG.trace("{} created by Spark", unifiedObjectName);
        if (!isJobSuccessful(unifiedObjectName)) {
          LOG.trace("{} created by failed Spark job. Skipped", unifiedObjectName);
          if (fModeAutomaticDelete) {
            delete(hostName, new Path(objKey), true);
          }
          continue;
        } else {
          // if we here - data created by spark and job completed
          // successfully
          // however there be might parts of failed tasks that
          // were not aborted
          // we need to make sure there are no failed attempts
          if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
            // found failed that was not aborted.
            LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey);
            if (prevObj.getSize() < obj.getSize()) {
              LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
              prevObj = obj;
            }
            continue;
          }
        }
      }
      if (prevObj.getSize() > 0 || fullListing) {
        FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path);
        tmpResult.add(fs);
      }
      prevObj = obj;
    }
    boolean isTruncated = objectList.isTruncated();
    if (isTruncated) {
      objectList = mClient.listNextBatchOfObjects(objectList);
      objectSummaries = objectList.getObjectSummaries();
    } else {
      objectScanContinue = false;
    }
  }
  if (prevObj != null && (prevObj.getSize() > 0 || fullListing)) {
    FileStatus fs = getFileStatusObjSummaryBased(prevObj, hostName, path);
    tmpResult.add(fs);
  }
  if (LOG.isTraceEnabled()) {
    LOG.trace("COS List to return length {}", tmpResult.size());
    for (FileStatus fs: tmpResult) {
      LOG.trace("{}", fs.getPath());
    }
  }
  return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
 */

/**
 * Lists the objects stored under the key derived from {@code path}.
 * <p>
 * Objects are processed with a one-element look-behind ({@code prevObj}): an
 * object is only emitted once its successor has been inspected, so that two
 * consecutive keys that differ only in their task id can be collapsed into the
 * larger one. The last pending object is emitted after the scan. Zero-length
 * objects are remembered separately and used to decide which common prefixes
 * are reported as directories.
 * </p>
 *
 * @param hostName host name used to build keys and qualified paths
 * @param path path to list
 * @param fullListing when {@code true}, zero-length objects are returned too
 *        and the task-id collapsing is skipped
 * @param prefixBased not read by this implementation
 * @param isDirectory when {@code TRUE}, the key is forced to end with "/"
 * @param flatListing when {@code false}, the request uses "/" as delimiter
 * @param filter optional path filter; {@code null} accepts everything
 * @return the collected statuses, or an empty array when the key stems from a
 *         Spark job that is not marked successful
 * @throws FileNotFoundException declared by the interface
 * @throws IOException declared by the interface
 */
@Override
public FileStatus[] list(String hostName, Path path, boolean fullListing, boolean prefixBased,
    Boolean isDirectory, boolean flatListing, PathFilter filter)
    throws FileNotFoundException, IOException {
  LOG.debug("Native direct list status for {}", path);
  ArrayList<FileStatus> tmpResult = new ArrayList<FileStatus>();
  String key = pathToKey(hostName, path);
  if (isDirectory != null && isDirectory.booleanValue() && !key.endsWith("/")) {
    key = key + "/";
    LOG.debug("listNativeDirect modify key to {}", key);
  }

  // Zero-length objects, keyed by their path string; consulted for the common prefixes below.
  Map<String, FileStatus> emptyObjects = new HashMap<String, FileStatus>();
  ListObjectsRequest request = new ListObjectsRequest();
  request.setBucketName(mBucket);
  request.setMaxKeys(5000);
  request.setPrefix(key);
  if (!flatListing) {
    request.setDelimiter("/");
  }

  ObjectListing objectList = mClient.listObjects(request);

  List<S3ObjectSummary> objectSummaries = objectList.getObjectSummaries();
  // NOTE(review): common prefixes are taken from the first page only; they are not
  // refreshed when further batches are fetched in the loop below - confirm.
  List<String> commonPrefixes = objectList.getCommonPrefixes();

  boolean objectScanContinue = true;
  S3ObjectSummary prevObj = null;
  // start FTA logic
  boolean stocatorOrigin = isSparkOrigin(key, path.toString());
  if (stocatorOrigin) {
    LOG.debug("Stocator origin is true for {}", key);
    if (!isJobSuccessful(key)) {
      LOG.debug("{} created by failed Spark job. Skipped", key);
      if (fModeAutomaticDelete) {
        delete(hostName, new Path(key), true);
      }
      return new FileStatus[0];
    }
  }
  while (objectScanContinue) {
    for (S3ObjectSummary obj : objectSummaries) {
      // The very first object only primes the look-behind.
      if (prevObj == null) {
        prevObj = obj;
        continue;
      }
      String objKey = obj.getKey();
      String unifiedObjectName = extractUnifiedObjectName(objKey);
      LOG.debug("list candidate {}, unified name {}", objKey, unifiedObjectName);
      if (stocatorOrigin && !fullListing) {
        LOG.trace("{} created by Spark", unifiedObjectName);
        // if we here - data created by spark and job completed
        // successfully
        // however there be might parts of failed tasks that
        // were not aborted
        // we need to make sure there are no failed attempts
        if (nameWithoutTaskID(objKey).equals(nameWithoutTaskID(prevObj.getKey()))) {
          // found failed that was not aborted.
          LOG.trace("Colisiion found between {} and {}", prevObj.getKey(), objKey);
          // Keep whichever of the two colliding objects is larger.
          if (prevObj.getSize() < obj.getSize()) {
            LOG.trace("New candidate is {}. Removed {}", obj.getKey(), prevObj.getKey());
            prevObj = obj;
          }
          continue;
        }
      }
      // Emit the previous object now that its successor is known not to collide.
      FileStatus fs = createFileStatus(prevObj, hostName, path);
      if (fs.getLen() > 0 || fullListing) {
        LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
        // NOTE(review): unlike the last-object and common-prefix branches below,
        // this branch does not put the status into memoryCache - confirm intent.
        if (filter == null) {
          tmpResult.add(fs);
        } else if (filter != null && filter.accept(fs.getPath())) { // "filter != null" is always true here
          tmpResult.add(fs);
        } else {
          LOG.trace("{} rejected by path filter during list. Filter {}",
              fs.getPath(), filter);
        }
      } else {
        // NOTE(review): no HADOOP_SUCCESS exclusion here, unlike the last-object branch.
        emptyObjects.put(fs.getPath().toString(), fs);
      }
      prevObj = obj;
    }
    boolean isTruncated = objectList.isTruncated();
    if (isTruncated) {
      objectList = mClient.listNextBatchOfObjects(objectList);
      objectSummaries = objectList.getObjectSummaries();
    } else {
      objectScanContinue = false;
    }
  }

  // Flush the object still held in the look-behind.
  if (prevObj != null) {
    FileStatus fs = createFileStatus(prevObj, hostName, path);
    LOG.debug("Adding the last object from the list {}", fs.getPath());
    if (fs.getLen() > 0 || fullListing) {
      LOG.debug("Native direct list. Adding {} size {}", fs.getPath(), fs.getLen());
      if (filter == null) {
        memoryCache.putFileStatus(fs.getPath().toString(), fs);
        tmpResult.add(fs);
      } else if (filter != null && filter.accept(fs.getPath())) {
        memoryCache.putFileStatus(fs.getPath().toString(), fs);
        tmpResult.add(fs);
      } else {
        LOG.trace("{} rejected by path filter during list. Filter {}",
            fs.getPath(), filter);
      }
    } else if (!fs.getPath().getName().equals(HADOOP_SUCCESS)) {
      emptyObjects.put(fs.getPath().toString(), fs);
    }
  }

  // get common prefixes
  for (String comPrefix : commonPrefixes) {
    LOG.debug("Common prefix is {}", comPrefix);
    // A prefix is reported when a zero-length object exists for it, or when no
    // zero-length objects were recorded at all.
    if (emptyObjects.containsKey(keyToQualifiedPath(hostName, comPrefix).toString())
        || emptyObjects.isEmpty()) {
      FileStatus status = new COSFileStatus(true, false,
          keyToQualifiedPath(hostName, comPrefix));
      LOG.debug("Match between common prefix and empty object {}. Adding to result", comPrefix);
      if (filter == null) {
        memoryCache.putFileStatus(status.getPath().toString(), status);
        tmpResult.add(status);
      } else if (filter != null && filter.accept(status.getPath())) {
        memoryCache.putFileStatus(status.getPath().toString(), status);
        tmpResult.add(status);
      } else {
        LOG.trace("Common prefix {} rejected by path filter during list. Filter {}",
            status.getPath(), filter);
      }
    }
  }
  return tmpResult.toArray(new FileStatus[tmpResult.size()]);
}
From source file:com.ibm.stocator.fs.ObjectStoreFileSystem.java
License:Open Source License
@Override public boolean delete(Path f, boolean recursive) throws IOException { LOG.debug("delete method: {}. recursive {}", f.toString(), recursive); String objNameModified = getObjectNameRoot(f, HADOOP_TEMPORARY, true); LOG.debug("Modified object name {}", objNameModified); if (objNameModified.contains(HADOOP_TEMPORARY)) { return true; }// w w w .j a va 2s.co m Path pathToObj = new Path(objNameModified); if (f.getName().startsWith(HADOOP_ATTEMPT)) { FileStatus[] fsList = storageClient.list(hostNameScheme, pathToObj.getParent(), true); if (fsList.length > 0) { for (FileStatus fs : fsList) { if (fs.getPath().getName().endsWith(f.getName())) { storageClient.delete(hostNameScheme, fs.getPath(), recursive); } } } } else { FileStatus[] fsList = storageClient.list(hostNameScheme, pathToObj, true); if (fsList.length > 0) { for (FileStatus fs : fsList) { storageClient.delete(hostNameScheme, fs.getPath(), recursive); } } } return true; }
From source file:com.ibm.stocator.fs.swift2d.SwiftAPIClientTest.java
License:Open Source License
@Test public void getFileStatusTest() throws Exception { String objectName = "data7-1-23-a.txt/part-00002-attempt_201612062056_0000_m_000002_2"; String hostName = "swift2d://aa-bb-cc.lvm/"; String pathName = "data7-1-23-a.txt"; mStoredObject = new StoredObjectMock(mContainer, objectName); mStoredObject.uploadObject(new byte[] { 1, 2, 3 }); //test to see if correct length is returned FileStatus fs = Whitebox.invokeMethod(mSwiftAPIClient, "getFileStatus", mStoredObject, mContainer, hostName, new Path(pathName)); Assert.assertEquals("getFileStatus() shows incorrect length", 3, fs.getLen()); //test to see if correct path is returned String result = Whitebox.invokeMethod(mSwiftAPIClient, "getMergedPath", hostName, new Path(pathName), objectName);//from ww w. ja v a 2 s .c o m Assert.assertEquals("getFileStatus() shows incorrect path", new Path(result), fs.getPath()); }
From source file:com.ibm.stocator.fs.swift2d.systemtests.SwiftTestUtils.java
License:Open Source License
/** * Deletes all files in a container/*from w w w .j a v a 2s .c om*/ * @param fileSystem * @param BaseUri * @throws IOException */ public static void cleanupAllFiles(FileSystem fileSystem, String BaseUri) throws IOException { try { if (fileSystem != null) { // Clean up generated files Path rootDir = new Path(BaseUri); FileStatus[] files = fileSystem.listStatus(rootDir); for (FileStatus file : files) { fileSystem.delete(file.getPath(), false); } } } catch (Exception e) { LOG.error("Error in deleting all files."); } }