List of usage examples for org.apache.hadoop.fs.FileStatus.getBlockSize()
public long getBlockSize()
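Before the longer examples below, a minimal, self-contained sketch of the call itself. The file path is a hypothetical placeholder, and the default FileSystem comes from whatever configuration is on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BlockSizeExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // "/tmp/example.txt" is a placeholder; any existing file works
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        // Block size of the file in bytes (e.g. 134217728 on a stock HDFS
        // with a 128 MB default)
        System.out.println("Block size: " + status.getBlockSize());
    }
}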
From source file:eagle.security.hdfs.entity.FileStatusEntity.java
License:Apache License
public FileStatusEntity(FileStatus status) throws IOException {
    //this.path = status.getPath();
    this.length = status.getLen();
    this.isdir = status.isDirectory();
    this.block_replication = status.getReplication();
    this.blocksize = status.getBlockSize();
    this.modification_time = status.getModificationTime();
    this.access_time = status.getAccessTime();
    this.permission = status.getPermission();
    this.owner = status.getOwner();
    this.group = status.getGroup();
    if (status.isSymlink()) {
        this.symlink = status.getSymlink();
    }
}
From source file:edu.ucsb.cs.hadoop.CustomFileInputFormat.java
License:Apache License
/**
 * Splits files returned by {@link #listStatus(JobConf)} when they're too big.
 */
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    long totalSize = 0; // compute total size
    for (FileStatus file : files) { // check we have valid files
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }

    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    NetworkTopology clusterMap = new NetworkTopology();
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(goalSize, minSize, blockSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, splitSize,
                        clusterMap);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize, splitHosts));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
            splits.add(new FileSplit(path, 0, length, splitHosts));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
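For context: the computeSplitSize helper used above is what turns getBlockSize() into an actual split size. In the old mapred API it clamps the per-file block size between the goal size (total input size divided by the requested number of splits) and the configured minimum. A minimal sketch matching the stock Hadoop 1.x implementation:

// Sketch of org.apache.hadoop.mapred.FileInputFormat.computeSplitSize:
// the block size acts as the default, capped above by goalSize and
// raised from below by minSize.
protected long computeSplitSize(long goalSize, long minSize, long blockSize) {
    return Math.max(minSize, Math.min(goalSize, blockSize));
}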
From source file:edu.umn.cs.spatialHadoop.mapred.CombinedSpatialInputFormat.java
License:Apache License
public void splitFile(JobConf job, Path path, List<FileSplit> splits) throws IOException {
    NetworkTopology clusterMap = new NetworkTopology();
    FileSystem fs = path.getFileSystem(job);
    FileStatus file = fs.getFileStatus(path);
    long length = file.getLen();
    BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
    if (length != 0) {
        long blockSize = file.getBlockSize();
        long splitSize = blockSize;

        long bytesRemaining = length;
        while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
            String[] splitHosts = getSplitHosts(blkLocations, length - bytesRemaining, splitSize, clusterMap);
            splits.add(new FileSplit(path, length - bytesRemaining, splitSize, splitHosts));
            bytesRemaining -= splitSize;
        }

        if (bytesRemaining != 0) {
            splits.add(new FileSplit(path, length - bytesRemaining, bytesRemaining,
                    blkLocations[blkLocations.length - 1].getHosts()));
        }
    } else if (length != 0) {
        // NOTE: this branch is unreachable; length != 0 is fully handled above.
        // It appears to be left over from the FileInputFormat.getSplits template,
        // where the first condition also tests isSplitable().
        String[] splitHosts = getSplitHosts(blkLocations, 0, length, clusterMap);
        splits.add(new FileSplit(path, 0, length, splitHosts));
    } else {
        // Create empty hosts array for zero length files
        splits.add(new FileSplit(path, 0, length, new String[0]));
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Plot.java
License:Apache License
public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
        int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
        boolean background) throws IOException {
    JobConf job = new JobConf(Plot.class);
    job.setJobName("Plot");

    job.setMapperClass(PlotMap.class);
    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setReducerClass(PlotReduce.class);
    job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
    job.setMapOutputKeyClass(Rectangle.class);
    SpatialSite.setShapeClass(job, shape.getClass());
    job.setMapOutputValueClass(shape.getClass());

    FileSystem inFs = inFile.getFileSystem(job);
    Rectangle fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
    FileStatus inFileStatus = inFs.getFileStatus(inFile);

    CellInfo[] cellInfos;
    GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
    if (gindex == null) {
        // A heap file. The map function should partition the file
        GridInfo gridInfo = new GridInfo(fileMbr.x1, fileMbr.y1, fileMbr.x2, fileMbr.y2);
        gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
        cellInfos = gridInfo.getAllCells();
        // Doesn't make sense to show any partition information in a heap file
        showBorders = showBlockCount = showRecordCount = false;
    } else {
        cellInfos = SpatialSite.cellsOf(inFs, inFile);
    }

    // Set cell information in the job configuration to be used by the mapper
    SpatialSite.setCells(job, cellInfos);

    // Adjust width and height to maintain aspect ratio
    if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
        // Fix width and change height
        height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
    } else {
        width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
    }
    LOG.info("Creating an image of size " + width + "x" + height);
    ImageOutputFormat.setFileMBR(job, fileMbr);
    ImageOutputFormat.setImageWidth(job, width);
    ImageOutputFormat.setImageHeight(job, height);
    job.setBoolean(ShowBorders, showBorders);
    job.setBoolean(ShowBlockCount, showBlockCount);
    job.setBoolean(ShowRecordCount, showRecordCount);
    job.setInt(StrokeColor, color.getRGB());

    // Set input and output
    job.setInputFormat(ShapeInputFormat.class);
    ShapeInputFormat.addInputPath(job, inFile);

    // Set output committer which will stitch images together after all reducers finish
    job.setOutputCommitter(PlotOutputCommitter.class);

    job.setOutputFormat(ImageOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outFile);

    if (background) {
        JobClient jc = new JobClient(job);
        lastSubmittedJob = jc.submitJob(job);
    } else {
        lastSubmittedJob = JobClient.runJob(job);
    }
}
From source file:edu.umn.cs.spatialHadoop.operations.Sampler.java
License:Open Source License
private static int sampleWithRatio(Path[] files, final ResultCollector<? extends TextSerializable> output,
        OperationsParams params) throws IOException {
    FileSystem fs = files[0].getFileSystem(params);
    FileStatus inFStatus = fs.getFileStatus(files[0]);
    if (inFStatus.isDir() || inFStatus.getLen() / inFStatus.getBlockSize() > 1) {
        // Either a directory of files or a single file larger than one block
        return sampleMapReduceWithRatio(files, output, params);
    } else {
        // A single small file, process it without MapReduce
        return sampleLocalWithRatio(files, output, params);
    }
}
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/**
 * Lists the contents of a directory
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                if (shape != null)
                    out.printf(",\"Shape\":\"%s\"", shape);
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
        }
        out.print("}");
        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}
From source file:gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter.java
License:Apache License
/**
 * Write the contents of input stream into staging path.
 *
 * <p>
 * WriteAt indicates the path where the contents of the input stream should be written. When this method is
 * called, the path writeAt.getParent() will exist already, but the path writeAt will not exist. When this
 * method returns, the path writeAt must exist. Any data written to any location other than writeAt or a
 * descendant of writeAt will be ignored.
 * </p>
 *
 * @param inputStream {@link FSDataInputStream} whose contents should be written to staging path.
 * @param writeAt {@link Path} at which contents should be written.
 * @param copyableFile {@link gobblin.data.management.copy.CopyEntity} that generated this copy operation.
 * @throws IOException
 */
protected void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {

    final short replication = copyableFile.getPreserve().preserve(PreserveAttributes.Option.REPLICATION)
            ? copyableFile.getOrigin().getReplication()
            : this.fs.getDefaultReplication(writeAt);
    final long blockSize = copyableFile.getPreserve().preserve(PreserveAttributes.Option.BLOCK_SIZE)
            ? copyableFile.getOrigin().getBlockSize()
            : this.fs.getDefaultBlockSize(writeAt);

    // Only recover a persisted file whose replication and block size match what we intend to write
    Predicate<FileStatus> fileStatusAttributesFilter = new Predicate<FileStatus>() {
        @Override
        public boolean apply(FileStatus input) {
            return input.getReplication() == replication && input.getBlockSize() == blockSize;
        }
    };
    Optional<FileStatus> persistedFile = this.recoveryHelper.findPersistedFile(this.state, copyableFile,
            fileStatusAttributesFilter);

    if (persistedFile.isPresent()) {
        log.info(String.format("Recovering persisted file %s to %s.", persistedFile.get().getPath(), writeAt));
        this.fs.rename(persistedFile.get().getPath(), writeAt);
    } else {
        OutputStream os = this.fs.create(writeAt, true, this.fs.getConf().getInt("io.file.buffer.size", 4096),
                replication, blockSize);
        if (encryptionConfig != null) {
            os = EncryptionFactory.buildStreamCryptoProvider(encryptionConfig).encodeOutputStream(os);
        }
        try {
            StreamThrottler<GobblinScopeTypes> throttler = this.taskBroker
                    .getSharedResource(new StreamThrottler.Factory<GobblinScopeTypes>(), new EmptyKey());
            ThrottledInputStream throttledInputStream = throttler.throttleInputStream().inputStream(inputStream)
                    .sourceURI(copyableFile.getOrigin().getPath().toUri())
                    .targetURI(this.fs.makeQualified(writeAt).toUri()).build();
            StreamCopier copier = new StreamCopier(throttledInputStream, os).withBufferSize(this.bufferSize);

            if (isInstrumentationEnabled()) {
                copier.withCopySpeedMeter(this.copySpeedMeter);
            }

            this.bytesWritten.addAndGet(copier.copy());
            if (isInstrumentationEnabled()) {
                log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
                        this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
            } else {
                log.info("File {} copied.", copyableFile.getOrigin().getPath());
            }
        } catch (NotConfiguredException nce) {
            log.warn("Broker error. Some features of stream copier may not be available.", nce);
        } finally {
            os.close();
            inputStream.close();
        }
    }
}
From source file:gobblin.util.filesystem.InstrumentedFileSystemUtils.java
License:Apache License
/**
 * Replace the scheme of the input {@link FileStatus} if it matches the string to replace.
 */
public static FileStatus replaceScheme(FileStatus st, String replace, String replacement) {
    if (replace != null && replace.equals(replacement)) {
        return st;
    }
    try {
        return new FileStatus(st.getLen(), st.isDir(), st.getReplication(), st.getBlockSize(),
                st.getModificationTime(), st.getAccessTime(), st.getPermission(), st.getOwner(), st.getGroup(),
                st.isSymlink() ? st.getSymlink() : null, replaceScheme(st.getPath(), replace, replacement));
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
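A hypothetical call for illustration; the scheme names here are placeholders, not constants from the library:

// Rewrite an hdfs:// status to point at a wrapped/instrumented scheme
FileStatus rewritten = InstrumentedFileSystemUtils.replaceScheme(status, "hdfs", "instrumented-hdfs");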
From source file:gr.ntua.h2rdf.inputFormat.MyFileInputFormat.java
License:Open Source License
/**
 * Generate the list of files and make them into FileSplits.
 */
public List<InputSplit> getSplits(JobContext job) throws IOException {
    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize(job));
    long maxSize = getMaxSplitSize(job);

    // generate splits
    List<InputSplit> splits = new ArrayList<InputSplit>();
    for (FileStatus file : listStatus(job)) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job.getConfiguration());
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(job, path)) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);

            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new MyFileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
            }

            if (bytesRemaining != 0) {
                splits.add(new MyFileSplit(path, length - bytesRemaining, bytesRemaining,
                        blkLocations[blkLocations.length - 1].getHosts()));
            }
        } else if (length != 0) {
            splits.add(new MyFileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new MyFileSplit(path, 0, length, new String[0]));
        }
    }
    LOG.debug("Total # of splits: " + splits.size());

    String p = job.getConfiguration().get("mapred.fairscheduler.pool");
    int max = Integer.parseInt(p.substring(p.indexOf("l") + 1));
    if (splits.size() <= max)
        job.getConfiguration().setInt("mapred.reduce.tasks", splits.size());
    else
        job.getConfiguration().setInt("mapred.reduce.tasks", max);
    return splits;
}
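This example uses the newer org.apache.hadoop.mapreduce API, where split sizing is driven by configurable minimum and maximum sizes rather than a goal size derived from numSplits. A sketch of the corresponding stock helper, for comparison with the mapred-API version shown earlier:

// Sketch of org.apache.hadoop.mapreduce.lib.input.FileInputFormat.computeSplitSize:
// minSize/maxSize come from the split.minsize and split.maxsize settings,
// so getBlockSize() only determines the split size when it falls between the two.
protected long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}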
From source file:hadoop.example.hdfs.ShowFileStatusTestCase.java
License:Open Source License
@Test
public void fileStatusForFile() throws IOException {
    Path file = new Path("/dir/file");
    FileStatus stat = fs.getFileStatus(file);
    Assert.assertEquals("/dir/file", stat.getPath().toUri().getPath());
    Assert.assertFalse(stat.isDir());
    Assert.assertEquals(stat.getLen(), 7L);
    Assert.assertEquals(stat.getReplication(), 1);
    Assert.assertEquals(stat.getBlockSize(), 64 * 1024 * 1024L);
    Assert.assertEquals(stat.getOwner(), "haint");
    Assert.assertEquals(stat.getGroup(), "supergroup");
    Assert.assertEquals(stat.getPermission().toString(), "rw-r--r--");
}
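Note that the expected 64 MB block size matches the classic HDFS default (dfs.block.size in Hadoop 1.x); Hadoop 2.x and later default dfs.blocksize to 128 MB, so this assertion only holds against a cluster, or MiniDFSCluster, configured with the older default.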