List of usage examples for org.apache.hadoop.fs.FileSystem#getContentSummary
public ContentSummary getContentSummary(Path f) throws IOException
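Before the project-specific examples below, here is a minimal, self-contained sketch of the call. The path and class name are placeholders, not taken from any of the sources that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Works on both files and directories; for a directory the
        // summary is computed recursively over the whole subtree.
        ContentSummary summary = fs.getContentSummary(new Path("/user/hive/warehouse"));
        System.out.println("logical length (bytes): " + summary.getLength());
        System.out.println("files:                  " + summary.getFileCount());
        System.out.println("directories:            " + summary.getDirectoryCount());
        // Space consumed accounts for HDFS block replication.
        System.out.println("space consumed (bytes): " + summary.getSpaceConsumed());
    }
}

Note that for directories the summary is computed over the entire subtree, so repeated calls on very large trees can be relatively expensive.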
From source file: com.github.seqware.queryengine.tutorial.Poster.java
License: Open Source License

private void recordSpace(String key) throws IOException {
    try {
        Configuration conf = new Configuration();
        HBaseStorage.configureHBaseConfig(conf);
        HBaseConfiguration.addHbaseResources(conf);
        FileSystem fs = FileSystem.get(conf);
        Path homeDirectory = fs.getHomeDirectory();
        Path root = homeDirectory.getParent().getParent();
        Path hbase = new Path(root, "hbase");

        ContentSummary contentSummary = fs.getContentSummary(hbase);
        long spaceConsumedinGB = convertToGB(contentSummary);
        keyValues.put(key + "-total-space-in-GB", Long.toString(spaceConsumedinGB));
        /*
         * if (spaceConsumedinGB > CUT_OFF) { return; }
         */

        Path featureTable = new Path(hbase, Constants.Term.NAMESPACE.getTermValue(String.class)
                + ".hbaseTestTable_v2.Feature." + HG_19);
        contentSummary = fs.getContentSummary(featureTable);
        spaceConsumedinGB = convertToGB(contentSummary);
        keyValues.put(key + "-feature-space-in-GB", Long.toString(spaceConsumedinGB));
    } catch (FileNotFoundException e) {
        // throw away, this is ok the first time
    }
}
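The convertToGB helper is not shown in this listing. A plausible implementation, purely as a hypothetical sketch (whether the original converts getLength() or the replication-aware getSpaceConsumed() is not visible here):

// Hypothetical helper, not part of the listing above: converts a
// ContentSummary's logical length from bytes to whole gigabytes.
private long convertToGB(ContentSummary contentSummary) {
    return contentSummary.getLength() / (1024L * 1024L * 1024L);
}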
From source file: com.ibm.bi.dml.runtime.controlprogram.parfor.opt.OptimizerRuleBased.java
License: Open Source License

/**
 * Increasing the partition replication factor is beneficial if partitions are
 * read multiple times (e.g., in nested loops) because partitioning (done once)
 * gets slightly slower but there is a higher probability for local access.
 *
 * NOTE: this rewrite requires 'set data partitioner' to be executed in order to
 * leverage the partitioning information in the plan tree.
 *
 * @param n
 * @throws DMLRuntimeException
 */
protected void rewriteSetPartitionReplicationFactor(OptNode n,
        HashMap<String, PDataPartitionFormat> partitionedMatrices, LocalVariableMap vars)
        throws DMLRuntimeException {
    boolean apply = false;
    double sizeReplicated = 0;
    int replication = ParForProgramBlock.WRITE_REPLICATION_FACTOR;

    ParForProgramBlock pfpb = (ParForProgramBlock) OptTreeConverter.getAbstractPlanMapping()
            .getMappedProg(n.getID())[1];

    if (n.getExecType() == ExecType.MR
            && n.getParam(ParamType.DATA_PARTITIONER).equals(PDataPartitioner.REMOTE_MR.toString())
            && n.hasNestedParallelism(false) && n.hasNestedPartitionReads(false)) {
        apply = true;

        //account for problem and cluster constraints
        replication = (int) Math.min(_N, _rnk);

        //account for internal max constraint (note hadoop will warn if max > 10)
        replication = (int) Math.min(replication, MAX_REPLICATION_FACTOR_EXPORT);

        //account for remaining hdfs capacity
        try {
            FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf());
            long hdfsCapacityRemain = fs.getStatus().getRemaining();
            long sizeInputs = 0; //sum of all input sizes (w/o replication)
            for (String var : partitionedMatrices.keySet()) {
                MatrixObject mo = (MatrixObject) vars.get(var);
                Path fname = new Path(mo.getFileName());
                if (fs.exists(fname)) //non-existing (e.g., CP) -> small file
                    sizeInputs += fs.getContentSummary(fname).getLength();
            }
            replication = (int) Math.min(replication, Math.floor(0.9 * hdfsCapacityRemain / sizeInputs));

            //ensure at least replication 1
            replication = Math.max(replication, ParForProgramBlock.WRITE_REPLICATION_FACTOR);
            sizeReplicated = replication * sizeInputs;
        } catch (Exception ex) {
            throw new DMLRuntimeException("Failed to analyze remaining hdfs capacity.", ex);
        }
    }

    //modify the runtime plan
    if (apply)
        pfpb.setPartitionReplicationFactor(replication);
    _numEvaluatedPlans++;
    LOG.debug(getOptMode() + " OPT: rewrite 'set partition replication factor' - result=" + apply
            + ((apply) ? " (" + replication + ", " + toMB(sizeReplicated) + ")" : ""));
}
From source file: com.ibm.bi.dml.runtime.util.MapReduceTool.java
License: Open Source License

/**
 * Returns the size of a file or directory on HDFS in bytes.
 *
 * @param path
 * @return
 * @throws IOException
 */
public static long getFilesizeOnHDFS(Path path) throws IOException {
    FileSystem fs = FileSystem.get(_rJob);
    long ret = 0; //in bytes
    if (fs.isDirectory(path))
        ret = fs.getContentSummary(path).getLength();
    else
        ret = fs.getFileStatus(path).getLen(); //note: filestatus would return 0 on directories
    return ret;
}
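For comparison, a hedged sketch of a du-style listing that applies the same directory-vs-file distinction per child entry. The method name is illustrative only and is not part of the source above:

// Sketch: print the size of each entry directly under a directory,
// mirroring the directory/file branching used in getFilesizeOnHDFS.
public static void printChildSizes(FileSystem fs, Path dir) throws IOException {
    for (FileStatus stat : fs.listStatus(dir)) {
        long size = stat.isDirectory()
                ? fs.getContentSummary(stat.getPath()).getLength() // recursive subtree total
                : stat.getLen();                                   // plain file length
        System.out.println(stat.getPath() + "\t" + size);
    }
}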
From source file: com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStream.java
License: Apache License

private static int[] getFilesTotals(FileSystem fs, Path[] activityFiles) {
    int tbc = 0; // total byte count
    int tlc = 0; // total line count
    if (ArrayUtils.isNotEmpty(activityFiles)) {
        for (Path f : activityFiles) {
            try {
                ContentSummary cSummary = fs.getContentSummary(f);
                // note: getLength() returns a long; the int accumulator
                // can overflow for very large files
                tbc += cSummary.getLength();
                tlc += Utils.countLines(fs.open(f));
            } catch (IOException exc) {
                // skip files that cannot be read
            }
        }
    }
    return new int[] { tbc, tlc };
}
From source file: com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStreamTest.java
License: Apache License

@Test()
public void test() throws Exception {
    FileSystem fs = mock(FileSystem.class);
    HdfsFileLineStream stream = new HdfsFileLineStream();

    TestFileList files = new TestFileList(false);

    final String fileName = ("file:////" + files.get(0).getParentFile() + File.separator + files.getPrefix() // NON-NLS
            + "*.TST").replace("\\", "/"); // NON-NLS

    Map<String, String> props = new HashMap<>(2);
    props.put(StreamProperties.PROP_FILENAME, fileName);
    props.put(StreamProperties.PROP_RESTORE_STATE, "false"); // NON-NLS

    when(fs.open(any(Path.class))).thenReturn(new FSDataInputStream(new TestInputStreamStub()));
    final FileStatus fileStatusMock = mock(FileStatus.class);
    final FileStatus[] array = new FileStatus[10];
    Arrays.fill(array, fileStatusMock);
    when(fs.listStatus(any(Path.class), any(PathFilter.class))).thenReturn(array);
    when(fileStatusMock.getModificationTime()).thenReturn(1L, 2L, 3L);
    when(fileStatusMock.getPath()).thenReturn(mock(Path.class));
    when(fs.getContentSummary(any(Path.class))).thenReturn(mock(ContentSummary.class));

    Method m = FileSystem.class.getDeclaredMethod("addFileSystemForTesting", URI.class, Configuration.class, // NON-NLS
            FileSystem.class);
    m.setAccessible(true);
    m.invoke(FileSystem.class, URI.create(fileName), new Configuration(), fs);

    StreamThread st = mock(StreamThread.class);
    st.setName("HdfsFileLineStreamTestThreadName"); // NON-NLS
    stream.setOwnerThread(st);

    stream.setProperties(props.entrySet());
    stream.startStream();

    verify(fileStatusMock, atLeastOnce()).getModificationTime();
    verify(fileStatusMock, atLeastOnce()).getPath();
    verify(fs, atLeastOnce()).listStatus(any(Path.class), any(PathFilter.class));

    stream.cleanup();
}
From source file: com.mellanox.r4h.DistributedFileSystem.java
License: Apache License

@Override
public ContentSummary getContentSummary(Path f) throws IOException {
    statistics.incrementReadOps(1);
    Path absF = fixRelativePart(f);
    return new FileSystemLinkResolver<ContentSummary>() {
        @Override
        public ContentSummary doCall(final Path p) throws IOException, UnresolvedLinkException {
            return dfs.getContentSummary(getPathName(p));
        }

        @Override
        public ContentSummary next(final FileSystem fs, final Path p) throws IOException {
            return fs.getContentSummary(p);
        }
    }.resolve(this, absF);
}
From source file: com.rim.logdriver.util.IndexLogs.java
License: Apache License

private static void updateComponent(Map<String, Map<String, Map<String, Map<String, Component>>>> data,
        FileSystem fs, FileStatus matchedFolder, Path path) throws IOException, ParseException {
    // Parse path by splitting it across slashes. To determine service (which might contain slashes) grab
    // everything after the DC name, but before the matched date string.
    String[] pathPieces = matchedFolder.getPath().toString().split("/");
    String[] servicePieces = path.toString().split(pathPieces[4] + "/");
    servicePieces = servicePieces[1].split("/" + pathPieces[pathPieces.length - 5]);
    String DC = pathPieces[4];
    String service = servicePieces[0];
    String component = pathPieces[pathPieces.length - 2];
    String type = pathPieces[pathPieces.length - 5];
    String status = pathPieces[pathPieces.length - 1];
    Date date = inputFormat.parse(pathPieces[pathPieces.length - 4]);

    // Check if there is a matching component, create one if not.
    if (!componentExists(data, DC, service, type, component)) {
        data.get(DC).get(service).get(type).put(component, new Component(DC, service, type, component, date));
    }
    Component thisComponent = data.get(DC).get(service).get(type).get(component);

    // Update the start or end date if the current date is before or after, respectively.
    if (date.before(thisComponent.startDate)) {
        thisComponent.startDate = date;
    } else if (date.after(thisComponent.endDate)) {
        thisComponent.endDate = date;
    }

    // Is the current folder an archive? If so and date is later than the current archiveDate, update it.
    if (status.matches("archive") && date.after(thisComponent.archiveDate)) {
        thisComponent.archiveDate = date;
    }

    // Add size data
    if (status.matches("data")) {
        thisComponent.addDataSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("incoming")) {
        thisComponent.addIncomingSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    } else if (status.matches("archive")) {
        thisComponent.addArchiveSize(fs.getContentSummary(matchedFolder.getPath()).getLength());
    }
}
From source file: com.rim.logdriver.util.LogStats.java
License: Apache License

public static double[] getDataOverTime(FileSystem fs, Component component, Date startDate, Date endDate) {
    if (startDate.after(component.endDate) || endDate.before(component.startDate)) {
        return new double[0];
    }

    // If the date range specified overlaps archived data, notify the user
    if (startDate.before(component.archiveDate)) {
        System.out.println("Warning: Time range specified includes archived data");
    }

    // Set up variables and the result array; hours outside the
    // component's lifetime are recorded as 0.
    long totalHours = (endDate.getTime() - startDate.getTime()) / oneHour;
    int logVolumesIndex = 0;
    double[] logVolumes = new double[(int) totalHours];
    String basePath = "/service/" + component.DC + "/" + component.service + "/" + component.type + "/";

    for (Long currentDate = startDate.getTime(); currentDate < endDate.getTime(); currentDate += oneHour) {
        String dateAndHour = inputFormat.format(new Date(currentDate)) + "/"
                + String.format("%02d", new Date(currentDate).getHours()) + "/";
        Path path = new Path(basePath + dateAndHour + component.component);
        if (component.startDate.getTime() - oneDay < currentDate
                && component.endDate.getTime() + oneDay > currentDate) {
            try {
                logVolumes[logVolumesIndex] = fs.getContentSummary(path).getLength();
            } catch (IOException e) {
                logVolumes[logVolumesIndex] = 0;
            }
        } else {
            logVolumes[logVolumesIndex] = 0;
        }
        logVolumesIndex++;
    }

    return logVolumes;
}
From source file: com.splicemachine.derby.impl.io.HdfsDirFile.java
License: Apache License

@Override
public long length() {
    if (!exists() || isDirectory()) {
        return 0; // As specified in the Javadoc.
    }
    try {
        FileSystem fs = getFileSystem();
        return fs.getContentSummary(new Path(path)).getLength();
    } catch (IOException e) {
        LOG.error(String.format("An exception occurred while getting the size of the file '%s'.", path), e);
        return 0;
    }
}
From source file: com.tripadvisor.hadoop.BackupHdfs.java
License: Apache License

/**
 * Method to move files from HDFS to the local filesystem.
 *
 * localPath: path on the machine's local filesystem
 * preservePath: path under which existing local files are preserved
 * fs: FileSystem object for HDFS
 * pathList: list of paths for files that might need to be backed up
 * size: max size in bytes to be backed up
 *
 * Returns the date of the last file backed up if the size limit was
 * reached; otherwise, zero.
 **/
public long backupFiles(String localPath, String preservePath, FileSystem fs, ArrayList<Path> pathList,
        long size) {
    Path fsPath;
    long tmpSize = 0;
    long tmpDate = 0;

    // Start iterating over all paths
    for (Path hdfsPath : pathList) {
        try {
            long nFileSize = fs.getContentSummary(hdfsPath).getLength();
            tmpSize = tmpSize + nFileSize;

            if ((tmpSize <= size) || (size == 0)) {
                FileStatus stat = fs.getFileStatus(hdfsPath);

                System.err.println("File " + hdfsPath.toUri().getPath() + " " + nFileSize + " bytes, "
                        + "perms: " + stat.getOwner() + "/" + stat.getGroup() + ", "
                        + stat.getPermission().toString());

                tmpDate = stat.getModificationTime() / 1000;

                String sFsPath = localPath + hdfsPath.toUri().getPath();
                fsPath = new Path(sFsPath);

                File f = new File(sFsPath);

                // COMMENTED OUT: until a few backup cycles run
                // and the mtime gets in fact set on all copied
                // files.
                //
                // ignore it if the file exists and has the same mtime
                // if (f.exists() && f.isFile() && f.lastModified() == stat.getModificationTime())
                // {
                //     System.out.println("no need to backup " + f.toString() + ", mtime matches hdfs");
                //     continue;
                // }

                if (false == m_bDryRun) {
                    // check if we need to back up the local file
                    // (not directory), if it already exists.
                    if (f.exists() && f.isFile()) {
                        // ignore files with substrings in the
                        // no-preserve file
                        if (true == doPreserveFile(sFsPath)) {
                            // move it to the backup path
                            String sNewPath = preservePath + hdfsPath.toUri().getPath();
                            File newFile = new File(sNewPath);

                            // create directory structure for new file?
                            if (false == newFile.getParentFile().exists()) {
                                if (false == newFile.getParentFile().mkdirs()) {
                                    System.err.println("Failed to mkdirs " + newFile.getParentFile().toString());
                                    System.exit(1);
                                }
                            }

                            // rename existing file to new location
                            if (false == f.renameTo(newFile)) {
                                System.err.println("Failed to renameTo " + f.toString() + " to " + newFile.toString());
                                System.exit(1);
                            }

                            System.out.println("preserved " + f.toString() + " into " + newFile.toString());
                        } else {
                            System.out.println("skipped preservation of " + f.toString());
                        }
                    }

                    // copy from hdfs to local filesystem
                    fs.copyToLocalFile(hdfsPath, fsPath);

                    // set the mtime to match hdfs file
                    f.setLastModified(stat.getModificationTime());

                    // compare checksums on both files
                    compareChecksums(fs, hdfsPath, sFsPath);
                }

                // don't print the progress after every file -- go
                // by at least 1% increments
                long nPercentDone = (long) (100 * tmpSize / m_nTotalBytes);
                if (nPercentDone > m_nLastPercentBytesDone) {
                    System.out.println("progress: copied " + prettyPrintBytes(tmpSize) + ", " + nPercentDone
                            + "% done" + ", tstamp=" + tmpDate);
                    m_nLastPercentBytesDone = nPercentDone;
                }

                if (m_nSleepSeconds > 0) {
                    try {
                        Thread.sleep(1000 * m_nSleepSeconds);
                    } catch (Exception e2) {
                        // ignore
                    }
                }
            } else {
                return tmpDate;
            }
        } catch (IOException e) {
            System.err.println("FATAL ERROR: Something wrong with the file");
            System.err.println(e);
            System.out.println(tmpDate);
            System.exit(1);
            return 0;
        }
    }

    return 0;
}