Usage examples for org.apache.hadoop.fs.ContentSummary.getLength()
public long getLength()
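ContentSummary.getLength() returns the total number of bytes under a path: for a file, its own length; for a directory, the sum of the lengths of all files beneath it. Before the real-world examples, here is a minimal sketch of the basic call pattern, assuming a configured default filesystem and an existing path; "/data" is a placeholder, not a value taken from any example.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetLengthExample {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Summarize everything under the path ("/data" is a placeholder).
        ContentSummary summary = fs.getContentSummary(new Path("/data"));
        // Total bytes of all files under the path.
        System.out.println("length: " + summary.getLength());
        System.out.println("files:  " + summary.getFileCount());
        System.out.println("dirs:   " + summary.getDirectoryCount());
    }
}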
From source file: org.exem.flamingo.agent.nn.hdfs.HdfsFileInfo.java
License: Apache License
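In this example getLength() supplies the size of a directory (the total bytes of the files beneath it), while a plain file falls back to FileStatus.getLen():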
public HdfsFileInfo(FileStatus fileStatus, ContentSummary contentSummary) {
    this.fullyQualifiedPath = fileStatus.getPath().toUri().getPath();
    this.filename = isEmpty(getFilename(fullyQualifiedPath)) ? getDirectoryName(fullyQualifiedPath)
            : getFilename(fullyQualifiedPath);
    // A file reports its own length; a directory reports the aggregate length
    // from the ContentSummary. Guard against a null summary before using it.
    this.length = (fileStatus.isFile() || contentSummary == null) ? fileStatus.getLen()
            : contentSummary.getLength();
    this.path = getPath(fullyQualifiedPath);
    this.directory = fileStatus.isDirectory();
    this.file = !fileStatus.isDirectory();
    this.owner = fileStatus.getOwner();
    this.group = fileStatus.getGroup();
    this.blockSize = fileStatus.getBlockSize();
    this.replication = fileStatus.getReplication();
    this.modificationTime = fileStatus.getModificationTime();
    if (contentSummary != null) {
        this.spaceConsumed = contentSummary.getSpaceConsumed();
        this.spaceQuota = contentSummary.getSpaceQuota();
        this.quota = contentSummary.getQuota();
        this.directoryCount = contentSummary.getDirectoryCount();
        this.fileCount = contentSummary.getFileCount();
    }
    this.accessTime = fileStatus.getAccessTime();
    this.permission = fileStatus.getPermission().toString();
}
From source file: org.openflamingo.fs.hdfs.HdfsFileSystemProvider.java
License: Apache License
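This provider merges a FileStatus with a ContentSummary for the same path, storing getLength() as the total size alongside the quota and space-consumed figures: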
@Override
public FileInfo getFileInfo(String path) {
    try {
        FileStatus fileStatus = fs.getFileStatus(new Path(path));
        HdfsFileInfo hdfsFileInfo = new HdfsFileInfo(fileStatus);

        ContentSummary summary = fs.getContentSummary(new Path(path));
        hdfsFileInfo.setBlockSize(fileStatus.getBlockSize());
        hdfsFileInfo.setReplication(fileStatus.getReplication());
        hdfsFileInfo.setDirectoryCount(summary.getDirectoryCount());
        hdfsFileInfo.setFileCount(summary.getFileCount());
        hdfsFileInfo.setQuota(summary.getQuota());
        hdfsFileInfo.setSpaceQuota(summary.getSpaceQuota());
        hdfsFileInfo.setSpaceConsumed(StringUtils.byteDesc(summary.getSpaceConsumed()));
        hdfsFileInfo.setLength(summary.getLength());
        return hdfsFileInfo;
    } catch (Exception ex) {
        throw new FileSystemException(bundle.message("S_FS", "CANNOT_GET_FILE_INFO", path), ex);
    }
}
From source file: org.slc.sli.aggregation.mapreduce.map.ValueMapperTest.java
License: Apache License
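This test emits ContentSummary as a MapReduce value type and asserts on its length, file count, and directory count from within a mocked context: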
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testMap() throws Exception {
    TenantAndIdEmittableKey key = new TenantAndIdEmittableKey();
    ValueMapper m = new MockValueMapper();
    BSONObject entry = new BasicBSONObject("found", "data");
    BSONWritable entity = new BSONWritable(entry);

    Context context = Mockito.mock(Context.class);
    PowerMockito.when(context, "write", Matchers.any(EmittableKey.class), Matchers.any(BSONObject.class))
            .thenAnswer(new Answer<BSONObject>() {
                @Override
                public BSONObject answer(InvocationOnMock invocation) throws Throwable {
                    Object[] args = invocation.getArguments();
                    assertNotNull(args);
                    assertEquals(args.length, 2);
                    assertTrue(args[0] instanceof TenantAndIdEmittableKey);
                    assertTrue(args[1] instanceof ContentSummary);

                    TenantAndIdEmittableKey id = (TenantAndIdEmittableKey) args[0];
                    assertNotNull(id);
                    ContentSummary e = (ContentSummary) args[1];
                    assertEquals(e.getLength(), 1);
                    assertEquals(e.getFileCount(), 2);
                    assertEquals(e.getDirectoryCount(), 3);
                    return null;
                }
            });

    m.map(key, entity, context);
}
From source file: org.springframework.data.hadoop.fs.AbstractROFsShellTest.java
License: Apache License
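Here shell.count() returns a Map<Path, ContentSummary>, and each summary's getLength() is compared against the number of bytes written to the file: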
@Test
public void testCount() throws Exception {
    String name1 = "local/" + UUID.randomUUID() + ".txt";
    int length1 = name1.length();
    String name2 = "local/" + UUID.randomUUID() + ".txt";

    Resource res1 = TestUtils.writeToFS(cfg, name1);
    name1 = res1.getURI().getPath();
    Resource res2 = TestUtils.writeToFS(cfg, name2);
    name2 = res2.getURI().getPath();

    Map<Path, ContentSummary> count = shell.count(name1, name2);
    assertTrue(count.size() >= 2);
    for (ContentSummary summary : count.values()) {
        assertEquals(length1, summary.getLength());
    }
    assertTrue(count.toString().contains(name1));
    assertTrue(count.toString().contains(name2));
}
From source file: org.springframework.data.hadoop.fs.AbstractROFsShellTest.java
License: Apache License
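The same check as above, but with quota reporting enabled via count(true, ...):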
@Test
public void testCountWithQuota() throws Exception {
    String name1 = "local/" + UUID.randomUUID() + ".txt";
    String name2 = "local/" + UUID.randomUUID() + ".txt";
    int length1 = name1.length();

    Resource res1 = TestUtils.writeToFS(cfg, name1);
    Resource res2 = TestUtils.writeToFS(cfg, name2);
    name1 = res1.getURI().getPath();
    name2 = res2.getURI().getPath();

    Map<Path, ContentSummary> count = shell.count(true, name1, name2);
    assertTrue(count.size() >= 2);
    for (ContentSummary summary : count.values()) {
        assertEquals(length1, summary.getLength());
    }
    assertTrue(count.toString().contains(name1));
    assertTrue(count.toString().contains(name2));
}
From source file: test.hiveserver.parse.SemanticAnalyzer.java
License: Apache License
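Hive calls getLength() on each task's input summary to estimate input size when deciding whether a query is small enough to run entirely in local mode: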
private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
        GlobalLimitCtx globalLimitCtx) throws SemanticException {

    // bypass for explain queries for now
    if (ctx.getExplain()) {
        return;
    }

    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }

    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);

    // map-reduce jobs will be run locally based on data size;
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, (MapredWork) mrtask.getWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);

            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, estimate the input
                // actually needed based on the limit rows:
                // estimated input = (num_limit * max_size_per_row) * (estimated_maps + 2)
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
                        + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: "
                        + estimatedInput);
            }

            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
                    inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }

    if (!hasNonLocalJob) {
        // None of the map-reduce tasks needs to run non-locally, so the query can
        // be executed entirely in local mode. Save the current tracker value and
        // restore it when done.
        ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Automatically selecting local only mode for query");

        // If all the tasks can be run locally, we can use local disk for
        // storing intermediate data.
        /**
         * This code is commented out pending further testing/development
         * for (Task<? extends Serializable> t: rootTasks)
         *     t.localizeMRTmpFiles(ctx);
         */
    }
}