List of usage examples for org.apache.hadoop.fs.ContentSummary.getFileCount()
public long getFileCount()
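Before the full examples below, here is a minimal, self-contained sketch of the basic call. The path and the choice of local filesystem are assumptions for illustration only:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetFileCountExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Local filesystem for demonstration; FileSystem.get(conf) would target HDFS instead.
        FileSystem fs = FileSystem.getLocal(conf);
        Path dir = new Path("/tmp/example-dir"); // hypothetical path
        // getContentSummary walks the tree rooted at dir and aggregates file,
        // directory, and byte counts in one pass.
        ContentSummary summary = fs.getContentSummary(dir);
        System.out.println("files: " + summary.getFileCount()
                + ", dirs: " + summary.getDirectoryCount()
                + ", bytes: " + summary.getLength());
    }
}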
From source file:org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtilTest.java
License:Apache License
@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtil ch = new DistributedCacheUtil();

    Configuration conf = new Configuration();
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");
    FileObject pluginDir = createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, true, Arrays.asList(pluginDir));
        Path pluginInstallPath = new Path(pluginsDir, pluginDir.getURL().toURI().getPath());
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(3, summary.getFileCount());
        assertEquals(2, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}
From source file:org.pentaho.hadoop.shim.common.DistributedCacheTestUtil.java
License:Apache License
/**
 * Utility to attempt to stage a file to HDFS for use with Distributed Cache.
 *
 * @param ch                Distributed Cache Helper
 * @param source            File or directory to stage
 * @param fs                FileSystem to stage to
 * @param root              Root directory to clean up when this test is complete
 * @param dest              Destination path to stage to
 * @param expectedFileCount Expected number of files to exist in the destination once staged
 * @param expectedDirCount  Expected number of directories to exist in the destination once staged
 * @throws Exception
 */
static void stageForCacheTester(DistributedCacheUtilImpl ch, FileObject source, FileSystem fs, Path root,
        Path dest, int expectedFileCount, int expectedDirCount) throws Exception {
    try {
        ch.stageForCache(source, fs, dest, true);

        assertTrue(fs.exists(dest));
        ContentSummary cs = fs.getContentSummary(dest);
        assertEquals(expectedFileCount, cs.getFileCount());
        assertEquals(expectedDirCount, cs.getDirectoryCount());
        assertEquals(FsPermission.createImmutable((short) 0755), fs.getFileStatus(dest).getPermission());
    } finally {
        // Clean up after ourselves
        if (!fs.delete(root, true)) {
            System.err.println("error deleting FileSystem temp dir " + root);
        }
    }
}
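A hypothetical invocation of this helper (the source folder and the expected counts of 6 files and 6 directories are assumptions, not values from the original test) might look like:

stageForCacheTester(ch, DistributedCacheTestUtil.createTestFolderWithContent(), fs, root, dest, 6, 6);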
From source file:org.pentaho.hadoop.shim.common.DistributedCacheUtilImplOSDependentTest.java
License:Apache License
@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");
    FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, "bin/test/sample-folder");
        Path pluginInstallPath = new Path(pluginsDir, "bin/test/sample-folder");
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(6, summary.getFileCount());
        assertEquals(6, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}
From source file:org.slc.sli.aggregation.mapreduce.map.ValueMapperTest.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testMap() throws Exception {
    TenantAndIdEmittableKey key = new TenantAndIdEmittableKey();
    ValueMapper m = new MockValueMapper();
    BSONObject entry = new BasicBSONObject("found", "data");
    BSONWritable entity = new BSONWritable(entry);

    Context context = Mockito.mock(Context.class);
    PowerMockito.when(context, "write", Matchers.any(EmittableKey.class), Matchers.any(BSONObject.class))
            .thenAnswer(new Answer<BSONObject>() {
                @Override
                public BSONObject answer(InvocationOnMock invocation) throws Throwable {
                    Object[] args = invocation.getArguments();
                    assertNotNull(args);
                    assertEquals(args.length, 2);
                    assertTrue(args[0] instanceof TenantAndIdEmittableKey);
                    assertTrue(args[1] instanceof ContentSummary);

                    TenantAndIdEmittableKey id = (TenantAndIdEmittableKey) args[0];
                    assertNotNull(id);
                    ContentSummary e = (ContentSummary) args[1];
                    assertEquals(e.getLength(), 1);
                    assertEquals(e.getFileCount(), 2);
                    assertEquals(e.getDirectoryCount(), 3);
                    return null;
                }
            });

    m.map(key, entity, context);
}
From source file:test.hiveserver.parse.SemanticAnalyzer.java
License:Apache License
private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
        GlobalLimitCtx globalLimitCtx) throws SemanticException {

    // bypass for explain queries for now
    if (ctx.getExplain()) {
        return;
    }

    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }

    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);

    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, (MapredWork) mrtask.getWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);

            long estimatedInput;
            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated Input = (num_limit * max_size_per_row) * (estimated_map + 2)
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
                        + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: "
                        + estimatedInput);
            }

            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
                    inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }

    if (!hasNonLocalJob) {
        // none of the mapred tasks needs to be run locally. That means that the
        // query can be executed entirely in local mode. Save the current tracker
        // value and restore it when done.
        ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Automatically selecting local only mode for query");

        // If all the tasks can be run locally, we can use local disk for
        // storing intermediate data.
        /**
         * This code is commented out pending further testing/development
         * for (Task<? extends Serializable> t: rootTasks)
         *     t.localizeMRTmpFiles(ctx);
         */
    }
}
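As a side note, the input-size estimate above reduces to simple arithmetic over the ContentSummary. A standalone sketch with hypothetical configuration values (none taken from a real query) illustrates the global-limit branch:

public class LocalModeEstimateSketch {
    public static void main(String[] args) {
        long globalLimit = 1000;            // LIMIT rows (hypothetical)
        long sizePerRow = 100;              // HIVELIMITMAXROWSIZE in bytes (hypothetical)
        long minSplitSize = 128L << 20;     // MAPREDMINSPLITSIZE, 128 MB (hypothetical)
        long totalInputLength = 512L << 20; // ContentSummary.getLength() (hypothetical)

        long estimatedInput = globalLimit * sizePerRow;             // bytes needed to satisfy the limit
        long estimatedNumMap = totalInputLength / minSplitSize + 1; // rough mapper count: 4 + 1 = 5
        estimatedInput = estimatedInput * (estimatedNumMap + 1);    // scale by mappers, as in the code above

        // 1000 * 100 = 100,000 bytes; 100,000 * (5 + 1) = 600,000 bytes
        System.out.println("estimated input: " + estimatedInput + " bytes");
    }
}

This estimate, together with ContentSummary.getFileCount(), is what MapRedTask.isEligibleForLocalMode uses to decide whether the job is small enough to run locally.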