Example usage for org.apache.hadoop.fs ContentSummary getFileCount

Introduction

On this page you can find example usages of org.apache.hadoop.fs.ContentSummary.getFileCount().

Prototype

public long getFileCount() 
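
A minimal sketch of the typical call path, for orientation: a ContentSummary is obtained from FileSystem.getContentSummary(Path), and getFileCount() returns the number of files beneath that path. The configuration and path below are illustrative assumptions, not taken from the examples that follow.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Local file system for illustration; any FileSystem works the same way.
        FileSystem fs = FileSystem.getLocal(conf);
        Path dir = new Path("/tmp/example-dir"); // hypothetical path

        // Aggregates byte length, file count and directory count under 'dir'.
        ContentSummary summary = fs.getContentSummary(dir);
        System.out.println("files: " + summary.getFileCount());
        System.out.println("dirs:  " + summary.getDirectoryCount());
        System.out.println("bytes: " + summary.getLength());
    }
}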

Usage

From source file: org.pentaho.di.job.entries.hadooptransjobexecutor.DistributedCacheUtilTest.java

License: Apache License

@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtil ch = new DistributedCacheUtil();

    Configuration conf = new Configuration();
    org.apache.hadoop.fs.FileSystem fs = org.apache.hadoop.fs.FileSystem.getLocal(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");

    FileObject pluginDir = createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, true, Arrays.asList(pluginDir));
        Path pluginInstallPath = new Path(pluginsDir, pluginDir.getURL().toURI().getPath());
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(3, summary.getFileCount());
        assertEquals(2, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}
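
Worth noting when reading these assertions: ContentSummary's directory count includes the directory passed to getContentSummary() itself, which is why the expected directory count is one higher than the number of subdirectories in the staged fixture.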

From source file: org.pentaho.hadoop.shim.common.DistributedCacheTestUtil.java

License: Apache License

/**
 * Utility to attempt to stage a file to HDFS for use with Distributed Cache.
 *
 * @param ch                Distributed Cache Helper
 * @param source            File or directory to stage
 * @param fs                FileSystem to stage to
 * @param root              Root directory to clean up when this test is complete
 * @param dest              Destination path to stage to
 * @param expectedFileCount Expected number of files to exist in the destination once staged
 * @param expectedDirCount  Expected number of directories to exist in the destination once staged
 * @throws Exception
 */
static void stageForCacheTester(DistributedCacheUtilImpl ch, FileObject source, FileSystem fs, Path root,
        Path dest, int expectedFileCount, int expectedDirCount) throws Exception {
    try {
        ch.stageForCache(source, fs, dest, true);

        assertTrue(fs.exists(dest));
        ContentSummary cs = fs.getContentSummary(dest);
        assertEquals(expectedFileCount, cs.getFileCount());
        assertEquals(expectedDirCount, cs.getDirectoryCount());
        assertEquals(FsPermission.createImmutable((short) 0755), fs.getFileStatus(dest).getPermission());
    } finally {
        // Clean up after ourselves
        if (!fs.delete(root, true)) {
            System.err.println("error deleting FileSystem temp dir " + root);
        }
    }
}
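
A hypothetical call site for the helper above, reusing fixture names that appear elsewhere on this page; the paths and expected counts are assumptions for illustration only.

// Hypothetical usage of stageForCacheTester; paths and counts are illustrative.
DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);
FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(new Configuration());
FileObject source = DistributedCacheTestUtil.createTestFolderWithContent();
Path root = new Path("bin/test/stage-for-cache");
Path dest = new Path(root, "staged");
// Expecting 6 files and 6 directories once staged (fixture-specific numbers).
DistributedCacheTestUtil.stageForCacheTester(ch, source, fs, root, dest, 6, 6);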

From source file: org.pentaho.hadoop.shim.common.DistributedCacheUtilImplOSDependentTest.java

License: Apache License

@Test
public void stagePluginsForCache() throws Exception {
    DistributedCacheUtilImpl ch = new DistributedCacheUtilImpl(TEST_CONFIG);

    Configuration conf = new Configuration();
    FileSystem fs = DistributedCacheTestUtil.getLocalFileSystem(conf);

    Path pluginsDir = new Path("bin/test/plugins-installation-dir");

    FileObject pluginDir = DistributedCacheTestUtil.createTestFolderWithContent();

    try {
        ch.stagePluginsForCache(fs, pluginsDir, "bin/test/sample-folder");
        Path pluginInstallPath = new Path(pluginsDir, "bin/test/sample-folder");
        assertTrue(fs.exists(pluginInstallPath));
        ContentSummary summary = fs.getContentSummary(pluginInstallPath);
        assertEquals(6, summary.getFileCount());
        assertEquals(6, summary.getDirectoryCount());
    } finally {
        pluginDir.delete(new AllFileSelector());
        fs.delete(pluginsDir, true);
    }
}

From source file: org.slc.sli.aggregation.mapreduce.map.ValueMapperTest.java

License: Apache License

@SuppressWarnings({ "rawtypes", "unchecked" })
@Test
public void testMap() throws Exception {
    TenantAndIdEmittableKey key = new TenantAndIdEmittableKey();
    ValueMapper m = new MockValueMapper();
    BSONObject entry = new BasicBSONObject("found", "data");
    BSONWritable entity = new BSONWritable(entry);

    Context context = Mockito.mock(Context.class);
    PowerMockito.when(context, "write", Matchers.any(EmittableKey.class), Matchers.any(BSONObject.class))
            .thenAnswer(new Answer<BSONObject>() {

                @Override
                public BSONObject answer(InvocationOnMock invocation) throws Throwable {

                    Object[] args = invocation.getArguments();

                    assertNotNull(args);
                    assertEquals(2, args.length);

                    assertTrue(args[0] instanceof TenantAndIdEmittableKey);
                    assertTrue(args[1] instanceof ContentSummary);

                    TenantAndIdEmittableKey id = (TenantAndIdEmittableKey) args[0];
                    assertNotNull(id);

                    ContentSummary e = (ContentSummary) args[1];
                    assertEquals(1, e.getLength());
                    assertEquals(2, e.getFileCount());
                    assertEquals(3, e.getDirectoryCount());

                    return null;
                }
            });

    m.map(key, entity, context);
}
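
The answer above implies that MockValueMapper emits a summary with length 1, two files and three directories. One way to build such a value, assuming the legacy three-argument constructor (newer Hadoop releases provide ContentSummary.Builder instead):

// Builds a ContentSummary matching the assertions above; the three-argument
// ContentSummary(length, fileCount, directoryCount) constructor is the legacy
// form and is deprecated in newer Hadoop versions.
ContentSummary value = new ContentSummary(1L, 2L, 3L);
assertEquals(2L, value.getFileCount());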

From source file: test.hiveserver.parse.SemanticAnalyzer.java

License: Apache License

private void decideExecMode(List<Task<? extends Serializable>> rootTasks, Context ctx,
        GlobalLimitCtx globalLimitCtx) throws SemanticException {

    // bypass for explain queries for now
    if (ctx.getExplain()) {
        return;
    }

    // user has told us to run in local mode or doesn't want auto-local mode
    if (ctx.isLocalOnlyExecutionMode() || !conf.getBoolVar(HiveConf.ConfVars.LOCALMODEAUTO)) {
        return;
    }

    final Context lCtx = ctx;
    PathFilter p = new PathFilter() {
        public boolean accept(Path file) {
            return !lCtx.isMRTmpFileURI(file.toUri().getPath());
        }
    };
    List<ExecDriver> mrtasks = Utilities.getMRTasks(rootTasks);

    // map-reduce jobs will be run locally based on data size
    // first find out if any of the jobs needs to run non-locally
    boolean hasNonLocalJob = false;
    for (ExecDriver mrtask : mrtasks) {
        try {
            ContentSummary inputSummary = Utilities.getInputSummary(ctx, (MapredWork) mrtask.getWork(), p);
            int numReducers = getNumberOfReducers(mrtask.getWork(), conf);

            long estimatedInput;

            if (globalLimitCtx != null && globalLimitCtx.isEnable()) {
                // If the global limit optimization is triggered, we will
                // estimate input data actually needed based on limit rows.
                // estimated input = (num_limit * max_size_per_row) * (estimated_map + 1)
                //
                long sizePerRow = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVELIMITMAXROWSIZE);
                estimatedInput = globalLimitCtx.getGlobalLimit() * sizePerRow;
                long minSplitSize = HiveConf.getLongVar(conf, HiveConf.ConfVars.MAPREDMINSPLITSIZE);
                long estimatedNumMap = inputSummary.getLength() / minSplitSize + 1;
                estimatedInput = estimatedInput * (estimatedNumMap + 1);
            } else {
                estimatedInput = inputSummary.getLength();
            }

            if (LOG.isDebugEnabled()) {
                LOG.debug("Task: " + mrtask.getId() + ", Summary: " + inputSummary.getLength() + ","
                        + inputSummary.getFileCount() + "," + numReducers + ", estimated Input: "
                        + estimatedInput);
            }

            if (MapRedTask.isEligibleForLocalMode(conf, numReducers, estimatedInput,
                    inputSummary.getFileCount()) != null) {
                hasNonLocalJob = true;
                break;
            } else {
                mrtask.setLocalMode(true);
            }
        } catch (IOException e) {
            throw new SemanticException(e);
        }
    }

    if (!hasNonLocalJob) {
        // none of the map-reduce tasks needs to be run non-locally, which means
        // the query can be executed entirely in local mode. Save the current
        // tracker value and restore it when done.
        ctx.setOriginalTracker(conf.getVar(HiveConf.ConfVars.HADOOPJT));
        conf.setVar(HiveConf.ConfVars.HADOOPJT, "local");
        console.printInfo("Automatically selecting local only mode for query");

        // If all the tasks can be run locally, we can use local disk for
        // storing intermediate data.

        /**
         * This code is commented out pending further testing/development
         * for (Task<? extends Serializable> t: rootTasks)
         * t.localizeMRTmpFiles(ctx);
         */
    }
}
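
To make the estimation arithmetic concrete, here is a small sketch with invented numbers; the real values come from HiveConf and the input summary.

// Illustrative values only.
long globalLimit = 100;         // globalLimitCtx.getGlobalLimit()
long sizePerRow = 1024;         // HIVELIMITMAXROWSIZE, in bytes
long inputLength = 64L << 20;   // inputSummary.getLength(): 64 MB
long minSplitSize = 16L << 20;  // MAPREDMINSPLITSIZE: 16 MB

long estimatedNumMap = inputLength / minSplitSize + 1;                   // 4 + 1 = 5
long estimatedInput = globalLimit * sizePerRow * (estimatedNumMap + 1);  // 614,400 bytes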