Example usage for org.apache.hadoop.fs FileContext getLocalFSFileContext

List of usage examples for org.apache.hadoop.fs FileContext getLocalFSFileContext

Introduction

On this page you can find example usage of org.apache.hadoop.fs FileContext getLocalFSFileContext.

Prototype

public static FileContext getLocalFSFileContext() throws UnsupportedFileSystemException 
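
A minimal, self-contained sketch of how this prototype is typically used (the path below is illustrative and not taken from the examples that follow): it obtains a FileContext bound to the local file system, qualifies a path against it, and reads the file status.

import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;

public class LocalFsFileContextExample {
    public static void main(String[] args) throws Exception {
        // Obtain a FileContext backed by the local file system; throws
        // UnsupportedFileSystemException if the local file system is unavailable.
        FileContext localFs = FileContext.getLocalFSFileContext();

        // Qualify a path against the local file system (illustrative path).
        Path path = localFs.makeQualified(new Path(System.getProperty("user.dir")));

        // Use the context for ordinary file system operations.
        FileStatus status = localFs.getFileStatus(path);
        System.out.println(path + " isDirectory=" + status.isDirectory());
    }
}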

Usage

From source file:com.ikanow.aleph2.data_import_manager.harvest.actors.TestDataBucketChangeActor.java

License:Apache License

@Test
public void test_cacheJars() throws UnsupportedFileSystemException, InterruptedException, ExecutionException {
    try {
        // Preamble:
        // 0) Insert 2 library beans into the management db

        final DataBucketBean bucket = createBucket("test_tech_id_harvest");

        final String pathname1 = System.getProperty("user.dir")
                + "/misc_test_assets/simple-harvest-example.jar";
        final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
        final String pathname2 = System.getProperty("user.dir")
                + "/misc_test_assets/simple-harvest-example2.jar";
        final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

        List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

        final IManagementDbService underlying_db = _service_context
                .getService(IManagementDbService.class, Optional.empty()).get();
        final IManagementCrudService<SharedLibraryBean> library_crud = underlying_db.getSharedLibraryStore();
        library_crud.deleteDatastore().get();
        assertEquals(0L, (long) library_crud.countObjects().get());

        library_crud.storeObjects(lib_elements).get();

        assertEquals(3L, (long) library_crud.countObjects().get());

        // 0b) Create the more complex bucket

        final HarvestControlMetadataBean harvest_module = new HarvestControlMetadataBean("test_tech_name", true,
                null, Arrays.asList("test_module_id"), null, null);
        final DataBucketBean bucket2 = BeanTemplateUtils.clone(bucket)
                .with(DataBucketBean::harvest_technology_name_or_id, "test_tech_id_harvest")
                .with(DataBucketBean::harvest_configs, Arrays.asList(harvest_module)).done();

        // 1) Normal operation

        CompletableFuture<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> reply_structure = LibraryCacheUtils
                .cacheJars(bucket, DataBucketHarvestChangeActor.getQuery(bucket, true),
                        _service_context.getCoreManagementDbService(), _service_context.getGlobalProperties(),
                        _service_context.getStorageService(), _service_context, "test1_source",
                        "test1_command");

        if (reply_structure.get().isFail()) {
            fail("About to crash with: " + reply_structure.get().fail().message());
        }
        assertTrue("cacheJars should return valid reply", reply_structure.get().isSuccess());

        final Map<String, Tuple2<SharedLibraryBean, String>> reply_map = reply_structure.get().success();

        assertEquals(2L, reply_map.size()); // (harvest tech only, one entry for name, one for id)

        // 2) Normal operation - tech + module

        CompletableFuture<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> reply_structure2 = LibraryCacheUtils
                .cacheJars(bucket, DataBucketHarvestChangeActor.getQuery(bucket2, false),
                        _service_context.getCoreManagementDbService(), _service_context.getGlobalProperties(),
                        _service_context.getStorageService(), _service_context, "test2_source",
                        "test2_command");

        if (reply_structure2.get().isFail()) {
            fail("About to crash with: " + reply_structure2.get().fail().message());
        }
        assertTrue("cacheJars should return valid reply", reply_structure2.get().isSuccess());

        final Map<String, Tuple2<SharedLibraryBean, String>> reply_map2 = reply_structure2.get().success();

        assertEquals(4L, reply_map2.size()); // (harvest tech + module, one entry each for name and id)

        // 3) Couple of error cases:

        DataBucketBean bucket3 = BeanTemplateUtils.clone(bucket)
                .with(DataBucketBean::harvest_technology_name_or_id, "failtest").done();

        CompletableFuture<Validation<BasicMessageBean, Map<String, Tuple2<SharedLibraryBean, String>>>> reply_structure3 = LibraryCacheUtils
                .cacheJars(bucket, DataBucketHarvestChangeActor.getQuery(bucket3, false),
                        _service_context.getCoreManagementDbService(), _service_context.getGlobalProperties(),
                        _service_context.getStorageService(), _service_context, "test2_source",
                        "test2_command");

        assertTrue("cacheJars should return error", reply_structure3.get().isFail());
    } catch (Exception e) {
        System.out.println(ErrorUtils.getLongForm("guice? {0}", e));
        throw e;
    }
}

From source file:com.ikanow.aleph2.data_import_manager.harvest.actors.TestDataBucketChangeActor.java

License:Apache License

@Test
public void test_actor() throws UnsupportedFileSystemException, IllegalArgumentException, InterruptedException,
        ExecutionException, TimeoutException {
    // Set up the DB
    // Preamble:
    // 0) Insert 2 library beans into the management db

    final DataBucketBean bucket = createBucket("test_tech_id_harvest");

    final String pathname1 = System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example.jar";
    final Path path1 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname1));
    final String pathname2 = System.getProperty("user.dir") + "/misc_test_assets/simple-harvest-example2.jar";
    final Path path2 = FileContext.getLocalFSFileContext().makeQualified(new Path(pathname2));

    final List<SharedLibraryBean> lib_elements = createSharedLibraryBeans(path1, path2);

    final IManagementDbService underlying_db = _service_context
            .getService(IManagementDbService.class, Optional.empty()).get();
    final IManagementCrudService<SharedLibraryBean> library_crud = underlying_db.getSharedLibraryStore();
    library_crud.deleteDatastore().get();
    assertEquals(0L, (long) library_crud.countObjects().get());
    @SuppressWarnings("unused")
    Tuple2<Supplier<List<Object>>, Supplier<Long>> oo = library_crud.storeObjects(lib_elements).get();

    assertEquals(3L, (long) library_crud.countObjects().get());
    assertEquals(3L,
            (long) _service_context.getCoreManagementDbService().getSharedLibraryStore().countObjects().get());

    // Create an actor:

    final ActorRef handler = _db_actor_context.getActorSystem()
            .actorOf(Props.create(DataBucketHarvestChangeActor.class), "test_host");
    _db_actor_context.getBucketActionMessageBus().subscribe(handler, ActorUtils.BUCKET_ACTION_EVENT_BUS);

    // create the inbox:
    final Inbox inbox = Inbox.create(_actor_context.getActorSystem());

    // Send it some messages:

    // 1) A message that it will ignore because it's the wrong type

    inbox.send(handler, "IGNOREME");
    try {
        inbox.receive(Duration.create(1L, TimeUnit.SECONDS));
        fail("should have timed out");
    } catch (Exception e) {
        assertEquals(TimeoutException.class, e.getClass());
    }

    // 2) A message that it will ignore because it's not for this actor

    final BucketActionMessage.DeleteBucketActionMessage delete = new BucketActionMessage.DeleteBucketActionMessage(
            bucket, new HashSet<String>(Arrays.asList("a", "b")));

    inbox.send(handler, delete);
    try {
        inbox.receive(Duration.create(1L, TimeUnit.SECONDS));
        fail("should have timed out");
    } catch (Exception e) {
        assertEquals(TimeoutException.class, e.getClass());
    }

    // 3) A message that it will process because it's a broadcast

    final BucketActionMessage.BucketActionOfferMessage broadcast = new BucketActionMessage.BucketActionOfferMessage(
            bucket, null, Collections.emptySet());

    _db_actor_context.getBucketActionMessageBus()
            .publish(new BucketActionEventBusWrapper(inbox.getRef(), broadcast));

    final Object msg = inbox.receive(Duration.create(5L, TimeUnit.SECONDS));

    assertEquals(BucketActionReplyMessage.BucketActionWillAcceptMessage.class, msg.getClass());

    // 4) A message that it will process because it's for this actor

    final BucketActionMessage.UpdateBucketActionMessage update = new BucketActionMessage.UpdateBucketActionMessage(
            bucket, true, bucket,
            new HashSet<String>(Arrays.asList(_actor_context.getInformationService().getHostname())));

    final CompletableFuture<BucketActionReplyMessage> reply4 = AkkaFutureUtils.efficientWrap(
            Patterns.ask(handler, update, 5000L), _db_actor_context.getActorSystem().dispatcher());
    final BucketActionReplyMessage msg4 = reply4.get();

    assertEquals(BucketActionReplyMessage.BucketActionHandlerMessage.class, msg4.getClass());
    final BucketActionReplyMessage.BucketActionHandlerMessage msg4b = (BucketActionReplyMessage.BucketActionHandlerMessage) msg4;

    assertEquals(true, msg4b.reply().success());
    assertEquals("called onUpdatedSource true", msg4b.reply().message());
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.DistributedCacheManager.java

License:Apache License

/**
 * Set up the distributed cache by localizing the resources, and updating
 * the configuration with references to the localized resources.
 * @param conf job configuration
 * @throws IOException
 */
public void setup(Configuration conf) throws IOException {
    //If we are not 0th worker, wait for 0th worker to set up the cache
    if (InvocationWorker.getIgWorkerIndex() > 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().waitForComplete(ACTION_NAME, SYNCHRONIZATION_WAIT_MS,
                    WAIT_GRANULARITY_MS);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
        return;
    }

    File workDir = new File(System.getProperty("user.dir"));

    // Generate YARN local resources objects corresponding to the distributed
    // cache configuration
    Map<String, LocalResource> localResources = new LinkedHashMap<String, LocalResource>();
    MRApps.setupDistributedCache(conf, localResources);

    //CODE CHANGE FROM ORIGINAL FILE:
    //We need to clear the resources from jar files, since they are distributed through the IG.
    //
    Iterator<Map.Entry<String, LocalResource>> iterator = localResources.entrySet().iterator();
    while (iterator.hasNext()) {
        Entry<String, LocalResource> entry = iterator.next();
        if (entry.getKey().endsWith(".jar")) {
            iterator.remove();
        }
    }

    // Generating unique numbers for FSDownload.

    AtomicLong uniqueNumberGenerator = new AtomicLong(System.currentTimeMillis());

    // Find which resources are to be put on the local classpath
    Map<String, Path> classpaths = new HashMap<String, Path>();
    Path[] archiveClassPaths = DistributedCache.getArchiveClassPaths(conf);
    if (archiveClassPaths != null) {
        for (Path p : archiveClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    Path[] fileClassPaths = DistributedCache.getFileClassPaths(conf);
    if (fileClassPaths != null) {
        for (Path p : fileClassPaths) {
            FileSystem remoteFS = p.getFileSystem(conf);
            p = remoteFS.resolvePath(p.makeQualified(remoteFS.getUri(), remoteFS.getWorkingDirectory()));
            classpaths.put(p.toUri().getPath().toString(), p);
        }
    }

    // Localize the resources
    LocalDirAllocator localDirAllocator = new LocalDirAllocator(MRConfig.LOCAL_DIR);
    FileContext localFSFileContext = FileContext.getLocalFSFileContext();
    UserGroupInformation ugi = UserGroupInformation.getCurrentUser();

    ExecutorService exec = null;
    try {
        ThreadFactory tf = new ThreadFactoryBuilder()
                .setNameFormat("LocalDistributedCacheManager Downloader #%d").build();
        exec = Executors.newCachedThreadPool(tf);
        Path destPath = localDirAllocator.getLocalPathForWrite(".", conf);
        Map<LocalResource, Future<Path>> resourcesToPaths = Maps.newHashMap();
        for (LocalResource resource : localResources.values()) {
            Callable<Path> download = new FSDownload(localFSFileContext, ugi, conf,
                    new Path(destPath, Long.toString(uniqueNumberGenerator.incrementAndGet())), resource);
            Future<Path> future = exec.submit(download);
            resourcesToPaths.put(resource, future);
        }
        for (Entry<String, LocalResource> entry : localResources.entrySet()) {
            LocalResource resource = entry.getValue();
            Path path;
            try {
                path = resourcesToPaths.get(resource).get();
            } catch (InterruptedException e) {
                throw new IOException(e);
            } catch (ExecutionException e) {
                throw new IOException(e);
            }
            String pathString = path.toUri().toString();
            String link = entry.getKey();
            String target = new File(path.toUri()).getPath();
            symlink(workDir, target, link);

            if (resource.getType() == LocalResourceType.ARCHIVE) {
                localArchives.add(pathString);
            } else if (resource.getType() == LocalResourceType.FILE) {
                localFiles.add(pathString);
            } else if (resource.getType() == LocalResourceType.PATTERN) {
                //PATTERN is not currently used in local mode
                throw new IllegalArgumentException(
                        "Resource type PATTERN is not " + "implemented yet. " + resource.getResource());
            }
            Path resourcePath;
            try {
                resourcePath = ConverterUtils.getPathFromYarnURL(resource.getResource());
            } catch (URISyntaxException e) {
                throw new IOException(e);
            }
            LOG.info(String.format("Localized %s as %s", resourcePath, path));
            String cp = resourcePath.toUri().getPath();
            if (classpaths.keySet().contains(cp)) {
                localClasspaths.add(path.toUri().getPath().toString());
            }
        }
    } finally {
        if (exec != null) {
            exec.shutdown();
        }
    }
    // Update the configuration object with localized data.
    if (!localArchives.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALARCHIVES,
                StringUtils.arrayToString(localArchives.toArray(new String[localArchives.size()])));
    }
    if (!localFiles.isEmpty()) {
        conf.set(MRJobConfig.CACHE_LOCALFILES,
                StringUtils.arrayToString(localFiles.toArray(new String[localFiles.size()])));
    }
    setupCalled = true;

    //If we are  0th worker, signal action complete
    if (InvocationWorker.getIgWorkerIndex() == 0 && InvocationWorker.getNumberOfWorkers() > 1) {
        try {
            InvocationWorker.getSynchronizationBarrier().signalComplete(ACTION_NAME);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

}

From source file:io.hops.tensorflow.TestCluster.java

License:Apache License

protected void setupInternal(int numNodeManager) throws Exception {

    LOG.info("Starting up YARN cluster");

    conf = new YarnConfiguration();
    conf.setInt(YarnConfiguration.RM_SCHEDULER_MINIMUM_ALLOCATION_MB, 128);
    conf.set("yarn.log.dir", "target");
    conf.set("yarn.log-aggregation-enable", "true");
    conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true);
    conf.set(YarnConfiguration.RM_SCHEDULER, CapacityScheduler.class.getName());
    conf.setBoolean(YarnConfiguration.NODE_LABELS_ENABLED, true);
    conf.setBoolean(YarnConfiguration.NM_GPU_RESOURCE_ENABLED, false);

    if (yarnCluster == null) {
        yarnCluster = new MiniYARNCluster(TestCluster.class.getSimpleName(), 1, numNodeManager, 1, 1);
        yarnCluster.init(conf);

        yarnCluster.start();

        conf.set(YarnConfiguration.TIMELINE_SERVICE_WEBAPP_ADDRESS,
                MiniYARNCluster.getHostname() + ":" + yarnCluster.getApplicationHistoryServer().getPort());

        waitForNMsToRegister();

        URL url = Thread.currentThread().getContextClassLoader().getResource("yarn-site.xml");
        if (url == null) {
            throw new RuntimeException("Could not find 'yarn-site.xml' dummy file in classpath");
        }
        Configuration yarnClusterConfig = yarnCluster.getConfig();
        yarnClusterConfig.set("yarn.application.classpath", new File(url.getPath()).getParent());
        // write the document to a buffer (not directly to the file, as that
        // can cause the file being written to get read, which will then fail)
        ByteArrayOutputStream bytesOut = new ByteArrayOutputStream();
        yarnClusterConfig.writeXml(bytesOut);
        bytesOut.close();
        //write the bytes to the file in the classpath
        OutputStream os = new FileOutputStream(new File(url.getPath()));
        os.write(bytesOut.toByteArray());
        os.close();
    }
    FileContext fsContext = FileContext.getLocalFSFileContext();
    fsContext.delete(new Path(conf.get("yarn.timeline-service.leveldb-timeline-store.path")), true);
    try {
        Thread.sleep(2000);
    } catch (InterruptedException e) {
        LOG.info("setup thread sleep interrupted. message=" + e.getMessage());
    }
}

From source file:io.hops.tensorflow.TestCluster.java

License:Apache License

@After
public void tearDown() throws IOException {
    if (yarnCluster != null) {
        try {
            yarnCluster.stop();
        } finally {
            yarnCluster = null;
        }
    }
    FileContext fsContext = FileContext.getLocalFSFileContext();
    fsContext.delete(new Path(conf.get("yarn.timeline-service.leveldb-timeline-store.path")), true);
}

From source file:org.apache.apex.malhar.contrib.avro.AvroFileToPojoModuleTest.java

License:Apache License

@Test
public void testAvroToPojoModule() throws Exception {
    try {
        FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
        int cnt = 7;
        createAvroInput(cnt);
        writeAvroFile(new File(FILENAME));
        createAvroInput(cnt - 2);
        writeAvroFile(new File(OTHER_FILE));

        avroFileToPojoModule.setAvroFileDirectory(testMeta.dir);
        avroFileToPojoModule.setPojoClass(SimpleOrder.class);

        AvroToPojo avroToPojo = new AvroToPojo();
        avroToPojo.setPojoClass(SimpleOrder.class);

        EmbeddedAppLauncherImpl lma = new EmbeddedAppLauncherImpl();
        Configuration conf = new Configuration(false);

        AvroToPojoApplication avroToPojoApplication = new AvroToPojoApplication();
        avroToPojoApplication.setAvroFileToPojoModule(avroFileToPojoModule);

        lma.prepareDAG(avroToPojoApplication, conf);
        EmbeddedAppLauncherImpl.Controller lc = lma.getController();
        lc.run(10000);// runs for 10 seconds and quits
    } catch (ConstraintViolationException e) {
        Assert.fail("constraint violations: " + e.getConstraintViolations());
    }
}

From source file:org.apache.apex.malhar.contrib.parser.StreamingJsonParserTest.java

License:Apache License

@Test
public void testApplicationWithPojoConversion() throws IOException, Exception {
    try {
        FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
        int cnt = 7;
        createReaderInput(cnt);
        writeJsonInputFile(new File(FILENAME));
        FileInputOperator fileInput = new FileInputOperator();
        fileInput.setDirectory(testMeta.dir);
        LocalMode lma = LocalMode.newInstance();
        Configuration conf = new Configuration(false);
        JsonStreamingParserApp streamingParserApp = new JsonStreamingParserApp();
        streamingParserApp.setParser(jsonParser);
        streamingParserApp.setFileInput(fileInput);
        lma.prepareDAG(streamingParserApp, conf);
        LocalMode.Controller lc = lma.getController();
        lc.run(10000);// runs for 10 seconds and quits
    } catch (ConstraintViolationException e) {
        Assert.fail("constraint violations: " + e.getConstraintViolations());
    }
}

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorFailureHandlingTest.java

License:Apache License

@Test
public void testFailureHandling() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.getDir()).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    // Create files with 100 records.
    for (int file = 0; file < 10; file++) {
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 10; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.getDir(), "file" + file), StringUtils.join(lines, '\n'));
    }

    Thread.sleep(10);

    TestFileInputOperator oper = new TestFileInputOperator();

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.getDir());
    oper.getScanner().setFilePatternRegexp(".*file[\\d]");

    oper.setup(mockOperatorContext(1, new Attribute.AttributeMap.DefaultAttributeMap()));
    for (long wid = 0; wid < 1000; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();

    Assert.assertEquals("number tuples", 100, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines, new HashSet<String>(queryResults.collectedTuples));
    TestUtils.deleteTargetTestClassFolder(testMeta.desc);

}

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License:Apache License

private void checkSubDir(boolean recursive) throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    HashSet<String> allLines = Sets.newHashSet();
    String subdir = "";
    for (int file = 0; file < 2; file++) {
        subdir += String.format("/depth_%d", file);
        HashSet<String> lines = Sets.newHashSet();
        for (int line = 0; line < 2; line++) {
            lines.add("f" + file + "l" + line);
        }
        allLines.addAll(lines);
        FileUtils.write(new File(testMeta.dir + subdir, "file" + file), StringUtils.join(lines, '\n'));
    }

    LineByLineFileInputOperator oper = new LineByLineFileInputOperator();

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    oper.setDirectory(testMeta.dir);
    oper.getScanner().setFilePatternRegexp("((?!target).)*file[\\d]");
    oper.getScanner().setRecursive(recursive);

    oper.setup(testMeta.context);
    for (long wid = 0; wid < 3; wid++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
    }
    oper.teardown();

    int expectedNumTuples = 4;
    if (!recursive) {
        allLines = new HashSet<String>();
        expectedNumTuples = 0;
    }
    Assert.assertEquals("number tuples", expectedNumTuples, queryResults.collectedTuples.size());
    Assert.assertEquals("lines", allLines, new HashSet<String>(queryResults.collectedTuples));

}

From source file:org.apache.apex.malhar.lib.io.fs.AbstractFileInputOperatorTest.java

License:Apache License

@Test
public void testEmptyDirectory() throws Exception {
    FileContext.getLocalFSFileContext().delete(new Path(new File(testMeta.dir).getAbsolutePath()), true);
    Set<String> dPaths = Sets.newHashSet();
    dPaths.add(new File(testMeta.dir).getCanonicalPath());

    String subdir01 = "/a";
    dPaths.add(new File(testMeta.dir + subdir01).getCanonicalPath());
    FileUtils.forceMkdir((new File(testMeta.dir + subdir01)));

    String subdir02 = "/b";
    dPaths.add(new File(testMeta.dir + subdir02).getCanonicalPath());
    FileUtils.forceMkdir(new File(testMeta.dir + subdir02));

    String subdir03 = subdir02 + "/c";
    dPaths.add(new File(testMeta.dir + subdir03).getCanonicalPath());
    FileUtils.forceMkdir(new File(testMeta.dir + subdir03));

    String subdir04 = "/d";
    List<String> allLines = Lists.newArrayList();
    HashSet<String> lines = Sets.newHashSet();
    for (int line = 0; line < 5; line++) {
        lines.add("f0" + "l" + line);
    }
    allLines.addAll(lines);
    File testFile = new File(testMeta.dir + subdir04, "file0");
    dPaths.add(new File(testMeta.dir + subdir04).getCanonicalPath());
    FileUtils.write(testFile, StringUtils.join(lines, '\n'));

    LineOperator oper = new LineOperator();
    oper.setDirectory(new File(testMeta.dir).getAbsolutePath());
    oper.setScanIntervalMillis(0);

    CollectorTestSink<String> queryResults = new CollectorTestSink<String>();
    @SuppressWarnings({ "unchecked", "rawtypes" })
    CollectorTestSink<Object> sink = (CollectorTestSink) queryResults;
    oper.output.setSink(sink);

    int wid = 0;

    // Read all records to populate processedList in operator.
    oper.setup(testMeta.context);
    for (int i = 0; i < 3; i++) {
        oper.beginWindow(wid);
        oper.emitTuples();
        oper.endWindow();
        wid++;
    }

    Assert.assertEquals("Size", 5, oper.dirPaths.size());
    Assert.assertTrue("Checking Sets", dPaths.equals(oper.dirPaths));
}