Example usage for org.apache.hadoop.fs FileUtil fullyDelete

List of usage examples for org.apache.hadoop.fs FileUtil fullyDelete

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileUtil fullyDelete.

Prototype

public static boolean fullyDelete(final File dir) 

Document

Delete a directory and all its contents.
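
A minimal sketch of the call, using a made-up scratch path that does not come from any of the examples below: fullyDelete(File) removes the directory and everything under it, and returns true only if the delete succeeded, so the result is worth checking.

import java.io.File;

import org.apache.hadoop.fs.FileUtil;

public class FullyDeleteSketch {
    public static void main(String[] args) {
        // Hypothetical scratch directory, used only for illustration.
        File scratch = new File("/tmp/fully-delete-sketch");

        // fullyDelete(File) deletes the directory and all of its contents;
        // it reports failure through the boolean result rather than an exception.
        if (!FileUtil.fullyDelete(scratch)) {
            System.err.println("Could not fully delete " + scratch);
        }
    }
}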

Usage

From source file:com.kylinolap.job.tools.ColumnCardinalityJobTest.java

License:Apache License

@Test
@Ignore
public void testJob() throws Exception {
    final String input = "src/test/resources/data/test_cal_dt/";
    final String output = "target/test-output/column-cardinality/";

    FileUtil.fullyDelete(new File(output));

    String[] args = { "-input", input, "-output", output, "-cols", "1,2,3,4,5,6,9,0" };
    assertEquals("Job failed", 0, ToolRunner.run(new HiveColumnCardinalityJob(), args));
}
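
The output directory is deleted up front because Hadoop output formats typically refuse to write into a path that already exists. A hedged variant of the same guard, with a hypothetical path, could look like this fragment:

File output = new File("target/test-output/example-job/"); // hypothetical path
if (output.exists() && !FileUtil.fullyDelete(output)) {
    throw new IOException("Could not delete previous output: " + output);
}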

From source file:com.mellanox.r4h.MiniDFSCluster.java

License:Apache License

private void initMiniDFSCluster(Configuration conf, int numDataNodes, StorageType storageType, boolean format,
        boolean manageNameDfsDirs, boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy,
        boolean manageDataDfsDirs, StartupOption startOpt, StartupOption dnStartOpt, String[] racks,
        String[] hosts, long[] simulatedCapacities, String clusterId, boolean waitSafeMode,
        boolean setupHostsFile, MiniDFSNNTopology nnTopology, boolean checkExitOnShutdown,
        boolean checkDataNodeAddrConfig, boolean checkDataNodeHostConfig, Configuration[] dnConfOverlays)
        throws IOException {
    ExitUtil.disableSystemExit();

    // Re-enable symlinks for tests, see HADOOP-10020 and HADOOP-10052
    FileSystem.enableSymlinks();

    synchronized (MiniDFSCluster.class) {
        instanceId = instanceCount++;
    }

    this.conf = conf;
    base_dir = new File(determineDfsBaseDir());
    data_dir = new File(base_dir, "data");
    this.waitSafeMode = waitSafeMode;
    this.checkExitOnShutdown = checkExitOnShutdown;

    int replication = conf.getInt(DFS_REPLICATION_KEY, 3);
    conf.setInt(DFS_REPLICATION_KEY, Math.min(replication, numDataNodes));
    int safemodeExtension = conf.getInt(DFS_NAMENODE_SAFEMODE_EXTENSION_TESTING_KEY, 0);
    conf.setInt(DFS_NAMENODE_SAFEMODE_EXTENSION_KEY, safemodeExtension);
    conf.setInt(DFS_NAMENODE_DECOMMISSION_INTERVAL_KEY, 3); // 3 second
    conf.setClass(NET_TOPOLOGY_NODE_SWITCH_MAPPING_IMPL_KEY, StaticMapping.class, DNSToSwitchMapping.class);

    // In an HA cluster, in order for the StandbyNode to perform checkpoints,
    // it needs to know the HTTP port of the Active. So, if ephemeral ports
    // are chosen, disable checkpoints for the test.
    if (!nnTopology.allHttpPortsSpecified() && nnTopology.isHA()) {
        LOG.info("MiniDFSCluster disabling checkpointing in the Standby node "
                + "since no HTTP ports have been specified.");
        conf.setBoolean(DFS_HA_STANDBY_CHECKPOINTS_KEY, false);
    }
    if (!nnTopology.allIpcPortsSpecified() && nnTopology.isHA()) {
        LOG.info("MiniDFSCluster disabling log-roll triggering in the "
                + "Standby node since no IPC ports have been specified.");
        conf.setInt(DFS_HA_LOGROLL_PERIOD_KEY, -1);
    }

    federation = nnTopology.isFederated();
    try {
        createNameNodesAndSetConf(nnTopology, manageNameDfsDirs, manageNameDfsSharedDirs,
                enableManagedDfsDirsRedundancy, format, startOpt, clusterId, conf);
    } catch (IOException ioe) {
        LOG.error("IOE creating namenodes. Permissions dump:\n" + createPermissionsDiagnosisString(data_dir));
        throw ioe;
    }
    if (format) {
        if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) {
            throw new IOException(
                    "Cannot remove data directory: " + data_dir + createPermissionsDiagnosisString(data_dir));
        }
    }

    if (startOpt == StartupOption.RECOVER) {
        return;
    }

    // Start the DataNodes
    startDataNodes(conf, numDataNodes, storageType, manageDataDfsDirs,
            dnStartOpt != null ? dnStartOpt : startOpt, racks, hosts, simulatedCapacities, setupHostsFile,
            checkDataNodeAddrConfig, checkDataNodeHostConfig, dnConfOverlays);
    waitClusterUp();
    // make sure ProxyUsers uses the latest conf
    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
}

From source file:com.mellanox.r4h.MiniDFSCluster.java

License:Apache License

private void createNameNodesAndSetConf(MiniDFSNNTopology nnTopology, boolean manageNameDfsDirs,
        boolean manageNameDfsSharedDirs, boolean enableManagedDfsDirsRedundancy, boolean format,
        StartupOption operation, String clusterId, Configuration conf) throws IOException {
    Preconditions.checkArgument(nnTopology.countNameNodes() > 0, "empty NN topology: no namenodes specified!");

    if (!federation && nnTopology.countNameNodes() == 1) {
        NNConf onlyNN = nnTopology.getOnlyNameNode();
        // we only had one NN, set DEFAULT_NAME for it. If not explicitly
        // specified initially, the port will be 0 to make NN bind to any
        // available port. It will be set to the right address after
        // NN is started.
        conf.set(FS_DEFAULT_NAME_KEY, "hdfs://127.0.0.1:" + MiniDFSClusterBridge.getNNConf_ipcPort(onlyNN));
    }

    List<String> allNsIds = Lists.newArrayList();
    for (MiniDFSNNTopology.NSConf nameservice : nnTopology.getNameservices()) {
        if (nameservice.getId() != null) {
            allNsIds.add(nameservice.getId());
        }
    }
    if (!allNsIds.isEmpty()) {
        conf.set(DFS_NAMESERVICES, Joiner.on(",").join(allNsIds));
    }

    int nnCounter = 0;
    for (MiniDFSNNTopology.NSConf nameservice : nnTopology.getNameservices()) {
        String nsId = nameservice.getId();
        String lastDefaultFileSystem = null;

        Preconditions.checkArgument(!federation || nsId != null,
                "if there is more than one NS, they must have names");

        // First set up the configuration which all of the NNs
        // need to have - have to do this a priori before starting
        // *any* of the NNs, so they know to come up in standby.
        List<String> nnIds = Lists.newArrayList();
        // Iterate over the NNs in this nameservice
        for (NNConf nn : nameservice.getNNs()) {
            nnIds.add(MiniDFSClusterBridge.getNNConf_nnId(nn));

            initNameNodeAddress(conf, nameservice.getId(), nn);
        }

        // If HA is enabled on this nameservice, enumerate all the namenodes
        // in the configuration. Also need to set a shared edits dir
        if (nnIds.size() > 1) {
            conf.set(DFSUtil.addKeySuffixes(DFS_HA_NAMENODES_KEY_PREFIX, nameservice.getId()),
                    Joiner.on(",").join(nnIds));
            if (manageNameDfsSharedDirs) {
                URI sharedEditsUri = getSharedEditsDir(nnCounter, nnCounter + nnIds.size() - 1);
                conf.set(DFS_NAMENODE_SHARED_EDITS_DIR_KEY, sharedEditsUri.toString());
                // Clean out the shared edits dir completely, including all subdirectories.
                FileUtil.fullyDelete(new File(sharedEditsUri));
            }
        }

        // Now format first NN and copy the storage directory from that node to the others.
        int i = 0;
        Collection<URI> prevNNDirs = null;
        int nnCounterForFormat = nnCounter;
        for (NNConf nn : nameservice.getNNs()) {
            initNameNodeConf(conf, nsId, MiniDFSClusterBridge.getNNConf_nnId(nn), manageNameDfsDirs,
                    enableManagedDfsDirsRedundancy, nnCounterForFormat);
            Collection<URI> namespaceDirs = FSNamesystem.getNamespaceDirs(conf);
            if (format) {
                for (URI nameDirUri : namespaceDirs) {
                    File nameDir = new File(nameDirUri);
                    if (nameDir.exists() && !FileUtil.fullyDelete(nameDir)) {
                        throw new IOException("Could not fully delete " + nameDir);
                    }
                }
                Collection<URI> checkpointDirs = Util.stringCollectionAsURIs(
                        conf.getTrimmedStringCollection(DFS_NAMENODE_CHECKPOINT_DIR_KEY));
                for (URI checkpointDirUri : checkpointDirs) {
                    File checkpointDir = new File(checkpointDirUri);
                    if (checkpointDir.exists() && !FileUtil.fullyDelete(checkpointDir)) {
                        throw new IOException("Could not fully delete " + checkpointDir);
                    }
                }
            }

            boolean formatThisOne = format;
            if (format && i++ > 0) {
                // Don't format the second NN in an HA setup - that
                // would result in it having a different clusterID,
                // block pool ID, etc. Instead, copy the name dirs
                // from the first one.
                formatThisOne = false;
                assert (null != prevNNDirs);
                copyNameDirs(prevNNDirs, namespaceDirs, conf);
            }

            nnCounterForFormat++;
            if (formatThisOne) {
                // Allow overriding clusterID for specific NNs to test
                // misconfiguration.
                if (MiniDFSClusterBridge.getNNConf_cliusterId(nn) == null) {
                    StartupOption.FORMAT.setClusterId(clusterId);
                } else {
                    StartupOption.FORMAT.setClusterId(MiniDFSClusterBridge.getNNConf_cliusterId(nn));
                }
                DFSTestUtil.formatNameNode(conf);
            }
            prevNNDirs = namespaceDirs;
        }

        // Start all Namenodes
        for (NNConf nn : nameservice.getNNs()) {
            initNameNodeConf(conf, nsId, MiniDFSClusterBridge.getNNConf_nnId(nn), manageNameDfsDirs,
                    enableManagedDfsDirsRedundancy, nnCounter);
            createNameNode(nnCounter, conf, numDataNodes, false, operation, clusterId, nsId,
                    MiniDFSClusterBridge.getNNConf_nnId(nn));
            // Record the last namenode uri
            if (nameNodes[nnCounter] != null && nameNodes[nnCounter].conf != null) {
                lastDefaultFileSystem = nameNodes[nnCounter].conf.get(FS_DEFAULT_NAME_KEY);
            }
            nnCounter++;
        }
        if (!federation && lastDefaultFileSystem != null) {
            // Set the default file system to the actual bind address of NN.
            conf.set(FS_DEFAULT_NAME_KEY, lastDefaultFileSystem);
        }
    }

}

From source file:com.mellanox.r4h.MiniDFSCluster.java

License:Apache License

public static void copyNameDirs(Collection<URI> srcDirs, Collection<URI> dstDirs, Configuration dstConf)
        throws IOException {
    URI srcDir = Lists.newArrayList(srcDirs).get(0);
    FileSystem dstFS = FileSystem.getLocal(dstConf).getRaw();
    for (URI dstDir : dstDirs) {
        Preconditions.checkArgument(!dstDir.equals(srcDir), "src and dst are the same: " + dstDir);
        File dstDirF = new File(dstDir);
        if (dstDirF.exists()) {
            if (!FileUtil.fullyDelete(dstDirF)) {
                throw new IOException("Unable to delete: " + dstDirF);
            }
        }
        LOG.info("Copying namedir from primary node dir " + srcDir + " to " + dstDir);
        FileUtil.copy(new File(srcDir), dstFS, new Path(dstDir), false, dstConf);
    }
}

From source file:com.mellanox.r4h.MiniDFSCluster.java

License:Apache License

public void formatDataNodeDirs() throws IOException {
    base_dir = new File(determineDfsBaseDir());
    data_dir = new File(base_dir, "data");
    if (data_dir.exists() && !FileUtil.fullyDelete(data_dir)) {
        throw new IOException("Cannot remove data directory: " + data_dir);
    }
}
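
The guard used above (check exists, call fullyDelete, throw on failure) recurs throughout the MiniDFSCluster examples. A small helper along these lines could factor it out; this is a sketch of the idea, not code from the original class:

// Hypothetical helper, not part of MiniDFSCluster.
private static void deleteFullyOrThrow(File dir) throws IOException {
    if (dir.exists() && !FileUtil.fullyDelete(dir)) {
        throw new IOException("Cannot remove directory: " + dir);
    }
}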

From source file:com.pegasus.ResultInfo.java

License:Apache License

public int run(final String[] args) throws Exception {

    Configuration conf = getConf();
    final FileSystem fs = FileSystem.get(conf);
    edge_path = new Path(conf.get("edge_path"));
    all_vertices = new Path(conf.get("all_vertices"));
    curbm_path = new Path(conf.get("iteration_state"));
    tempbm_path = new Path(conf.get("stage1out"));
    nextbm_path = new Path(conf.get("stage2out"));
    output_path = new Path(conf.get("stage3out"));
    grapherOut_path = new Path(conf.get("grapherout"));
    nreducers = Integer.parseInt(conf.get("num_reducers"));
    local_output_path = conf.get("local_output");

    // initial cleanup
    fs.delete(tempbm_path, true);
    fs.delete(nextbm_path, true);
    fs.delete(output_path, true);
    fs.delete(curbm_path, true);
    fs.delete(grapherOut_path, true);
    FileUtil.fullyDelete(new File(local_output_path));
    fs.mkdirs(curbm_path);
    //fs.mkdirs(grapherOut_path);

    FileStatus[] statusArray = fs.listStatus(all_vertices);
    for (int index = 0; index < statusArray.length; index++) {
        Path temp = statusArray[index].getPath();
        FileUtil.copy(fs, temp, fs, curbm_path, false, conf);
    }

    make_symmetric = 1;

    System.out.println("\n-----===[PEGASUS: A Peta-Scale Graph Mining System]===-----\n");

    // Iteratively calculate neighborhood function. 
    // rotate directory
    for (int i = cur_iter; i < MAX_ITERATIONS; i++) {
        cur_iter++;

        System.out.println("configStage1");
        JobClient.runJob(configStage1());
        System.out.println("configStage2");
        JobClient.runJob(configStage2());
        System.out.println("configStage3");
        JobClient.runJob(configStage3());

        FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

        // copy neighborhood information from HDFS to local disk, and read it!
        String new_path = local_output_path + "/" + i;
        fs.copyToLocalFile(output_path, new Path(new_path));
        ResultInfo ri = readIterationOutput(new_path);

        changed_nodes[iter_counter] = ri.changed;
        unchanged_nodes[iter_counter] = ri.unchanged;

        iter_counter++;

        System.out.println("Hop " + i + " : changed = " + ri.changed + ", unchanged = " + ri.unchanged);
        fs.delete(curbm_path);
        fs.delete(tempbm_path);
        fs.delete(output_path);
        fs.rename(nextbm_path, curbm_path);

        // Stop when the minimum neighborhood doesn't change
        if (ri.changed == 0) {
            System.out.println("All the component ids converged. Finishing...");
            fs.rename(curbm_path, grapherOut_path);
            break;
        }
    }
    FileUtil.fullyDelete(FileSystem.getLocal(getConf()), new Path(local_output_path));

    // finishing.
    System.out.println("\n[PEGASUS] Connected component computed.");
    System.out.println("[PEGASUS] Total Iteration = " + iter_counter);
    return 0;
}
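
This example mixes two overloads: fullyDelete(new File(local_output_path)) for the local directory, and the FileSystem/Path form in the FileUtil.fullyDelete(FileSystem.getLocal(...), new Path(...)) calls. A side-by-side sketch with a made-up path, assuming a Hadoop version where the FileSystem overload is still available (it is deprecated in favour of FileSystem.delete(path, true)); this fragment throws IOException:

// Hypothetical local scratch path, shown with both overloads.
String localOut = "/tmp/pegasus-local-output";

// java.io.File overload: operates directly on the local filesystem.
FileUtil.fullyDelete(new File(localOut));

// Deprecated FileSystem/Path overload: same effect through the local FileSystem;
// newer code would usually call localFs.delete(new Path(localOut), true) instead.
FileSystem localFs = FileSystem.getLocal(new Configuration());
FileUtil.fullyDelete(localFs, new Path(localOut));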

From source file:com.splicemachine.derby.impl.io.HdfsDirFileTest.java

License:Apache License

@BeforeClass
public static void beforeClass() throws Exception {
    // necessary for mapr
    HConfiguration.unwrapDelegate().set("fs.default.name", "file:///");

    FileUtil.fullyDelete(new File(localBaseDir).getAbsoluteFile());
}

From source file:com.twitter.elephanttwin.lucene.indexing.AbstractLuceneIndexingReducer.java

License:Apache License

@Override
public void cleanup(Reducer<KIN, VIN, NullWritable, NullWritable>.Context context) throws IOException {
    // This may take a while...
    indexer.close();
    LOG.info("Done finalizing index!");

    LOG.info(cnt + " records added to the index");
    LOG.info(skipped + " records skipped");

    // Copy from local back to HDFS.
    Path destination = new Path(context.getConfiguration().get(HDFS_INDEX_LOCATION));
    LOG.info("final index destination: " + destination);
    LOG.info("copying from " + tmpIndex + " to " + destination);

    FileSystem fs = FileSystem.get(context.getConfiguration());

    if (!fs.exists(destination)) {
        fs.mkdirs(destination);
    }

    fs.copyFromLocalFile(new Path(tmpIndex.getAbsolutePath()), destination);
    LOG.info("copying complete!");

    // Clean up local tmp directory.
    FileUtil.fullyDelete(tmpIndex);
    LOG.info("local directory " + tmpIndex + " removed!");

    heartbeatThread.interrupt();
}
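
A possible refinement, not taken from the original reducer: wrapping the copy in try/finally so the local temporary index is removed even if the upload to HDFS fails. The names tmpIndex, fs and destination are assumed to be set up as in the cleanup method above.

// Sketch only; tmpIndex, fs and destination come from the surrounding reducer.
try {
    fs.copyFromLocalFile(new Path(tmpIndex.getAbsolutePath()), destination);
} finally {
    // Remove the local staging directory whether or not the copy succeeded.
    FileUtil.fullyDelete(tmpIndex);
}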

From source file:com.twitter.elephanttwin.lzo.retrieval.TestLzoIndexing.java

License:Open Source License

/**
 * First create a .lzo input file containing ExciteLog data, then index it to
 * create a .lzo.index file, and finally start an indexing job on uid.
 *
 * @throws Exception
 */
@BeforeClass
public static void setUp() throws Exception {

    conf = new Configuration();

    conf.set("io.compression.codecs", "com.hadoop.compression.lzo.LzopCodec");
    conf.setInt(LzoCodec.LZO_BUFFER_SIZE_KEY, 256);
    codec = new LzopCodec();
    codec.setConf(conf);
    FileUtil.fullyDelete(new File(TESTDIR));
    inputDir.mkdirs();

    // close any FileSystem from previous tests:
    FileSystem.get(conf).close();

    //create 3 files to test globs and test on single lzo block in a split;
    //create File 1, which has only one lzo block.
    FileSystem fs = FileSystem.get(conf);
    String baseFilePath = TESTDIR + INPUTDIR;
    LzoIndex index;
    int repeatFactor1 = 1;
    createLZOFile(baseFilePath + "11.lzo", repeatFactor1, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "11.lzo"));
    if (index.getNumberOfBlocks() > 1)
        throw new RuntimeException(baseFilePath + "11.lzo has more than one " + "lzo block");

    //create File 2, which has more than one lzo block.
    int repeatFactor2 = 10;
    createLZOFile(baseFilePath + "21.lzo", repeatFactor2, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "21.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "21.lzo has only one lzo block");

    //create a new lzo file 3 to test combining lzo blocks.

    int repeatFactor3 = 30;
    createLZOFile(baseFilePath + "31.lzo", repeatFactor3, true); //b64 format
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "31.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "31.lzo has only one lzo block");

    int repeatFactor4 = 1;
    createLZOFile(baseFilePath + "b11.lzo", repeatFactor4, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b11.lzo"));
    if (index.getNumberOfBlocks() > 1)
        throw new RuntimeException(baseFilePath + "b11.lzo has more than one " + "lzo block");

    //create file b21.lzo, which also has more than one lzo block.
    int repeatFactor5 = 10;
    createLZOFile(baseFilePath + "b21.lzo", repeatFactor5, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b21.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "b21.lzo has only one lzo block");

    int repeatFactor6 = 30;
    createLZOFile(baseFilePath + "b31.lzo", repeatFactor6, true);
    index = LzoIndex.readIndex(fs, new Path(baseFilePath + "b31.lzo"));
    if (index.getNumberOfBlocks() < 2)
        throw new RuntimeException(baseFilePath + "b31.lzo has only one lzo block");

    //index the created lzo files without combining lzo blocks;
    String[] args = new String[] { "-jobpoolsize=1", "-index=" + TESTDIR + INDEXDIR, "-input=" + baseFilePath,
            "-inputformat=com.twitter.elephantbird.mapreduce.input." + "LzoThriftB64LineInputFormat",
            "-value_class=com.twitter.elephanttwin.gen.ExciteLog", "-columnname=uid", "-num_partitions=1",
            "-sleeptime=10", "-overwrite=false", };

    GenericOptionsParser optParser = new GenericOptionsParser(args);
    ToolRunner.run(conf, new LZOBlockLevelIndexingJobs(), optParser.getRemainingArgs());

    // the number of each key appears in all files
    repeatFactor = repeatFactor1 + repeatFactor2 + repeatFactor3 + repeatFactor4 + repeatFactor5
            + repeatFactor6;
    // number of rows has the same unique key in two files matching *1.lzo globs
    globsCnt = repeatFactor;

    pigServer = new PigServer(ExecType.LOCAL);
    // set lzo codec:
    pigServer.getPigContext().getProperties().setProperty("io.compression.codecs",
            "com.hadoop.compression.lzo.LzopCodec");
    pigServer.getPigContext().getProperties().setProperty("io.compression.codec.lzo.class",
            "com.hadoop.compression.lzo.LzoCodec");

    System.err.println("ALL DONE SETTING UP");
    // Thread.sleep(500000);
}

From source file:com.twitter.elephanttwin.lzo.retrieval.TestLzoIndexing.java

License:Open Source License

@AfterClass
public static void tearDown() throws IOException {
    if (pigServer != null)
        pigServer.shutdown();
    FileUtil.fullyDelete(new File(TESTDIR));
}