Example usage for org.apache.hadoop.fs FileSystem delete

Introduction

This page collects usage examples for org.apache.hadoop.fs.FileSystem.delete.

Prototype

@Deprecated
public boolean delete(Path f) throws IOException 

Document

Delete a file/directory. This single-argument form is deprecated; use delete(Path, boolean recursive) instead.
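
Below is a minimal sketch, not taken from any of the sources on this page, showing the deprecated call next to the recommended two-argument replacement; the path and class name are purely illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path target = new Path("/tmp/example"); // hypothetical path

        // Deprecated single-argument form (equivalent to delete(target, true)):
        // boolean deleted = fs.delete(target);

        // Recommended form; 'true' deletes directories recursively:
        boolean deleted = fs.delete(target, true);
        System.out.println("Deleted: " + deleted);

        fs.close();
    }
}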

Usage

From source file:cn.edu.hfut.dmic.webcollectorcluster.generator.Merge.java

public static void install(Path crawldb) throws IOException {
    FileSystem fs = crawldb.getFileSystem(CrawlerConfiguration.create());
    Path newdb = new Path(crawldb, "new");
    Path currentdb = new Path(crawldb, "current");
    Path olddb = new Path(crawldb, "old");
    if (fs.exists(currentdb)) {
        if (fs.exists(olddb)) {
            fs.delete(olddb);
        }
        fs.rename(currentdb, olddb);
    }
    fs.mkdirs(crawldb);
    fs.rename(newdb, currentdb);
}

From source file:com.bigdog.hadoop.hdfs.HDFS_Test.java

public boolean deleteHDFSFile(String dst) throws IOException {
    Configuration config = new Configuration();
    FileSystem hdfs = FileSystem.get(config);

    Path path = new Path(dst);
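    // The deprecated single-argument delete is equivalent to delete(path, true),
    // i.e. it removes directories recursively.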
    boolean isDeleted = hdfs.delete(path);

    hdfs.close();

    return isDeleted;
}

From source file:com.hadoop.secondarysort.SecondarySortDESC.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    // if (otherArgs.length != 2) {
    // System.err.println("Usage: secondarysrot <in> <out>");
    // System.exit(2);
    // }

    // JobConf jobConf = new JobConf();

    Job job = new Job(conf, "secondary sort");
    job.setJarByClass(SecondarySortDESC.class);
    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    // group and partition by the first int in the pair
    job.setPartitionerClass(FirstPartitioner.class);
    job.setGroupingComparatorClass(FirstGroupingComparator.class);
    // conf.setClass("mapred.output.key.comparator.class",
    // KeyComparator.class, RawComparator.class);
    // job.setSortComparatorClass(SecondGroupingComparator.class);
    // the map output is IntPair, IntWritable
    job.setMapOutputKeyClass(IntPair.class);
    job.setMapOutputValueClass(IntWritable.class);

    // the reduce output is Text, IntWritable
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(inPath));
    FileOutputFormat.setOutputPath(job, new Path(outPath));
    FileSystem fileSystem = FileSystem.get(conf);
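    // Remove any existing output directory so the job submission does not fail.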
    if (fileSystem.exists(new Path(outPath))) {
        fileSystem.delete(new Path(outPath));
    }
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.inmobi.conduit.utils.CollapseFilesInDir.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", args[0]);
    String dir = args[1];
    FileSystem fs = FileSystem.get(configuration);
    FileStatus[] fileList;
    try {
        fileList = fs.listStatus(new Path(dir));
    } catch (FileNotFoundException fe) {
        fileList = null;
    }
    if (fileList != null) {
        if (fileList.length > 1) {
            Set<Path> sourceFiles = new HashSet<Path>();
            Set<String> consumePaths = new HashSet<String>();
            //inputPath may have multiple files due to backlog
            //read all and create a tmp file
            for (int i = 0; i < fileList.length; i++) {
                Path consumeFilePath = fileList[i].getPath().makeQualified(fs);
                sourceFiles.add(consumeFilePath);
                FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
                try {
                    while (fsDataInputStream.available() > 0) {
                        String fileName = fsDataInputStream.readLine();
                        if (fileName != null) {
                            consumePaths.add(fileName.trim());
                            System.out.println("Adding [" + fileName + "] to pull");
                        }
                    }
                } finally {
                    fsDataInputStream.close();
                }
            }
            Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString());
            FSDataOutputStream out = fs.create(finalPath);
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
            try {
                for (String consumePath : consumePaths) {
                    System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
                    writer.write(consumePath);
                    writer.write("\n");
                }
            } finally {
                writer.close();
            }
            LOG.warn("Final File - [" + finalPath + "]");
            for (Path deletePath : sourceFiles) {
                System.out.println("Deleting - [" + deletePath + "]");
                fs.delete(deletePath);
            }
        }
    }
}

From source file:com.inmobi.databus.distcp.DistcpBaseService.java

License:Apache License

protected void doFinalCommit(Map<Path, FileSystem> consumePaths) throws Exception {
    //commit distcp consume Path from remote cluster
    Set<Map.Entry<Path, FileSystem>> consumeEntries = consumePaths.entrySet();
    for (Map.Entry<Path, FileSystem> consumePathEntry : consumeEntries) {
        FileSystem fileSystem = consumePathEntry.getValue();
        Path consumePath = consumePathEntry.getKey();
        fileSystem.delete(consumePath);
        LOG.debug("Deleting/Commiting [" + consumePath + "]");
    }

}

From source file:com.inmobi.databus.readers.TestDatabusEmptyFolders.java

License:Apache License

private Path removeFilesIfAny() throws IOException {
    FileSystem fs = FileSystem.get(cluster.getHadoopConf());
    Path streamDir = DatabusUtil.getStreamDir(StreamType.LOCAL, new Path(cluster.getRootDir()), testStream);
    Path minuteDirPath = DatabusStreamReader.getMinuteDirPath(streamDir,
            modifyTime(new Date(), Calendar.MINUTE, -10));
    FileStatus[] fileStatuses = fs.listStatus(minuteDirPath.getParent());
    for (FileStatus folders : fileStatuses) {
        if (!folders.isDir()) {
            continue;
        }
        LOG.debug("Folder=" + folders.getPath().toString());
        FileStatus[] files = fs.listStatus(folders.getPath());
        for (FileStatus file : files) {
            if (file.isDir()) {
                continue;
            }
            fs.delete(file.getPath());
        }
    }
    Arrays.sort(fileStatuses, new java.util.Comparator<FileStatus>() {

        @Override
        public int compare(FileStatus o1, FileStatus o2) {
            try {
                return getDateFromFile(o1.getPath().toString()).before(getDateFromFile(o2.getPath().toString()))
                        ? -1
                        : 1;
            } catch (ParseException e) {
                e.printStackTrace();
            }
            return 0;
        }
    });
    return fileStatuses[fileStatuses.length - 1].getPath();
}

From source file:com.inmobi.databus.utils.CollapseFilesInDir.java

License:Apache License

public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    configuration.set("fs.default.name", args[0]);
    String dir = args[1];
    FileSystem fs = FileSystem.get(configuration);
    FileStatus[] fileList = fs.listStatus(new Path(dir));
    if (fileList != null) {
        if (fileList.length > 1) {
            Set<Path> sourceFiles = new HashSet<Path>();
            Set<String> consumePaths = new HashSet<String>();
            //inputPath may have multiple files due to backlog
            //read all and create a tmp file
            for (int i = 0; i < fileList.length; i++) {
                Path consumeFilePath = fileList[i].getPath().makeQualified(fs);
                sourceFiles.add(consumeFilePath);
                FSDataInputStream fsDataInputStream = fs.open(consumeFilePath);
                try {
                    while (fsDataInputStream.available() > 0) {
                        String fileName = fsDataInputStream.readLine();
                        if (fileName != null) {
                            consumePaths.add(fileName.trim());
                            System.out.println("Adding [" + fileName + "] to pull");
                        }
                    }
                } finally {
                    fsDataInputStream.close();
                }
            }
            Path finalPath = new Path(dir, new Long(System.currentTimeMillis()).toString());
            FSDataOutputStream out = fs.create(finalPath);
            BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out));
            try {
                for (String consumePath : consumePaths) {
                    System.out.println("Adding sourceFile [" + consumePath + "] to" + " distcp " + "FinalList");
                    writer.write(consumePath);
                    writer.write("\n");
                }
            } finally {
                writer.close();
            }
            LOG.warn("Final File - [" + finalPath + "]");
            for (Path deletePath : sourceFiles) {
                System.out.println("Deleting - [" + deletePath + "]");
                fs.delete(deletePath);
            }
        }
    }
}

From source file:com.inmobi.messaging.consumer.databus.TestDatabusConsumer.java

License:Apache License

@Test
public void testConsumerWithStopTimeBeyondCheckpoint() throws Exception {
    ClientConfig config = loadConfig();
    config.set(DatabusConsumerConfig.databusRootDirsConfig, rootDirs[0].toUri().toString());

    FileSystem fs = rootDirs[0].getFileSystem(conf);
    try {
        // Deleting the dummy collector(COLLECTOR_PREFIX i.e. which does not have
        // any files to read).
        // Collector won't have any checkpoint if there are no files to read.
        // In this test, we wanted to test whether consumer is stopped if the
        // stop time is beyond the checkpoint.
        // If checkpoint is not present then consumer won't be closed completely.
        fs.delete(new Path(rootDirs[0].toUri().toString(), "data/" + testStream + "/" + COLLECTOR_PREFIX));
        Date absoluteStartTime = CollectorStreamReader.getDateFromCollectorFile(dataFiles[0]);
        config.set(MessageConsumerFactory.ABSOLUTE_START_TIME,
                AbstractMessageConsumer.minDirFormat.get().format(absoluteStartTime));
        config.set(DatabusConsumerConfig.checkpointDirConfig, ck12);
        Date stopDate = CollectorStreamReader.getDateFromCollectorFile(dataFiles[1]);
        Date stopDateForCheckpoint = CollectorStreamReader.getDateFromCollectorFile(dataFiles[0]);
        config.set(DatabusConsumerConfig.stopDateConfig,
                AbstractMessageConsumer.minDirFormat.get().format(stopDate));
        ConsumerUtil.testConsumerWithStopTimeBeyondCheckpoint(config, testStream, consumerName,
                absoluteStartTime, false, stopDateForCheckpoint);
    } finally {
        // create a dummy collector directory back
        fs.mkdirs(new Path(rootDirs[0].toUri().toString(), "data/" + testStream + "/" + COLLECTOR_PREFIX));
    }
}

From source file:com.liveramp.hank.hadoop.DomainBuilderAbstractOutputFormat.java

License:Apache License

public static void moveContentsAndDelete(Path srcDir, Path dstDir, FileSystem fs, Logger logger)
        throws IOException {
    if (!fs.exists(srcDir)) {
        return;
    }
    if (fs.exists(srcDir) && !fs.isDirectory(srcDir)) {
        throw new IllegalArgumentException(srcDir + " is not a directory");
    }
    if (fs.exists(dstDir) && !fs.isDirectory(dstDir)) {
        throw new IllegalArgumentException(dstDir + " is not a directory");
    }
    if (logger.isDebugEnabled()) {
        logger.debug("Moving contents of: " + srcDir + " to: " + dstDir);
    }
    FileStatus[] files = fs.listStatus(srcDir);
    for (FileStatus file : files) {
        Path sourcePath = file.getPath();
        Path targetPath = new Path(dstDir, file.getPath().getName());
        if (logger.isDebugEnabled()) {
            logger.debug("Moving: " + sourcePath + " to: " + targetPath);
        }
        if (!fs.mkdirs(targetPath.getParent())) {
            throw new IOException("Failed at creating directory " + targetPath.getParent());
        }
        if (!fs.rename(sourcePath, targetPath)) {
            throw new IOException("Failed at renaming " + sourcePath + " to " + targetPath);
        }
    }
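    // All contents have been moved; delete the now-empty source directory.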
    fs.delete(srcDir);
}

From source file:com.manning.hip.ch4.joins.improved.impl.OptimizedDataJoinJob.java

License:Apache License

public static JobConf createDataJoinJob(String args[]) throws IOException {

    String inputDir = args[0];
    String outputDir = args[1];
    Class inputFormat = SequenceFileInputFormat.class;
    if (args[2].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileInputFormat: " + args[2]);
    } else {
        System.out.println("Using TextInputFormat: " + args[2]);
        inputFormat = TextInputFormat.class;
    }
    int numOfReducers = Integer.parseInt(args[3]);
    Class mapper = getClassByName(args[4]);
    Class reducer = getClassByName(args[5]);
    Class mapoutputValueClass = getClassByName(args[6]);
    Class outputFormat = TextOutputFormat.class;
    Class outputValueClass = Text.class;
    if (args[7].compareToIgnoreCase("text") != 0) {
        System.out.println("Using SequenceFileOutputFormat: " + args[7]);
        outputFormat = SequenceFileOutputFormat.class;
        outputValueClass = getClassByName(args[7]);
    } else {
        System.out.println("Using TextOutputFormat: " + args[7]);
    }
    long maxNumOfValuesPerGroup = 100;
    String jobName = "";
    if (args.length > 8) {
        maxNumOfValuesPerGroup = Long.parseLong(args[8]);
    }
    if (args.length > 9) {
        jobName = args[9];
    }
    Configuration defaults = new Configuration();
    JobConf job = new JobConf(defaults, OptimizedDataJoinJob.class);
    job.setJobName("DataJoinJob: " + jobName);

    FileSystem fs = FileSystem.get(defaults);
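    // Delete any previous output directory before configuring the job.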
    fs.delete(new Path(outputDir));
    FileInputFormat.setInputPaths(job, inputDir);

    job.setInputFormat(inputFormat);

    job.setMapperClass(mapper);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.setOutputFormat(outputFormat);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    job.setMapOutputKeyClass(CompositeKey.class);
    job.setMapOutputValueClass(mapoutputValueClass);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(outputValueClass);
    job.setReducerClass(reducer);

    job.setPartitionerClass(CompositeKeyPartitioner.class);
    job.setOutputKeyComparatorClass(CompositeKeyComparator.class);
    job.setOutputValueGroupingComparator(CompositeKeyOnlyComparator.class);

    job.setNumMapTasks(1);
    job.setNumReduceTasks(numOfReducers);
    job.setLong("datajoin.maxNumOfValuesPerGroup", maxNumOfValuesPerGroup);
    return job;
}