Example usage for org.apache.hadoop.fs FileUtil stat2Paths

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileUtil.stat2Paths method.

Prototype

public static Path[] stat2Paths(FileStatus[] stats) 

Document

Converts an array of FileStatus to an array of Path.
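
A minimal sketch of the typical pattern, assuming a Hadoop classpath; the directory name "/tmp/input" is only an illustrative placeholder. It lists a directory and converts the resulting FileStatus array into paths:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;

public class Stat2PathsSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // uses the default (local or configured) file system
        FileSystem fs = FileSystem.get(conf);
        FileStatus[] stats = fs.listStatus(new Path("/tmp/input"));
        // convert the FileStatus entries to Path objects and print them
        for (Path p : FileUtil.stat2Paths(stats)) {
            System.out.println(p);
        }
    }
}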

Usage

From source file:boa.compiler.Test.java

License:Apache License

public static void main(String[] args) throws IOException, URISyntaxException {
    Configuration configuration = new Configuration();
    FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:54310"), configuration);
    FileStatus[] fileStatus = hdfs.listStatus(new Path("hdfs://localhost:54310/ast/"));
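    // convert the listed FileStatus entries into Path objects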
    Path[] paths = FileUtil.stat2Paths(fileStatus);
    System.out.println("***** Contents of the Directory *****");
    for (Path path : paths) {
        System.out.println(path);
    }
}

From source file:byte_import.HexastoreBulkImport.java

License:Open Source License

private void loadHFiles() throws Exception {
    conf = HBaseConfiguration.create();
    HBaseAdmin hadmin = new HBaseAdmin(conf);
    Path hfofDir = new Path("out");
    FileSystem fs = hfofDir.getFileSystem(conf);
    //if (!fs.exists(hfofDir)) {
    //  throw new FileNotFoundException("HFileOutputFormat dir " +
    //      hfofDir + " not found");
    //}
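    // each subdirectory of the output dir corresponds to one column family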
    FileStatus[] familyDirStatuses = fs.listStatus(hfofDir);
    //if (familyDirStatuses == null) {
    //  throw new FileNotFoundException("No families found in " + hfofDir);
    //}
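    // collect the first row key of every HFile to use as region split points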
    int length = 0;
    byte[][] splits = new byte[18000][];
    for (FileStatus stat : familyDirStatuses) {
        if (!stat.isDir()) {
            continue;
        }
        Path familyDir = stat.getPath();
        // Skip _logs, etc
        if (familyDir.getName().startsWith("_"))
            continue;
        //byte[] family = familyDir.getName().getBytes();
        Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir));
        for (Path hfile : hfiles) {
            if (hfile.getName().startsWith("_"))
                continue;

            HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf));
            //HFile.Reader hfr =    new HFile.Reader(fs, hfile, null, false);
            final byte[] first;
            try {
                hfr.loadFileInfo();
                first = hfr.getFirstRowKey();
            } finally {
                hfr.close();
            }
            splits[length] = first.clone();
            length++;
        }
    }
    //System.out.println(length);

    byte[][] splits1 = Arrays.copyOf(splits, length);
    Arrays.sort(splits1, Bytes.BYTES_COMPARATOR);
    //HTableDescriptor desc = new HTableDescriptor("H2RDF");

    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);

    HColumnDescriptor family = new HColumnDescriptor("A");
    desc.addFamily(family);
    //for (int i = 0; i < splits.length; i++) {
    //   System.out.println(Bytes.toStringBinary(splits[i]));
    //}
    conf.setInt("zookeeper.session.timeout", 600000);
    // drop any previous table, then recreate it with the computed split points
    // so that the bulk load below always has a target table
    if (hadmin.tableExists(TABLE_NAME)) {
        hadmin.disableTable(TABLE_NAME);
        hadmin.deleteTable(TABLE_NAME);
    }
    hadmin.createTable(desc, splits1);
    //hadmin.createTable(desc);
    String[] args1 = new String[2];
    args1[0] = "out";
    args1[1] = TABLE_NAME;
    //args1[1]="new2";

    ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1);

}

From source file:cn.lhfei.hadoop.ch03.ListStatus.java

License:Apache License

public static void main(String[] args) {

    String uri = args[0];
    Configuration conf = new Configuration();
    FileSystem fs = null;

    try {
        fs = FileSystem.get(URI.create(uri), conf);

        Path[] paths = new Path[args.length];
        for (int i = 0; i < paths.length; i++) {
            paths[i] = new Path(args[i]);
        }

        FileStatus[] status = fs.listStatus(paths);
        Path[] listPath = FileUtil.stat2Paths(status);

        for (Path p : listPath) {
            log.info(p.toString());
        }

    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:co.cask.hydrator.plugin.batch.action.FileAction.java

License:Apache License

@SuppressWarnings("ConstantConditions")
@Override
public void run(BatchActionContext context) throws Exception {
    if (!config.shouldRun(context)) {
        return;
    }
    config.substituteMacros(context);

    Job job = JobUtils.createInstance();
    Configuration conf = job.getConfiguration();
    FileSystem fileSystem = FileSystem.get(conf);
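    // resolve the configured path to the set of files to act on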
    Path[] paths;
    Path sourcePath = new Path(config.path);
    if (fileSystem.isDirectory(sourcePath)) {
        FileStatus[] status = fileSystem.listStatus(sourcePath);
        paths = FileUtil.stat2Paths(status);
    } else {
        paths = new Path[] { sourcePath };
    }

    //get regex pattern for file name filtering.
    boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern);
    if (patternSpecified) {
        regex = Pattern.compile(config.pattern);
    }

    switch (config.action.toLowerCase()) {
    case "delete":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                fileSystem.delete(path, true);
            }
        }
        break;
    case "move":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                Path targetFileMovePath = new Path(config.targetFolder, path.getName());
                fileSystem.rename(path, targetFileMovePath);
            }
        }
        break;
    case "archive":
        for (Path path : paths) {
            if (!patternSpecified || isFileNameMatch(path.getName())) {
                try (FSDataOutputStream archivedStream = fileSystem
                        .create(new Path(config.targetFolder, path.getName() + ".zip"));
                        ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream);
                        FSDataInputStream fdDataInputStream = fileSystem.open(path)) {
                    zipArchivedStream.putNextEntry(new ZipEntry(path.getName()));
                    int length;
                    byte[] buffer = new byte[1024];
                    while ((length = fdDataInputStream.read(buffer)) > 0) {
                        zipArchivedStream.write(buffer, 0, length);
                    }
                    zipArchivedStream.closeEntry();
                }
                fileSystem.delete(path, true);
            }
        }
        break;
    default:
        LOG.warn("No action required on the file.");
        break;
    }
}

From source file:co.cask.hydrator.plugin.batch.source.XMLReaderBatchSource.java

License:Apache License

@Override
public void onRunFinish(boolean succeeded, BatchSourceContext context) {
    super.onRunFinish(succeeded, context);
    try {
        FileStatus[] status = fileSystem.listStatus(tempDirectoryPath);
        long processingTime = new Date().getTime();
        Path[] paths = FileUtil.stat2Paths(status);
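        // record each processed file key with the current processing timestamp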
        if (paths != null && paths.length > 0) {
            for (Path path : paths) {
                try (FSDataInputStream input = fileSystem.open(path)) {
                    String key = input.readUTF();
                    processedFileTrackingTable.write(Bytes.toBytes(key), Bytes.toBytes(processingTime));
                }
            }
        }
    } catch (IOException exception) {
        LOG.error("IOException occurred while reading temp directory path : " + exception.getMessage());
    }
}

From source file:co.cask.hydrator.plugin.HDFSSinkTest.java

License:Apache License

@Test
public void testHDFSSink() throws Exception {
    String inputDatasetName = "input-hdfssinktest";
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputDatasetName));

    Path outputDir = dfsCluster.getFileSystem().getHomeDirectory();
    ETLStage sink = new ETLStage("HDFS",
            new ETLPlugin("HDFS", BatchSink.PLUGIN_TYPE,
                    ImmutableMap.<String, String>builder().put("path", outputDir.toUri().toString())
                            .put(Constants.Reference.REFERENCE_NAME, "HDFSinkTest").build(),
                    null));
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(source).addStage(sink)
            .addConnection(source.getName(), sink.getName()).build();

    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
    Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "HDFSTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);

    DataSetManager<Table> inputManager = getDataset(inputDatasetName);
    List<StructuredRecord> input = ImmutableList.of(
            StructuredRecord.builder(SCHEMA).set("ticker", "AAPL").set("num", 10).set("price", 400.23).build(),
            StructuredRecord.builder(SCHEMA).set("ticker", "CDAP").set("num", 13).set("price", 123.23).build());
    MockSource.writeInput(inputManager, input);

    MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
    mrManager.start();
    mrManager.waitForFinish(5, TimeUnit.MINUTES);

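    // gather the part files written by the sink and verify their contents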
    Path[] outputFiles = FileUtil.stat2Paths(
            dfsCluster.getFileSystem().listStatus(outputDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    Assert.assertNotNull(outputFiles);
    Assert.assertTrue(outputFiles.length > 0);
    int count = 0;
    List<String> lines = new ArrayList<>();
    for (Path path : outputFiles) {
        InputStream in = dfsCluster.getFileSystem().open(path);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {
            lines.add(line);
            if (line.contains("AAPL") || line.contains("CDAP")) {
                count++;
            }
        }
        reader.close();
    }
    Assert.assertEquals(2, lines.size());
    Assert.assertEquals(2, count);
}

From source file:com.asakusafw.bulkloader.collector.ExportFileSend.java

License:Apache License

/**
 * Reads the export target files as TSV and sends their contents through the given
 * {@link com.asakusafw.bulkloader.transfer.FileList.Writer}.
 * @param <T> the type of the target data model
 * @param targetTableModel the data model class for the export target table
 * @param filePath the path of the export files
 * @param writer the writer used to send the files
 * @param tableName the name of the export target table
 * @return the number of records sent, or -1 if there was nothing to send
 * @throws BulkLoaderSystemException if the transfer fails
 */
protected <T extends Writable> long send(Class<T> targetTableModel, String filePath, FileList.Writer writer,
        String tableName) throws BulkLoaderSystemException {
    FileSystem fs = null;
    String fileName = null;

    // maximum size of a single transfer file
    long maxSize = Long.parseLong(ConfigurationLoader.getProperty(Constants.PROP_KEY_EXP_LOAD_MAX_SIZE));

    try {
        TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
        Configuration conf = new Configuration();
        fs = FileSystem.get(new URI(filePath), conf);

        // list the export files that match the given path
        FileStatus[] status = fs.globStatus(new Path(filePath));
        Path[] listedPaths = FileUtil.stat2Paths(status);
        if (listedPaths == null) {
            LOG.info("TG-COLLECTOR-02006", tableName, filePath);
            return -1;
        } else {
            LOG.info("TG-COLLECTOR-02007", listedPaths.length, tableName, filePath);
        }
        long count = 0;
        boolean addEntry = false;
        for (Path path : listedPaths) {
            // skip system files
            if (isSystemFile(path)) {
                continue;
            }

            // open the temporary storage file that holds the table contents
            ModelInput<T> input = TemporaryStorage.openInput(conf, targetTableModel, path);
            try {
                while (true) {
                    // start a new transfer file entry
                    addEntry = true;
                    fileName = FileNameUtil.createSendExportFileName(tableName, fileNameMap);
                    OutputStream output = writer.openNext(FileList.content(fileName));
                    try {
                        CountingOutputStream counter = new CountingOutputStream(output);
                        ModelOutput<T> modelOut = factory.createModelOutput(counter);
                        T model = factory.createModelObject();
                        LOG.info("TG-COLLECTOR-02004", tableName, path.toString(), fileName);

                        // copy each model record into the TSV output
                        boolean nextFile = false;
                        while (input.readTo(model)) {
                            // write the model record
                            modelOut.write(model);
                            count++;
                            // once the written byte count exceeds the maximum size,
                            // split the output and continue in a new transfer file
                            if (counter.getByteCount() > maxSize) {
                                nextFile = true;
                                break;
                            }
                        }
                        modelOut.close();
                        LOG.info("TG-COLLECTOR-02005", tableName, path.toString(), fileName);

                        if (nextFile) {
                            // the size limit was reached; open a new transfer file for the rest
                            continue;
                        } else {
                            // all records for this path have been sent
                            break;
                        }
                    } finally {
                        output.close();
                    }
                }
            } finally {
                input.close();
            }
        }
        if (addEntry) {
            return count;
        } else {
            assert count == 0;
            return -1;
        }
    } catch (IOException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001", MessageFormat
                .format("HDFS?{0} ???{1}", filePath, fileName));
    } catch (URISyntaxException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                MessageFormat.format("HDFS???HDFS?{0}", filePath));
    } finally {
        if (fs != null) {
            try {
                fs.close();
            } catch (IOException e) {
                throw new BulkLoaderSystemException(e, this.getClass(), "TG-COLLECTOR-02001",
                        MessageFormat.format(
                                "HDFS???URI{0}",
                                filePath));
            }
        }
    }
}

From source file:com.asakusafw.cleaner.main.HDFSCleaner.java

License:Apache License

/**
 * Cleans the specified directory, deleting expired files and empty directories.
 * @param fs the HDFS file system
 * @param cleanPath the HDFS directory to clean
 * @param isSetExecutionId whether the child directory names are execution IDs
 * @param pattern the file name pattern of files to delete
 * @param keepDate the number of days to keep files
 * @param now the current time
 * @param recursive whether to clean child directories recursively
 * @return true if the cleaning succeeded, false otherwise
 * @throws CleanerSystemException if a system error occurs
 */
private boolean cleanDir(FileSystem fs, Path cleanPath, boolean isSetExecutionId, String pattern, int keepDate,
        Date now, boolean recursive) throws CleanerSystemException {
    try {
        if (!fs.exists(cleanPath)) {
            // the directory to clean does not exist
            Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                    "the specified directory does not exist", cleanPath.toString());
            return false;
        }
        if (!fs.getFileStatus(cleanPath).isDir()) {
            // the path to clean is not a directory
            Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR,
                    "the specified path is not a directory", cleanPath.toString());
            return false;
        }

        // log the start of cleaning and initialize the counters
        Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE, cleanPath.toString());
        int cleanFileCount = 0;
        int cleanDirCount = 0;
        boolean result = true;
        FileStatus[] dirStatus = getListStatus(fs, cleanPath);
        Path[] listedPaths = FileUtil.stat2Paths(dirStatus);
        for (Path path : listedPaths) {
            FileStatus status = fs.getFileStatus(path);
            long lastModifiedTime = status.getModificationTime();
            if (status.isDir() && recursive) {
                // this is a directory and recursive cleaning is enabled
                if (isSetExecutionId) {
                    // the directory name is an execution ID; skip it if the jobflow is still running
                    String executionId = path.getName();
                    if (isRunningJobFlow(executionId)) {
                        // the jobflow is running, so do not clean this directory
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_EXEC, path.toString());
                        continue;
                    }
                }
                FileStatus[] childdirStatus = getListStatus(fs, path);
                if (childdirStatus.length == 0) {
                    // the directory is empty; delete it if it has expired
                    if (isExpired(lastModifiedTime, keepDate, now)) {
                        if (!fs.delete(path, false)) {
                            Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "directory",
                                    path.toString());
                            result = false;
                        } else {
                            cleanDirCount++;
                            Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString());
                        }
                    }
                } else {
                    // the directory still has children; clean it recursively first
                    if (cleanDir(fs, path, false, pattern, keepDate, now, recursive)) {
                        // re-check the directory; delete it if cleaning left it empty and it has expired
                        childdirStatus = getListStatus(fs, path);
                        if (childdirStatus.length == 0) {
                            if (isExpired(lastModifiedTime, keepDate, now)) {
                                if (!fs.delete(path, false)) {
                                    Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "directory",
                                            path.toString());
                                    result = false;
                                } else {
                                    cleanDirCount++;
                                    Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString());
                                }
                            }
                        }
                    } else {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "directory", path.toString());
                        result = false;
                    }
                }
            } else if (!status.isDir()) {
                // this is a file; delete it if it has expired and matches the pattern
                if (isExpired(lastModifiedTime, keepDate, now) && isMatchPattern(path, pattern)) {
                    if (!fs.delete(path, false)) {
                        Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "file", path.toString());
                        result = false;
                    } else {
                        Log.log(CLASS, MessageIdConst.HCLN_DELETE_FILE, path.toString());
                        cleanFileCount++;
                    }
                }
            }
        }

        Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE_SUCCESS, cleanPath.toString(), cleanDirCount,
                cleanFileCount);

        return result;
    } catch (IOException e) {
        Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_EXCEPTION, cleanPath.getName());
        return false;
    }
}

From source file:com.asakusafw.testdriver.FlowPartTestDriver.java

License:Apache License

private void loadResult(String tablename, String excelFileName) throws IOException {
    Configuration conf = ConfigurationFactory.getDefault().newInstance();
    FileSystem fs = FileSystem.get(conf);
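    // expand the glob of the output path computed for this Excel file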
    FileStatus[] status = fs.globStatus(new Path(computeOutputPath(fs, excelFileName)));
    Path[] listedPaths = FileUtil.stat2Paths(status);
    for (Path path : listedPaths) {
        if (isSystemFile(path)) {
            continue;
        }
        LOG.info("Loading result data: Path=" + path);
        testUtils.loadFromTemporary(tablename, conf, path);
    }
}

From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java

License:Apache License

private void readMigratePartition(StaffSSControllerInterface sssc, int currentSuperStepCounter)
        throws IOException {
    Path migratePartitionPath = new Path(migratePartitionDir);
    FileSystem fsFileSystem = FileSystem.get(this.getConf().getConf());
    FileStatus[] fs = fsFileSystem.listStatus(migratePartitionPath);
    Path[] listPath = FileUtil.stat2Paths(fs);
    for (Path p : listPath) {
        // each migrate-partition file holds lines of the form <key>:<partition>
        try (FSDataInputStream fsInput = fsFileSystem.open(p);
                BufferedReader br = new BufferedReader(new InputStreamReader(fsInput))) {
            String line = null;
            while (null != (line = br.readLine())) {
                String[] strs = line.split(":");
                this.partitioner.updateMigratePartition(new Text(strs[0]), Integer.parseInt(strs[1]));
            }
        }
    }
}