List of usage examples for org.apache.hadoop.fs FileUtil stat2Paths
public static Path[] stat2Paths(FileStatus[] stats)
From source file:boa.compiler.Test.java
License:Apache License
public static void main(String[] args) throws IOException, URISyntaxException { Configuration configuration = new Configuration(); FileSystem hdfs = FileSystem.get(new URI("hdfs://localhost:54310"), configuration); FileStatus[] fileStatus = hdfs.listStatus(new Path("hdfs://localhost:54310/ast/")); Path[] paths = FileUtil.stat2Paths(fileStatus); System.out.println("***** Contents of the Directory *****"); for (Path path : paths) { System.out.println(path); }// ww w.j av a2 s .co m }
From source file:byte_import.HexastoreBulkImport.java
License:Open Source License
private void loadHFiles() throws Exception { conf = HBaseConfiguration.create();//from w w w. jav a2s . c o m HBaseAdmin hadmin = new HBaseAdmin(conf); Path hfofDir = new Path("out"); FileSystem fs = hfofDir.getFileSystem(conf); //if (!fs.exists(hfofDir)) { // throw new FileNotFoundException("HFileOutputFormat dir " + // hfofDir + " not found"); //} FileStatus[] familyDirStatuses = fs.listStatus(hfofDir); //if (familyDirStatuses == null) { // throw new FileNotFoundException("No families found in " + hfofDir); //} int length = 0; byte[][] splits = new byte[18000][]; for (FileStatus stat : familyDirStatuses) { if (!stat.isDir()) { continue; } Path familyDir = stat.getPath(); // Skip _logs, etc if (familyDir.getName().startsWith("_")) continue; //byte[] family = familyDir.getName().getBytes(); Path[] hfiles = FileUtil.stat2Paths(fs.listStatus(familyDir)); for (Path hfile : hfiles) { if (hfile.getName().startsWith("_")) continue; HFile.Reader hfr = HFile.createReader(fs, hfile, new CacheConfig(conf)); //HFile.Reader hfr = new HFile.Reader(fs, hfile, null, false); final byte[] first; try { hfr.loadFileInfo(); first = hfr.getFirstRowKey(); } finally { hfr.close(); } splits[length] = first.clone(); length++; } } //System.out.println(length); byte[][] splits1 = new byte[length][]; for (int i = 0; i < splits1.length; i++) { splits1[i] = splits[i]; } Arrays.sort(splits1, Bytes.BYTES_COMPARATOR); //HTableDescriptor desc = new HTableDescriptor("H2RDF"); HTableDescriptor desc = new HTableDescriptor(TABLE_NAME); HColumnDescriptor family = new HColumnDescriptor("A"); desc.addFamily(family); //for (int i = 0; i < splits.length; i++) { // System.out.println(Bytes.toStringBinary(splits[i])); //} conf.setInt("zookeeper.session.timeout", 600000); if (hadmin.tableExists(TABLE_NAME)) { hadmin.disableTable(TABLE_NAME); hadmin.deleteTable(TABLE_NAME); } else { hadmin.createTable(desc, splits1); } //hadmin.createTable(desc); String[] args1 = new String[2]; args1[0] = "out"; args1[1] = 
TABLE_NAME; //args1[1]="new2"; ToolRunner.run(new LoadIncrementalHFiles(HBaseConfiguration.create()), args1); }
From source file:cn.lhfei.hadoop.ch03.ListStatus.java
License:Apache License
public static void main(String[] args) { String uri = args[0];// ww w. j a va2 s . com Configuration conf = new Configuration(); FileSystem fs = null; try { fs = FileSystem.get(URI.create(uri), conf); Path[] paths = new Path[args.length]; for (int i = 0; i < paths.length; i++) { paths[i] = new Path(args[i]); } FileStatus[] status = fs.listStatus(paths); Path[] listPath = FileUtil.stat2Paths(status); for (Path p : listPath) { log.info(p.toString()); } } catch (IOException e) { e.printStackTrace(); } }
From source file:co.cask.hydrator.plugin.batch.action.FileAction.java
License:Apache License
@SuppressWarnings("ConstantConditions") @Override/*from w w w. j a v a 2s .c o m*/ public void run(BatchActionContext context) throws Exception { if (!config.shouldRun(context)) { return; } config.substituteMacros(context); Job job = JobUtils.createInstance(); Configuration conf = job.getConfiguration(); FileSystem fileSystem = FileSystem.get(conf); Path[] paths; Path sourcePath = new Path(config.path); if (fileSystem.isDirectory(sourcePath)) { FileStatus[] status = fileSystem.listStatus(sourcePath); paths = FileUtil.stat2Paths(status); } else { paths = new Path[] { sourcePath }; } //get regex pattern for file name filtering. boolean patternSpecified = !Strings.isNullOrEmpty(config.pattern); if (patternSpecified) { regex = Pattern.compile(config.pattern); } switch (config.action.toLowerCase()) { case "delete": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { fileSystem.delete(path, true); } } break; case "move": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { Path targetFileMovePath = new Path(config.targetFolder, path.getName()); fileSystem.rename(path, targetFileMovePath); } } break; case "archive": for (Path path : paths) { if (!patternSpecified || isFileNameMatch(path.getName())) { try (FSDataOutputStream archivedStream = fileSystem .create(new Path(config.targetFolder, path.getName() + ".zip")); ZipOutputStream zipArchivedStream = new ZipOutputStream(archivedStream); FSDataInputStream fdDataInputStream = fileSystem.open(path)) { zipArchivedStream.putNextEntry(new ZipEntry(path.getName())); int length; byte[] buffer = new byte[1024]; while ((length = fdDataInputStream.read(buffer)) > 0) { zipArchivedStream.write(buffer, 0, length); } zipArchivedStream.closeEntry(); } fileSystem.delete(path, true); } } break; default: LOG.warn("No action required on the file."); break; } }
From source file:co.cask.hydrator.plugin.batch.source.XMLReaderBatchSource.java
License:Apache License
/**
 * After the run finishes, reads one UTF-encoded key from every file in the
 * temporary directory and records it in the processed-file tracking table
 * together with the current time in milliseconds.
 *
 * <p>An {@link IOException} is logged and not propagated.
 *
 * @param succeeded whether the run succeeded; forwarded to the superclass
 * @param context   the batch source context; forwarded to the superclass
 */
@Override
public void onRunFinish(boolean succeeded, BatchSourceContext context) {
    super.onRunFinish(succeeded, context);
    try {
        FileStatus[] status = fileSystem.listStatus(tempDirectoryPath);
        long processingTime = System.currentTimeMillis();
        Path[] paths = FileUtil.stat2Paths(status);
        // stat2Paths returns null for a null status array; an empty array simply
        // yields no iterations.
        if (paths != null) {
            for (Path path : paths) {
                try (FSDataInputStream input = fileSystem.open(path)) {
                    String key = input.readUTF();
                    processedFileTrackingTable.write(Bytes.toBytes(key), Bytes.toBytes(processingTime));
                }
            }
        }
    } catch (IOException exception) {
        // Pass the exception itself so the stack trace and cause are not lost.
        LOG.error("IOException occurred while reading temp directory path : " + exception.getMessage(),
                  exception);
    }
}
From source file:co.cask.hydrator.plugin.HDFSSinkTest.java
License:Apache License
/**
 * Runs a mock-source to HDFS-sink batch pipeline with two records and verifies
 * that the sink's output files contain exactly two lines, one per record.
 */
@Test
public void testHDFSSink() throws Exception {
    String inputDatasetName = "input-hdfssinktest";
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputDatasetName));

    Path outputDir = dfsCluster.getFileSystem().getHomeDirectory();
    ETLStage sink = new ETLStage("HDFS",
            new ETLPlugin("HDFS", BatchSink.PLUGIN_TYPE,
                    ImmutableMap.<String, String>builder()
                            .put("path", outputDir.toUri().toString())
                            .put(Constants.Reference.REFERENCE_NAME, "HDFSinkTest")
                            .build(),
                    null));

    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
            .addStage(source)
            .addStage(sink)
            .addConnection(source.getName(), sink.getName())
            .build();

    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
    Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "HDFSTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);

    DataSetManager<Table> inputManager = getDataset(inputDatasetName);
    List<StructuredRecord> input = ImmutableList.of(
            StructuredRecord.builder(SCHEMA).set("ticker", "AAPL").set("num", 10).set("price", 400.23).build(),
            StructuredRecord.builder(SCHEMA).set("ticker", "CDAP").set("num", 13).set("price", 123.23).build());
    MockSource.writeInput(inputManager, input);

    MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
    mrManager.start();
    mrManager.waitForFinish(5, TimeUnit.MINUTES);

    Path[] outputFiles = FileUtil.stat2Paths(
            dfsCluster.getFileSystem().listStatus(outputDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    Assert.assertNotNull(outputFiles);
    Assert.assertTrue(outputFiles.length > 0);

    int count = 0;
    List<String> lines = new ArrayList<>();
    for (Path path : outputFiles) {
        // try-with-resources closes the reader (and the underlying stream) even
        // when readLine() throws, so a failing run does not leak the stream.
        try (InputStream in = dfsCluster.getFileSystem().open(path);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in))) {
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
                if (line.contains("AAPL") || line.contains("CDAP")) {
                    count++;
                }
            }
        }
    }

    Assert.assertEquals(2, lines.size());
    Assert.assertEquals(2, count);
}
From source file:com.asakusafw.bulkloader.collector.ExportFileSend.java
License:Apache License
/**
 * Reads the model objects stored in the files matching {@code filePath} and writes
 * them to {@code writer} as one or more export file entries.
 * The output format is produced by {@link TsvIoFactory} (presumably TSV - confirm).
 * A new entry of the {@link com.asakusafw.bulkloader.transfer.FileList.Writer} is
 * started whenever the bytes written to the current entry exceed the configured
 * maximum size.
 * @param <T> the model type
 * @param targetTableModel the model class of the export target
 * @param filePath path (glob pattern) of the export files to read
 * @param writer the destination writer
 * @param tableName the table name, used for log messages and output entry names
 * @return the number of model objects written, or {@code -1} if the glob produced
 *     no path list or no non-system file was processed
 * @throws BulkLoaderSystemException if reading, writing, parsing {@code filePath}
 *     as a URI, or closing the file system fails
 */
protected <T extends Writable> long send(Class<T> targetTableModel, String filePath, FileList.Writer writer,
        String tableName) throws BulkLoaderSystemException {
    FileSystem fs = null;
    String fileName = null;

    // Size threshold, read from configuration, after which output rolls over to a new entry.
    long maxSize = Long.parseLong(ConfigurationLoader.getProperty(Constants.PROP_KEY_EXP_LOAD_MAX_SIZE));

    try {
        TsvIoFactory<T> factory = new TsvIoFactory<>(targetTableModel);
        Configuration conf = new Configuration();
        fs = FileSystem.get(new URI(filePath), conf);

        // Resolve the glob pattern into the list of files to send.
        FileStatus[] status = fs.globStatus(new Path(filePath));
        Path[] listedPaths = FileUtil.stat2Paths(status);
        if (listedPaths == null) {
            LOG.info("TG-COLLECTOR-02006", tableName, filePath);
            return -1;
        } else {
            LOG.info("TG-COLLECTOR-02007", listedPaths.length, tableName, filePath);
        }
        long count = 0;
        // Becomes true once at least one output entry has been opened.
        boolean addEntry = false;
        for (Path path : listedPaths) {
            // Skip files that isSystemFile() reports as system files.
            if (isSystemFile(path)) {
                continue;
            }

            // TODO carried over from the original source; its text was lost to encoding damage.
            // Open the stored model objects of this file.
            ModelInput<T> input = TemporaryStorage.openInput(conf, targetTableModel, path);
            try {
                // Each iteration opens one output entry; the loop repeats while the
                // current input still has objects left after a size rollover.
                while (true) {
                    addEntry = true;
                    fileName = FileNameUtil.createSendExportFileName(tableName, fileNameMap);
                    OutputStream output = writer.openNext(FileList.content(fileName));
                    try {
                        // Counts the bytes written so the size limit can be checked.
                        CountingOutputStream counter = new CountingOutputStream(output);
                        ModelOutput<T> modelOut = factory.createModelOutput(counter);
                        T model = factory.createModelObject();
                        LOG.info("TG-COLLECTOR-02004", tableName, path.toString(), fileName);

                        // Copy model objects from the input to the current output entry.
                        boolean nextFile = false;
                        while (input.readTo(model)) {
                            // Write one model object.
                            modelOut.write(model);
                            count++;
                            // The limit is checked after the write, so an entry can
                            // exceed maxSize by up to one record.
                            if (counter.getByteCount() > maxSize) {
                                nextFile = true;
                                break;
                            }
                        }
                        modelOut.close();
                        LOG.info("TG-COLLECTOR-02005", tableName, path.toString(), fileName);

                        if (nextFile) {
                            // Size limit reached: continue with a new output entry.
                            continue;
                        } else {
                            // Input exhausted: move on to the next input file.
                            break;
                        }
                    } finally {
                        output.close();
                    }
                }
            } finally {
                input.close();
            }
        }
        if (addEntry) {
            return count;
        } else {
            // No entry was opened, so nothing can have been written.
            assert count == 0;
            return -1;
        }
    } catch (IOException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001", MessageFormat
                .format("HDFS?{0} ???{1}", filePath, fileName));
    } catch (URISyntaxException e) {
        throw new BulkLoaderSystemException(e, getClass(), "TG-COLLECTOR-02001",
                MessageFormat.format("HDFS???HDFS?{0}", filePath));
    } finally {
        if (fs != null) {
            try {
                fs.close();
            } catch (IOException e) {
                // NOTE(review): throwing from finally replaces any exception already
                // propagating from the try block - confirm this is intended.
                throw new BulkLoaderSystemException(e, this.getClass(), "TG-COLLECTOR-02001",
                        MessageFormat.format(
                                "HDFS???URI{0}", filePath));
            }
        }
    }
}
From source file:com.asakusafw.cleaner.main.HDFSCleaner.java
License:Apache License
/** * ?// ww w . j ava 2 s .co m * @param fs HDFS? * @param cleanPath HDFS?? * @param isSetExecutionId ID???????? * @param pattern * @param keepDate ?? * @param now ? * @param recursive ???? * @return ? * @throws CleanerSystemException */ private boolean cleanDir(FileSystem fs, Path cleanPath, boolean isSetExecutionId, String pattern, int keepDate, Date now, boolean recursive) throws CleanerSystemException { try { if (!fs.exists(cleanPath)) { // ?????? Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "??????", cleanPath.toString()); return false; } if (!fs.getFileStatus(cleanPath).isDir()) { // ?????? Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_ERROR, "??????", cleanPath.toString()); return false; } // ? Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE, cleanPath.toString()); int cleanFileCount = 0; int cleanDirCount = 0; boolean result = true; FileStatus[] dirStatus = getListStatus(fs, cleanPath); Path[] listedPaths = FileUtil.stat2Paths(dirStatus); for (Path path : listedPaths) { FileStatus status = fs.getFileStatus(path); long lastModifiedTime = status.getModificationTime(); if (status.isDir() && recursive) { // ???????? if (isSetExecutionId) { // ID??????MM??????? String executionId = path.getName(); if (isRunningJobFlow(executionId)) { // ??????? Log.log(CLASS, MessageIdConst.HCLN_CLEN_DIR_EXEC, path.toString()); continue; } } FileStatus[] childdirStatus = getListStatus(fs, path); if (childdirStatus.length == 0) { // ??????? if (isExpired(lastModifiedTime, keepDate, now)) { if (!fs.delete(path, false)) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } else { cleanDirCount++; Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString()); } } } else { // ????????? if (cleanDir(fs, path, false, pattern, keepDate, now, recursive)) { // ???????? 
childdirStatus = getListStatus(fs, path); if (childdirStatus.length == 0) { if (isExpired(lastModifiedTime, keepDate, now)) { if (!fs.delete(path, false)) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } else { cleanDirCount++; Log.log(CLASS, MessageIdConst.HCLN_DIR_DELETE, path.toString()); } } } } else { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } } } else if (!status.isDir()) { // ??????????? if (isExpired(lastModifiedTime, keepDate, now) && isMatchPattern(path, pattern)) { if (!fs.delete(path, false)) { Log.log(CLASS, MessageIdConst.HCLN_CLEN_FAIL, "", path.toString()); result = false; } else { Log.log(CLASS, MessageIdConst.HCLN_DELETE_FILE, path.toString()); cleanFileCount++; } } } } Log.log(CLASS, MessageIdConst.HCLN_FILE_DELETE_SUCCESS, cleanPath.toString(), cleanDirCount, cleanFileCount); return result; } catch (IOException e) { Log.log(e, CLASS, MessageIdConst.HCLN_CLEN_DIR_EXCEPTION, cleanPath.getName()); return false; } }
From source file:com.asakusafw.testdriver.FlowPartTestDriver.java
License:Apache License
private void loadResult(String tablename, String excelFileName) throws IOException { Configuration conf = ConfigurationFactory.getDefault().newInstance(); FileSystem fs = FileSystem.get(conf); FileStatus[] status = fs.globStatus(new Path(computeOutputPath(fs, excelFileName))); Path[] listedPaths = FileUtil.stat2Paths(status); for (Path path : listedPaths) { if (isSystemFile(path)) { continue; }//from w ww . j ava 2s . c o m LOG.info("????:Path=" + path); testUtils.loadFromTemporary(tablename, conf, path); } }
From source file:com.chinamobile.bcbsp.bspstaff.BSPStaff.java
License:Apache License
/**
 * Reads every file in the migrate-partition directory and applies each
 * {@code key:partition} line to the partitioner.
 *
 * <p>Each line is split on {@code ':'}; the first field becomes the key and the
 * second field is parsed as the integer partition id.
 *
 * @param sssc                    not used by this method
 * @param currentSuperStepCounter not used by this method
 * @throws IOException if the directory cannot be listed or a file cannot be read
 */
private void readMigratePartition(StaffSSControllerInterface sssc, int currentSuperStepCounter)
        throws IOException {
    Path migratePartitionPath = new Path(migratePartitionDir);
    FileSystem fsFileSystem = FileSystem.get(this.getConf().getConf());
    FileStatus[] fs = fsFileSystem.listStatus(migratePartitionPath);
    Path[] listPath = FileUtil.stat2Paths(fs);
    for (Path p : listPath) {
        FSDataInputStream fsInput = fsFileSystem.open(p);
        BufferedReader br = new BufferedReader(new InputStreamReader(fsInput));
        try {
            String line;
            while (null != (line = br.readLine())) {
                String[] strs = line.split(":");
                this.partitioner.updateMigratePartition(new Text(strs[0]), Integer.parseInt(strs[1]));
            }
        } finally {
            // Close the reader (and the wrapped HDFS stream) for every file;
            // previously none of the opened streams were ever closed.
            br.close();
        }
    }
}