List of usage examples for org.apache.hadoop.fs FileStatus getPath
public Path getPath()
From source file:com.conductor.s3.S3InputFormatUtils.java
License:Apache License
/** * Converts the {@link org.apache.hadoop.fs.FileStatus}s to {@link org.apache.hadoop.mapred.InputSplit}s (MRV1 API). * <p>//from ww w. j a v a 2 s . c om * This is taken directly from {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}, less any file system * operations that do not make sense when using {@code S3}. * * @param files * the files to convert * @param minSize * the minimum size of the splits * @param maxSize * the maximum size of the splits * @return the splits. */ static List<InputSplit> convertToInputSplitsMRV1(final Iterable<FileStatus> files, final long minSize, final long maxSize) { final List<InputSplit> splits = Lists.newArrayList(); for (final FileStatus file : files) { // check for valid data for this input format checkArgument(!file.isDirectory(), "Cannot pass directories to this method!"); final String path = file.getPath().toString(); checkArgument(path.startsWith("s3:") || path.startsWith("s3n:"), "Expected S3 input"); // create splits out of file final long length = file.getLen(); if (length > 0) { long blockSize = file.getBlockSize(); long splitSize = computeSplitSize(blockSize, minSize, maxSize); long bytesRemaining = length; while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) { splits.add(new FileSplit(file.getPath(), length - bytesRemaining, splitSize, S3_SPLIT_HOST)); bytesRemaining -= splitSize; } if (bytesRemaining != 0) { splits.add( new FileSplit(file.getPath(), length - bytesRemaining, bytesRemaining, S3_SPLIT_HOST)); } } } return splits; }
From source file:com.constellio.sdk.tests.FactoriesTestFeatures.java
private void deleteFromHadoop(String user, String url) { System.setProperty("HADOOP_USER_NAME", user); Configuration hadoopConfig = new Configuration(); if (url == null || user == null) { throw new RuntimeException("No config"); }//from www.j a va2 s . c o m hadoopConfig.set("fs.defaultFS", url); hadoopConfig.set("hadoop.job.ugi", user); try { FileSystem hdfs = FileSystem.get(hadoopConfig); for (FileStatus file : hdfs.globStatus(new Path("*"))) { hdfs.delete(file.getPath(), true); } } catch (IOException e) { throw new RuntimeException(e); } }
From source file:com.conversantmedia.mapreduce.example.distribute.DistributedObjectExample.java
License:Apache License
/**
 * Distributed {@link Set} containing the prepared list of blacklisted words.
 * <p>
 * The words are read, one per line, from the file named by {@code context.blacklist}. The file is decoded
 * as UTF-8 so that the result does not depend on the default charset of the JVM that happens to run this.
 *
 * @return the set of blacklisted words, or {@code null} when {@code context.blacklist} is blank
 * @throws IOException if it fails to read in the file
 */
@Distribute
public Set<String> getBlacklist() throws IOException {
    if (!StringUtils.isNotBlank(context.blacklist)) {
        return null;
    }

    Set<String> blacklist = new HashSet<>();
    InputStreamReader reader = null;
    try {
        FileSystem fs = FileSystem.get(new Configuration());
        FileStatus file = fs.getFileStatus(new Path(context.blacklist));
        // Explicit charset: the no-charset constructor falls back to the platform default.
        reader = new InputStreamReader(fs.open(file.getPath()), java.nio.charset.StandardCharsets.UTF_8);
        blacklist.addAll(IOUtils.readLines(reader));
    } finally {
        IOUtils.closeQuietly(reader);
    }
    return blacklist;
}
From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java
License:Apache License
@DriverInit public void copyFilesToWorking() throws IOException { // Copy the input files into the 'workingDir' FileSystem fs = FileSystem.get(getConf()); this.workingDirectory = new Path("/tmp/" + UUID.randomUUID().toString()); fs.mkdirs(workingDirectory);//from ww w . j av a 2 s.c o m FileStatus[] files = fs.globStatus(new Path(context.getInput())); for (FileStatus file : files) { Path dest = new Path(workingDirectory, file.getPath().getName()); FileUtil.copy(fs, file.getPath(), fs, dest, false, getConf()); } }
From source file:com.conversantmedia.mapreduce.io.avro.CombineAvroKeyFileInputFormat.java
License:Apache License
@Override protected List<FileStatus> listStatus(JobContext job) throws IOException { // Get the list from our parent... List<FileStatus> result = new ArrayList<>(); // Loop through and remove any that are empty for (FileStatus file : super.listStatus(job)) { if (file.getLen() < 1) { logger().error("Skipping Empty file: " + file.getPath()); } else {// w w w . j a va2 s . c om result.add(file); } } return result; }
From source file:com.conversantmedia.mapreduce.tool.BaseTool.java
License:Apache License
/** * Moves our inputs into the 'archive' path for * long term storage, or perhaps further processing. * @param context the job's driver context bean * @throws IOException if the inputs cannot be moved to * the archive path./*from w w w . ja v a2s. co m*/ */ protected void archiveInputs(T context) throws IOException { FileSystem fs = FileSystem.get(getConf()); fs.mkdirs(context.getArchive()); for (Path input : context.getInput()) { List<FileStatus> status = getInputFiles(input); for (FileStatus file : status) { Path dest = new Path(context.getArchive(), file.getPath().getName()); fs.rename(file.getPath(), dest); logger().debug("Moved [" + input + "] to [" + dest + "]"); } } }
From source file:com.curiousby.baoyou.cn.hadoop.HDFSUtils.java
License:Open Source License
/** * ?/*w w w.j ava 2 s . c om*/ * @param dirPath */ public List<String> dir(String dirPath) throws IOException { List<String> fileList = null; Path path = new Path(dirPath); if (fileSystem.exists(path)) { fileList = new ArrayList<String>(); FileStatus[] list = this.getFileStatus(path); for (FileStatus fileStatus : list) { fileList.add(fileStatus.getPath().toString()); } } else { System.out.println("?"); } return fileList; }
From source file:com.dasasian.chok.operation.master.IndexDeployOperation.java
License:Apache License
protected static List<Shard> readShardsFromFs(final String indexName, final String indexPathString) throws IndexDeployException { // get shard folders from source URI uri;/*from w w w . jav a 2 s. co m*/ try { uri = new URI(indexPathString); } catch (final URISyntaxException e) { throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "unable to parse index path uri '" + indexPathString + "', make sure it starts with file:// or hdfs:// ", e); } FileSystem fileSystem; try { fileSystem = HadoopUtil.getFileSystem(new Path(uri.toString())); } catch (final IOException e) { throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "unable to retrive file system for index path '" + indexPathString + "', make sure your path starts with hadoop support prefix like file:// or hdfs://", e); } List<Shard> shards = new ArrayList<>(); try { final Path indexPath = new Path(indexPathString); if (!fileSystem.exists(indexPath)) { throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index path '" + uri + "' does not exists"); } final FileStatus[] listStatus = fileSystem.listStatus(indexPath, new PathFilter() { public boolean accept(final Path aPath) { return !aPath.getName().startsWith("."); } }); for (final FileStatus fileStatus : listStatus) { String shardPath = fileStatus.getPath().toString(); if (fileStatus.isDir() || shardPath.endsWith(".zip")) { shards.add(new Shard(createShardName(indexName, shardPath), shardPath)); } } } catch (final IOException e) { throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "could not access index path: " + indexPathString, e); } if (shards.size() == 0) { throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index does not contain any shard"); } return shards; }
From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java
License:Apache License
/**
 * Creates the given database (if absent) and then runs every DDL file found under the {@code tpcds/ddl}
 * classpath resource against it.
 * <p>
 * Before anything is executed, the data directory returned by {@code getDataDir()} is checked: it must be
 * set, and it must contain one sub-directory per entry of {@code tableNames}. A data directory starting
 * with {@code hdfs://} is checked through the Hadoop file system; anything else is treated as a local
 * path. In each DDL file, {@code ${DB}} is replaced by the database name and {@code ${DATA_LOCATION}} by
 * the table's data directory, where the table name is the DDL file name up to its first dot.
 *
 * @param database the database to create the tables in
 * @param client the client used to execute the statements
 * @throws IOException if the data directory is unset, missing, not a directory, or lacks a table
 *         sub-directory, or if the {@code tpcds} resource is not on the classpath
 * @throws Exception if reading a DDL file or executing a statement fails
 */
public static void createTables(String database, TajoClient client) throws Exception {
    String dataDir = getDataDir();
    if (dataDir == null || dataDir.isEmpty()) {
        throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
    }

    if (dataDir.startsWith("hdfs://")) {
        Path path = new Path(dataDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        for (String eachTable : tableNames) {
            Path tableDataDir = new Path(path, eachTable);
            if (!fs.exists(tableDataDir)) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    } else {
        File dataDirFile = new File(dataDir);
        if (!dataDirFile.exists()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
        }
        if (dataDirFile.isFile()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
        }
        for (String eachTable : tableNames) {
            File tableDataDir = new File(dataDirFile, eachTable);
            if (!tableDataDir.exists()) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    }

    LOG.info("Create database: " + database);
    client.executeQuery("create database if not exists " + database);

    // getSystemResource returns null when the resource is missing; report that explicitly instead of
    // failing with a NullPointerException on toString().
    java.net.URL tpcdsResource = ClassLoader.getSystemResource("tpcds");
    if (tpcdsResource == null) {
        throw new IOException("Resource 'tpcds' not found on the classpath.");
    }
    Path tpcdsResourceURL = new Path(tpcdsResource.toString());
    Path ddlPath = new Path(tpcdsResourceURL, "ddl");

    FileSystem localFs = FileSystem.getLocal(new Configuration());
    FileStatus[] files = localFs.listStatus(ddlPath);

    // A data directory without a scheme is a local path; give it an explicit file:// prefix.
    String dataDirWithPrefix = dataDir;
    if (dataDir.indexOf("://") < 0) {
        dataDirWithPrefix = "file://" + dataDir;
    }

    for (FileStatus eachFile : files) {
        if (eachFile.isFile()) {
            String tableName = eachFile.getPath().getName().split("\\.")[0];
            String query = FileUtil.readTextFile(new File(eachFile.getPath().toUri()));
            query = query.replace("${DB}", database);
            query = query.replace("${DATA_LOCATION}", dataDirWithPrefix + "/" + tableName);

            LOG.info("Create table:" + tableName + "," + query);
            client.executeQuery(query);
        }
    }
}
From source file:com.datasalt.pangool.solr.SolrRecordWriter.java
License:Apache License
/** * Write a file to a zip output stream, removing leading path name components from the actual file name when creating * the zip file entry./*from w w w . j a v a2s .c o m*/ * * The entry placed in the zip file is <code>baseName</code>/ <code>relativePath</code>, where * <code>relativePath</code> is constructed by removing a leading <code>root</code> from the path for * <code>itemToZip</code>. * * If <code>itemToZip</code> is an empty directory, it is ignored. If <code>itemToZip</code> is a directory, the * contents of the directory are added recursively. * * @param zos * The zip output stream * @param baseName * The base name to use for the file name entry in the zip file * @param root * The path to remove from <code>itemToZip</code> to make a relative path name * @param itemToZip * The path to the file to be added to the zip file * @return the number of entries added * @throws IOException */ static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName, final String root, final Path itemToZip) throws IOException { LOG.info(String.format("zipDirectory: %s %s %s", baseName, root, itemToZip)); LocalFileSystem localFs = FileSystem.getLocal(conf); int count = 0; final FileStatus itemStatus = localFs.getFileStatus(itemToZip); if (itemStatus.isDir()) { final FileStatus[] statai = localFs.listStatus(itemToZip); // Add a directory entry to the zip file final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root); final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR); LOG.info(String.format("Adding directory %s to zip", zipDirName)); zos.putNextEntry(dirZipEntry); zos.closeEntry(); count++; if (statai == null || statai.length == 0) { LOG.info(String.format("Skipping empty directory %s", itemToZip)); return count; } for (FileStatus status : statai) { count += zipDirectory(conf, zos, baseName, root, status.getPath()); } LOG.info(String.format("Wrote %d entries for directory 
%s", count, itemToZip)); return count; } final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root); if (inZipPath.length() == 0) { LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName)); return 0; } // Take empty files in case the place holder is needed FSDataInputStream in = null; try { in = localFs.open(itemToZip); final ZipEntry ze = new ZipEntry(inZipPath); ze.setTime(itemStatus.getModificationTime()); // Comments confuse looking at the zip file // ze.setComment(itemToZip.toString()); zos.putNextEntry(ze); IOUtils.copyBytes(in, zos, conf, false); zos.closeEntry(); LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip)); return 1; } finally { in.close(); } }