List of usage examples for org.apache.hadoop.fs FileStatus isFile
public boolean isFile()
From source file:com.quixey.hadoop.fs.oss.OSSFileSystem.java
License:Apache License
/**
 * Makes sure a single directory exists at the given path.
 *
 * <p>If nothing is found at {@code f}, an empty object is stored under the key of the path with
 * {@code FOLDER_SUFFIX} appended. If something that is not a file is already there, nothing is
 * written.
 *
 * @param f path
 * @return true iff the directory exists, or was created
 * @throws IOException if {@code f} is an existing file, or if the lookup or the store call fails
 */
private boolean mkdir(Path f) throws IOException {
    final FileStatus existing;
    try {
        existing = getFileStatus(f);
    } catch (FileNotFoundException notFound) {
        // Nothing at this path yet: create the folder marker.
        if (LOG.isDebugEnabled()) {
            LOG.debug("Making dir '" + f + "' in OSS");
        }
        store.storeEmptyFile(pathToKey(f) + FOLDER_SUFFIX);
        return true;
    }

    if (existing.isFile()) {
        throw new FileAlreadyExistsException(
            String.format("Can't make directory for path '%s' since it is a file.", f));
    }
    return true;
}
From source file:com.ruizhan.hadoop.hdfs.Ls.java
License:Apache License
/**
 * Prints one listing line for {@code item}, built from its {@code FileStatus} with
 * {@code lineFormat}.
 *
 * @param item the path (and its cached status) to print
 * @throws IOException declared by the overridden method
 */
@Override
protected void processPath(PathData item) throws IOException {
    final FileStatus status = item.stat;

    final String typeFlag = status.isDirectory() ? "d" : "-";
    // The replication factor is only printed for plain files; everything else shows a dash.
    final Object replication = status.isFile() ? status.getReplication() : "-";

    final String listing = String.format(
        lineFormat,
        typeFlag,
        status.getPermission(),
        replication,
        status.getOwner(),
        status.getGroup(),
        formatSize(status.getLen()),
        dateFormat.format(new Date(status.getModificationTime())),
        item);
    out.println(listing);
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java
License:Apache License
@Override public List<ConfigIssue> init() { List<ConfigIssue> issues = super.init(); validateHadoopFS(issues);//ww w . j a v a 2 s . c o m // This is for getting no of splits - no of executors hadoopConf.set(FileInputFormat.LIST_STATUS_NUM_THREADS, "5"); // Per Hive-on-Spark hadoopConf.set(FileInputFormat.SPLIT_MAXSIZE, String.valueOf(750000000)); // Per Hive-on-Spark for (Map.Entry<String, String> config : hdfsConfigs.entrySet()) { hadoopConf.set(config.getKey(), config.getValue()); } List<Path> hdfsDirPaths = new ArrayList<>(); if (hdfsDirLocations == null || hdfsDirLocations.isEmpty()) { issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_18)); } else if (issues.isEmpty()) { for (String hdfsDirLocation : hdfsDirLocations) { try { FileSystem fs = getFileSystemForInitDestroy(); Path ph = fs.makeQualified(new Path(hdfsDirLocation)); hdfsDirPaths.add(ph); if (!fs.exists(ph)) { issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_10, hdfsDirLocation)); } else if (!fs.getFileStatus(ph).isDirectory()) { issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_15, hdfsDirLocation)); } else { try { FileStatus[] files = fs.listStatus(ph); if (files == null || files.length == 0) { issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_16, hdfsDirLocation)); } else if (getContext().isPreview() && previewBuffer.size() < PREVIEW_SIZE) { for (FileStatus fileStatus : files) { if (fileStatus.isFile()) { String path = fileStatus.getPath().toString(); try { List<Map.Entry> buffer; if (dataFormat == DataFormat.AVRO) { buffer = previewAvroBatch(fileStatus, PREVIEW_SIZE); } else { buffer = previewTextBatch(fileStatus, PREVIEW_SIZE); } for (int i = 0; i < buffer.size() && previewBuffer.size() < PREVIEW_SIZE; i++) { Map.Entry entry = buffer.get(i); 
previewBuffer.put(String.valueOf(entry.getKey()), entry.getValue() == null ? null : entry.getValue()); } } catch (IOException | InterruptedException ex) { String msg = "Error opening " + path + ": " + ex; LOG.info(msg, ex); issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_16, fileStatus.getPath())); } } } } } catch (IOException ex) { issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_09, hdfsDirLocation, ex.toString(), ex)); } } } catch (IOException ioe) { LOG.warn("Error connecting to HDFS filesystem: " + ioe, ioe); issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations", Errors.HADOOPFS_11, hdfsDirLocation, ioe.toString(), ioe)); } } } hadoopConf.set(FileInputFormat.INPUT_DIR, StringUtils.join(hdfsDirPaths, ",")); hadoopConf.set(FileInputFormat.INPUT_DIR_RECURSIVE, Boolean.toString(recursive)); switch (dataFormat) { case JSON: if (jsonMaxObjectLen < 1) { issues.add( getContext().createConfigIssue(Groups.JSON.name(), "jsonMaxObjectLen", Errors.HADOOPFS_04)); } break; case TEXT: if (textMaxLineLen < 1) { issues.add( getContext().createConfigIssue(Groups.TEXT.name(), "textMaxLineLen", Errors.HADOOPFS_05)); } break; case LOG: logDataFormatValidator = new LogDataFormatValidator(logMode, logMaxObjectLen, retainOriginalLine, customLogFormat, regex, grokPatternDefinition, grokPattern, enableLog4jCustomLogFormat, log4jCustomLogFormat, OnParseError.ERROR, 0, Groups.LOG.name(), getFieldPathToGroupMap(fieldPathsToGroupName)); logDataFormatValidator.validateLogFormatConfig(issues, getContext()); break; case DELIMITED: if (csvMaxObjectLen < 1) { issues.add(getContext().createConfigIssue(Groups.DELIMITED.name(), "csvMaxObjectLen", Errors.HADOOPFS_30)); } break; case AVRO: if (avroSchema != null && !avroSchema.isEmpty()) { hadoopConf.set(AvroJob.INPUT_SCHEMA, avroSchema); hadoopConf.set(CONF_INPUT_KEY_SCHEMA, avroSchema); } break; default: 
issues.add(getContext().createConfigIssue(Groups.LOG.name(), "dataFormat", Errors.HADOOPFS_06, dataFormat)); } validateParserFactoryConfigs(issues); LOG.info("Issues: " + issues); return issues; }
From source file:com.tfm.utad.reducerdata.ReducerDataPig.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM-dd-HH-mm-ss"); Date date = new Date(); Path inputPath = new Path("/home/jab/camus/reducer-data-pig"); Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date)); // Create configuration Configuration conf = new Configuration(true); conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN); FileSystem fs = FileSystem.get(conf); Path filesPath = new Path(inputPath + "/*"); FileStatus[] files = fs.globStatus(filesPath); // Create job Job job = new Job(conf, "ReducerDataPig"); job.setJarByClass(ReducerDataPig.class); // Setup MapReduce job.setMapperClass(ReducerDataPigMapper.class); job.setReducerClass(ReducerDataPigReducer.class); job.setNumReduceTasks(1);//from w w w .ja va 2 s . c o m // Specify key / value job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(ReducerPigKey.class); // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(SequenceFileInputFormat.class); // Output FileOutputFormat.setOutputPath(job, outputDir); job.setOutputFormatClass(TextOutputFormat.class); // Delete output if exists if (fs.exists(outputDir)) { fs.delete(outputDir, true); } // Execute job int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { Counters counters = job.getCounters(); Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA); LOG.info("Counter malformed data: " + malformedCounter.getValue()); for (FileStatus fStatus : files) { LOG.info("File name:" + fStatus.getPath()); if (fStatus.isFile()) { LOG.info("Removing file in path:" + fStatus.getPath()); fs.delete(fStatus.getPath(), false); } } } }
From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { SimpleDateFormat sdf = new SimpleDateFormat("YYYY-MM-dd-HH-mm-ss"); Date date = new Date(); Path inputPath = new Path("/home/jab/camus/reducer-data-vertica"); Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date)); // Create configuration Configuration conf = new Configuration(true); conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN); FileSystem fs = FileSystem.get(conf); Path filesPath = new Path(inputPath + "/*"); FileStatus[] files = fs.globStatus(filesPath); // Create job Job job = new Job(conf, "ReducerDataVertica"); job.setJarByClass(ReducerDataVertica.class); // Setup MapReduce job.setMapperClass(ReducerDataVerticaMapper.class); job.setReducerClass(ReducerDataVerticaReducer.class); job.setNumReduceTasks(1);// w w w.j a v a 2s .c o m // Specify key / value job.setOutputKeyClass(Text.class); job.setOutputValueClass(ReducerVerticaValue.class); // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(SequenceFileInputFormat.class); // Output FileOutputFormat.setOutputPath(job, outputDir); job.setOutputFormatClass(TextOutputFormat.class); // Delete output if exists if (fs.exists(outputDir)) { fs.delete(outputDir, true); } // Execute job int code = job.waitForCompletion(true) ? 0 : 1; if (code == 0) { Counters counters = job.getCounters(); Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA); LOG.info("Counter malformed data: " + malformedCounter.getValue()); for (FileStatus fStatus : files) { LOG.info("File name:" + fStatus.getPath()); if (fStatus.isFile()) { LOG.info("Removing file in path:" + fStatus.getPath()); fs.delete(fStatus.getPath(), false); } } } }
From source file:com.twitter.algebra.matrix.format.MapDir.java
License:Apache License
/** * Disk usage of the MapDir or a dir of sequence files * @param mapDirPath the path to MapDir or a directory of sequence files * @param fs// www . j av a 2 s . co m * @return * @throws FileNotFoundException * @throws IOException */ public static long du(Path mapDirPath, FileSystem fs) throws FileNotFoundException, IOException { FileStatus[] dirs = fs.listStatus(mapDirPath, mapFilter); if (dirs.length == 0) //it is not a mapdir then, do a simple ls dirs = fs.listStatus(mapDirPath); long size = 0; for (FileStatus dirStatus : dirs) { //if it is a sequence file if (dirStatus.isFile()) size += dirStatus.getLen(); else //or if it is a mapfile, which is directory size += dirSize(dirStatus, fs); } return size; }
From source file:com.uber.hoodie.common.util.FSUtils.java
License:Apache License
/** * Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its * subdirs are skipped/*from w w w .java 2 s .c o m*/ * @param fs File System * @param basePathStr Base-Path * @param consumer Callback for processing * @param excludeMetaFolder Exclude .hoodie folder * @throws IOException */ @VisibleForTesting static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer, boolean excludeMetaFolder) throws IOException { PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER; FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr)); for (int i = 0; i < topLevelStatuses.length; i++) { FileStatus child = topLevelStatuses[i]; if (child.isFile()) { boolean success = consumer.apply(child); if (!success) { throw new HoodieException("Failed to process file-status=" + child); } } else if (pathFilter.accept(child.getPath())) { RemoteIterator<LocatedFileStatus> itr = fs.listFiles(child.getPath(), true); while (itr.hasNext()) { FileStatus status = itr.next(); boolean success = consumer.apply(status); if (!success) { throw new HoodieException("Failed to process file-status=" + status); } } } } }
From source file:com.wandisco.s3hdfs.rewrite.filter.TestMetadata.java
License:Apache License
@Test @SuppressWarnings("deprecation") public void testBasicMetadataRead() throws IOException, URISyntaxException, ServiceException, NoSuchAlgorithmException { NameNode nn = cluster.getNameNode(); System.out.println(nn.getHttpAddress().toString()); S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("myBucket", "bigFile"); // Put new object byte[] data = new byte[SMALL_SIZE]; for (int i = 0; i < SMALL_SIZE; i++) { data[i] = (byte) (i % 256); }/*from w ww . j a v a 2s. com*/ S3Object object = new S3Object(s3HdfsPath.getObjectName(), data); Map<String, Object> metaEntries = new HashMap<String, Object>(); metaEntries.put("scared", "yes"); metaEntries.put("tired", "yes"); metaEntries.put("hopeless", "never"); object.addAllMetadata(metaEntries); object.setMetadataComplete(true); s3Service.putObject(s3HdfsPath.getBucketName(), object); HttpClient httpClient = new HttpClient(); // Set up HttpGet and get response FileStatus fs = hdfs.getFileStatus(new Path(s3HdfsPath.getFullHdfsMetaPath())); assertTrue(fs.isFile()); assertTrue(fs.getPath().getName().equals(META_FILE_NAME)); String url = "http://" + hostName + ":" + PROXY_PORT + "/webhdfs/v1/s3hdfs/" + s3HdfsPath.getUserName() + "/myBucket/bigFile/" + DEFAULT_VERSION + "/" + META_FILE_NAME + "?op=OPEN"; GetMethod httpGet = new GetMethod(url); httpClient.executeMethod(httpGet); InputStream is = httpGet.getResponseBodyAsStream(); Properties retVal = testUtil.parseMap(is); System.out.println(retVal); // consume response and re-allocate connection httpGet.releaseConnection(); assert httpGet.getStatusCode() == 200; assert retVal.getProperty("x-amz-meta-scared").equals("yes"); assert retVal.getProperty("x-amz-meta-tired").equals("yes"); assert retVal.getProperty("x-amz-meta-hopeless").equals("never"); }
From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java
License:Open Source License
/** * Lists the contents of a directory/*www. j av a 2s .c o m*/ * @param request * @param response */ private void handleListFiles(HttpServletRequest request, HttpServletResponse response) { try { String pathStr = request.getParameter("path"); Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr); FileSystem fs = path.getFileSystem(commonParams); FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter); Arrays.sort(fileStatuses, new Comparator<FileStatus>() { @Override public int compare(FileStatus o1, FileStatus o2) { if (o1.isDirectory() && o2.isFile()) return -1; if (o1.isFile() && o2.isDirectory()) return 1; return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase()); } }); response.setContentType("application/json;charset=utf-8"); response.setStatus(HttpServletResponse.SC_OK); PrintWriter out = response.getWriter(); out.print("{\"FileStatuses\":{"); if (pathStr.endsWith("/")) { pathStr = pathStr.substring(0, pathStr.length() - 1); } out.printf("\"BaseDir\":\"%s\",", pathStr); if (path.getParent() != null) out.printf("\"ParentDir\":\"%s\",", path.getParent()); out.print("\"FileStatus\":["); for (int i = 0; i < fileStatuses.length; i++) { FileStatus fileStatus = fileStatuses[i]; if (i != 0) out.print(','); String filename = fileStatus.getPath().getName(); int idot = filename.lastIndexOf('.'); String extension = idot == -1 ? 
"" : filename.substring(idot + 1); out.printf( "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d," + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d," + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\"," + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}", fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(), fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(), fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0, fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase()); } out.print("]}"); // Check if there is an image or master file FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().startsWith("_master") || path.getName().equals("_data.png"); } }); for (FileStatus metaFile : metaFiles) { String metaFileName = metaFile.getPath().getName(); if (metaFileName.startsWith("_master")) { out.printf(",\"MasterPath\":\"%s\"", metaFileName); String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams); if (shape != null) out.printf(",\"Shape\":\"%s\"", shape); } else if (metaFileName.equals("_data.png")) out.printf(",\"ImagePath\":\"%s\"", metaFileName); } out.print("}"); out.close(); } catch (Exception e) { System.out.println("error happened"); e.printStackTrace(); try { e.printStackTrace(response.getWriter()); } catch (IOException ioe) { ioe.printStackTrace(); e.printStackTrace(); } response.setContentType("text/plain;charset=utf-8"); response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR); } }
From source file:gobblin.data.management.conversion.hive.validation.ValidationJob.java
License:Apache License
/*** * Execute Hive queries using {@link HiveJdbcConnector} and validate results. * @param queries Queries to execute./*from w w w . ja va2s .co m*/ */ @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix") private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException { if (null == queries || queries.size() == 0) { log.warn("No queries specified to be executed"); return Collections.emptyList(); } List<Long> rowCounts = Lists.newArrayList(); Closer closer = Closer.create(); try { HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props)); for (String query : queries) { String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString(); Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput); query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query; log.info("Executing query: " + query); try { hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false", "SET hive.auto.convert.join=false", query); FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir); List<FileStatus> files = new ArrayList<>(); for (FileStatus fileStatus : fileStatusList) { if (fileStatus.isFile()) { files.add(fileStatus); } } if (files.size() > 1) { log.warn("Found more than one output file. Should have been one."); } else if (files.size() == 0) { log.warn("Found no output file. 
Should have been one."); } else { String theString = IOUtils.toString( new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8)); log.info("Found row count: " + theString.trim()); if (StringUtils.isBlank(theString.trim())) { rowCounts.add(0l); } else { try { rowCounts.add(Long.parseLong(theString.trim())); } catch (NumberFormatException e) { throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e); } } } } finally { if (this.fs.exists(hiveTempDir)) { log.debug("Deleting temp dir: " + hiveTempDir); this.fs.delete(hiveTempDir, true); } } } } catch (SQLException e) { throw new RuntimeException(e); } finally { try { closer.close(); } catch (IOException e) { log.warn("Could not close HiveJdbcConnector", e); } } return rowCounts; }