Example usage for org.apache.hadoop.fs FileStatus isFile

Introduction

On this page you can find example usages of the isFile() method of org.apache.hadoop.fs.FileStatus.

Prototype

public boolean isFile()

Source Link

Document

Is this a file?

Usage

From source file:com.quixey.hadoop.fs.oss.OSSFileSystem.java

License:Apache License

/**
 * Ensures that a single directory exists at the given path.
 *
 * <p>If the path already resolves to a directory nothing is written. If it
 * does not exist, an empty marker object (key + {@code FOLDER_SUFFIX}) is
 * stored to represent the directory.
 *
 * @param f path of the directory to create
 * @return always {@code true} when the method returns normally
 * @throws FileAlreadyExistsException if the path exists and is a regular file
 * @throws IOException if the status lookup or the store operation fails
 */
private boolean mkdir(Path f) throws IOException {
    FileStatus existing = null;
    boolean missing = false;
    try {
        existing = getFileStatus(f);
    } catch (FileNotFoundException notFound) {
        // Nothing at this path yet: the marker object has to be created.
        missing = true;
    }

    if (missing) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("Making dir '" + f + "' in OSS");
        }
        store.storeEmptyFile(pathToKey(f) + FOLDER_SUFFIX);
        return true;
    }

    if (existing.isFile()) {
        String message = String.format("Can't make directory for path '%s' since it is a file.", f);
        throw new FileAlreadyExistsException(message);
    }
    return true;
}

From source file:com.ruizhan.hadoop.hdfs.Ls.java

License:Apache License

/**
 * Prints one listing line for the given path item: type flag, permission,
 * replication (or "-" for non-files), owner, group, formatted size,
 * modification date and the item itself, laid out by {@code lineFormat}.
 *
 * @param item the path entry whose status is printed
 * @throws IOException declared by the overridden method
 */
@Override
protected void processPath(PathData item) throws IOException {
    FileStatus status = item.stat;

    String typeFlag = status.isDirectory() ? "d" : "-";
    Object permission = status.getPermission();

    // Replication is only meaningful for regular files.
    Object replication;
    if (status.isFile()) {
        replication = status.getReplication();
    } else {
        replication = "-";
    }

    Object owner = status.getOwner();
    Object group = status.getGroup();
    Object size = formatSize(status.getLen());
    Object modified = dateFormat.format(new Date(status.getModificationTime()));

    out.println(String.format(lineFormat, typeFlag, permission, replication, owner, group, size, modified,
            item));
}

From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java

License:Apache License

/**
 * Validates the stage configuration and prepares {@code hadoopConf}.
 *
 * <p>Steps, in order: run the parent validation and {@code validateHadoopFS};
 * apply fixed split settings and the user-supplied {@code hdfsConfigs};
 * check every configured HDFS directory (exists, is a directory, is not
 * empty) and, in preview mode, fill {@code previewBuffer} with sample
 * records; publish the input directories to {@code hadoopConf}; finally
 * validate the settings specific to the selected {@code dataFormat}.
 *
 * @return the issues returned by {@code super.init()} plus every issue
 *         added by the checks in this method
 */
@Override
public List<ConfigIssue> init() {
    List<ConfigIssue> issues = super.init();
    validateHadoopFS(issues);
    // This is for getting no of splits - no of executors
    hadoopConf.set(FileInputFormat.LIST_STATUS_NUM_THREADS, "5"); // Per Hive-on-Spark
    hadoopConf.set(FileInputFormat.SPLIT_MAXSIZE, String.valueOf(750000000)); // Per Hive-on-Spark
    // User-supplied properties are applied after the fixed values above.
    for (Map.Entry<String, String> config : hdfsConfigs.entrySet()) {
        hadoopConf.set(config.getKey(), config.getValue());
    }
    List<Path> hdfsDirPaths = new ArrayList<>();
    if (hdfsDirLocations == null || hdfsDirLocations.isEmpty()) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                Errors.HADOOPFS_18));
    } else if (issues.isEmpty()) {
        // Directories are only inspected when nothing has failed so far.
        for (String hdfsDirLocation : hdfsDirLocations) {
            try {
                FileSystem fs = getFileSystemForInitDestroy();
                Path ph = fs.makeQualified(new Path(hdfsDirLocation));
                // The qualified path is recorded before the checks below, so it is
                // part of INPUT_DIR even if one of those checks reports an issue.
                hdfsDirPaths.add(ph);
                if (!fs.exists(ph)) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                            Errors.HADOOPFS_10, hdfsDirLocation));
                } else if (!fs.getFileStatus(ph).isDirectory()) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                            Errors.HADOOPFS_15, hdfsDirLocation));
                } else {
                    try {
                        FileStatus[] files = fs.listStatus(ph);
                        if (files == null || files.length == 0) {
                            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(),
                                    "hdfsDirLocations", Errors.HADOOPFS_16, hdfsDirLocation));
                        } else if (getContext().isPreview() && previewBuffer.size() < PREVIEW_SIZE) {
                            // Preview mode: sample records from the regular files directly in
                            // this directory until previewBuffer holds PREVIEW_SIZE entries.
                            for (FileStatus fileStatus : files) {
                                if (fileStatus.isFile()) {
                                    String path = fileStatus.getPath().toString();
                                    try {
                                        List<Map.Entry> buffer;
                                        if (dataFormat == DataFormat.AVRO) {
                                            buffer = previewAvroBatch(fileStatus, PREVIEW_SIZE);
                                        } else {
                                            buffer = previewTextBatch(fileStatus, PREVIEW_SIZE);
                                        }
                                        for (int i = 0; i < buffer.size()
                                                && previewBuffer.size() < PREVIEW_SIZE; i++) {
                                            Map.Entry entry = buffer.get(i);
                                            // NOTE(review): the null check in the value expression looks
                                            // redundant (both branches yield entry.getValue()) - confirm.
                                            previewBuffer.put(String.valueOf(entry.getKey()),
                                                    entry.getValue() == null ? null : entry.getValue());
                                        }
                                    } catch (IOException | InterruptedException ex) {
                                        // A file that cannot be previewed is reported as an issue; the
                                        // remaining files are still processed.
                                        // NOTE(review): the interrupt flag is not restored when an
                                        // InterruptedException is caught here - confirm this is intended.
                                        String msg = "Error opening " + path + ": " + ex;
                                        LOG.info(msg, ex);
                                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(),
                                                "hdfsDirLocations", Errors.HADOOPFS_16, fileStatus.getPath()));
                                    }
                                }
                            }
                        }
                    } catch (IOException ex) {
                        // Listing the directory failed.
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                                Errors.HADOOPFS_09, hdfsDirLocation, ex.toString(), ex));
                    }
                }
            } catch (IOException ioe) {
                // Obtaining the file system or checking the path failed.
                LOG.warn("Error connecting to HDFS filesystem: " + ioe, ioe);
                issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                        Errors.HADOOPFS_11, hdfsDirLocation, ioe.toString(), ioe));
            }
        }
    }
    // Input directories are published as a comma-separated list of qualified paths.
    hadoopConf.set(FileInputFormat.INPUT_DIR, StringUtils.join(hdfsDirPaths, ","));
    hadoopConf.set(FileInputFormat.INPUT_DIR_RECURSIVE, Boolean.toString(recursive));
    // Per-format validation: each branch checks only the settings of that format.
    switch (dataFormat) {
    case JSON:
        if (jsonMaxObjectLen < 1) {
            issues.add(
                    getContext().createConfigIssue(Groups.JSON.name(), "jsonMaxObjectLen", Errors.HADOOPFS_04));
        }
        break;
    case TEXT:
        if (textMaxLineLen < 1) {
            issues.add(
                    getContext().createConfigIssue(Groups.TEXT.name(), "textMaxLineLen", Errors.HADOOPFS_05));
        }
        break;
    case LOG:
        // Log format validation is delegated to a dedicated validator, which is
        // kept in a field after being created here.
        logDataFormatValidator = new LogDataFormatValidator(logMode, logMaxObjectLen, retainOriginalLine,
                customLogFormat, regex, grokPatternDefinition, grokPattern, enableLog4jCustomLogFormat,
                log4jCustomLogFormat, OnParseError.ERROR, 0, Groups.LOG.name(),
                getFieldPathToGroupMap(fieldPathsToGroupName));
        logDataFormatValidator.validateLogFormatConfig(issues, getContext());
        break;
    case DELIMITED:
        if (csvMaxObjectLen < 1) {
            issues.add(getContext().createConfigIssue(Groups.DELIMITED.name(), "csvMaxObjectLen",
                    Errors.HADOOPFS_30));
        }
        break;
    case AVRO:
        // The schema is only pushed into the Hadoop configuration when one is configured.
        if (avroSchema != null && !avroSchema.isEmpty()) {
            hadoopConf.set(AvroJob.INPUT_SCHEMA, avroSchema);
            hadoopConf.set(CONF_INPUT_KEY_SCHEMA, avroSchema);
        }
        break;
    default:
        // Any other data format is reported as an issue (under the LOG group).
        issues.add(getContext().createConfigIssue(Groups.LOG.name(), "dataFormat", Errors.HADOOPFS_06,
                dataFormat));
    }
    validateParserFactoryConfigs(issues);
    LOG.info("Issues: " + issues);
    return issues;
}

From source file:com.tfm.utad.reducerdata.ReducerDataPig.java

/**
 * Runs the "ReducerDataPig" MapReduce job over the sequence files found in
 * the input directory and, when the job succeeds, deletes the input files
 * that were present before the job started.
 *
 * <p>The output goes to a timestamped sub-directory; an existing output
 * directory with the same name is removed first.
 *
 * @param args command line arguments (not used)
 * @throws IOException if a file system operation or the job submission fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    // "yyyy" is the calendar year. The former "YYYY" is the week year, which
    // produces the wrong year for dates around the turn of the year.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    // Snapshot the input files up front so that only files that existed
    // before the job ran are removed afterwards.
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        // globStatus may return null when nothing matched; treat that as "no files".
        if (files != null) {
            for (FileStatus fStatus : files) {
                LOG.info("File name:" + fStatus.getPath());
                if (fStatus.isFile()) {
                    LOG.info("Removing file in path:" + fStatus.getPath());
                    fs.delete(fStatus.getPath(), false);
                }
            }
        }
    }
}

From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java

/**
 * Runs the "ReducerDataVertica" MapReduce job over the sequence files found
 * in the input directory and, when the job succeeds, deletes the input files
 * that were present before the job started.
 *
 * <p>The output goes to a timestamped sub-directory; an existing output
 * directory with the same name is removed first.
 *
 * @param args command line arguments (not used)
 * @throws IOException if a file system operation or the job submission fails
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be loaded
 */
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    // "yyyy" is the calendar year. The former "YYYY" is the week year, which
    // produces the wrong year for dates around the turn of the year.
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss");
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    // Snapshot the input files up front so that only files that existed
    // before the job ran are removed afterwards.
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        // globStatus may return null when nothing matched; treat that as "no files".
        if (files != null) {
            for (FileStatus fStatus : files) {
                LOG.info("File name:" + fStatus.getPath());
                if (fStatus.isFile()) {
                    LOG.info("Removing file in path:" + fStatus.getPath());
                    fs.delete(fStatus.getPath(), false);
                }
            }
        }
    }
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License

/**
 * Computes the disk usage of a MapDir, or of a plain directory of sequence
 * files.
 *
 * <p>The directory is first listed through {@code mapFilter}; when that
 * yields nothing it is listed again without a filter. Regular files
 * contribute their length, every other entry contributes
 * {@code dirSize(entry, fs)}.
 *
 * @param mapDirPath the path to the MapDir or to a directory of sequence files
 * @param fs the file system used for the listing
 * @return the summed size of all listed entries
 * @throws FileNotFoundException if the listing reports a missing path
 * @throws IOException if the listing fails
 */
public static long du(Path mapDirPath, FileSystem fs) throws FileNotFoundException, IOException {
    FileStatus[] entries = fs.listStatus(mapDirPath, mapFilter);
    if (entries.length == 0) {
        // Not a MapDir: fall back to an unfiltered listing.
        entries = fs.listStatus(mapDirPath);
    }

    long total = 0;
    for (int i = 0; i < entries.length; i++) {
        FileStatus entry = entries[i];
        if (!entry.isFile()) {
            // A map file is stored as a directory.
            total += dirSize(entry, fs);
            continue;
        }
        // A sequence file.
        total += entry.getLen();
    }
    return total;
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License

/**
 * Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its
 * subdirs are skipped/*from  w w w .java  2 s  .c  o m*/
 * @param fs           File System
 * @param basePathStr  Base-Path
 * @param consumer     Callback for processing
 * @param excludeMetaFolder Exclude .hoodie folder
 * @throws IOException
 */
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
        boolean excludeMetaFolder) throws IOException {
    PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
    FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
    for (int i = 0; i < topLevelStatuses.length; i++) {
        FileStatus child = topLevelStatuses[i];
        if (child.isFile()) {
            boolean success = consumer.apply(child);
            if (!success) {
                throw new HoodieException("Failed to process file-status=" + child);
            }
        } else if (pathFilter.accept(child.getPath())) {
            RemoteIterator<LocatedFileStatus> itr = fs.listFiles(child.getPath(), true);
            while (itr.hasNext()) {
                FileStatus status = itr.next();
                boolean success = consumer.apply(status);
                if (!success) {
                    throw new HoodieException("Failed to process file-status=" + status);
                }
            }
        }
    }
}

From source file:com.wandisco.s3hdfs.rewrite.filter.TestMetadata.java

License:Apache License

/**
 * Stores an object with three user metadata entries through the S3 service,
 * checks that the metadata file exists in HDFS, then reads that file back
 * through the proxy's webhdfs OPEN operation and verifies the entries.
 *
 * <p>The final checks use {@code assertTrue} rather than the {@code assert}
 * keyword, because {@code assert} statements are skipped unless the JVM is
 * started with {@code -ea}.
 */
@Test
@SuppressWarnings("deprecation")
public void testBasicMetadataRead()
        throws IOException, URISyntaxException, ServiceException, NoSuchAlgorithmException {
    NameNode nn = cluster.getNameNode();
    System.out.println(nn.getHttpAddress().toString());

    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("myBucket", "bigFile");

    // Put new object
    byte[] data = new byte[SMALL_SIZE];
    for (int i = 0; i < SMALL_SIZE; i++) {
        data[i] = (byte) (i % 256);
    }
    S3Object object = new S3Object(s3HdfsPath.getObjectName(), data);
    Map<String, Object> metaEntries = new HashMap<String, Object>();
    metaEntries.put("scared", "yes");
    metaEntries.put("tired", "yes");
    metaEntries.put("hopeless", "never");
    object.addAllMetadata(metaEntries);
    object.setMetadataComplete(true);
    s3Service.putObject(s3HdfsPath.getBucketName(), object);

    HttpClient httpClient = new HttpClient();

    // The metadata must have been written as a regular file with the expected name
    FileStatus fs = hdfs.getFileStatus(new Path(s3HdfsPath.getFullHdfsMetaPath()));
    assertTrue(fs.isFile());
    assertTrue(fs.getPath().getName().equals(META_FILE_NAME));

    // Set up HttpGet and get response
    String url = "http://" + hostName + ":" + PROXY_PORT + "/webhdfs/v1/s3hdfs/" + s3HdfsPath.getUserName()
            + "/myBucket/bigFile/" + DEFAULT_VERSION + "/" + META_FILE_NAME + "?op=OPEN";
    GetMethod httpGet = new GetMethod(url);
    Properties retVal;
    int statusCode;
    try {
        httpClient.executeMethod(httpGet);
        InputStream is = httpGet.getResponseBodyAsStream();
        retVal = testUtil.parseMap(is);
        statusCode = httpGet.getStatusCode();
    } finally {
        // consume response and re-allocate connection, even when reading fails
        httpGet.releaseConnection();
    }
    System.out.println(retVal);

    assertTrue(statusCode == 200);
    // Constant-first equals: a missing property fails the assertion instead of throwing an NPE
    assertTrue("yes".equals(retVal.getProperty("x-amz-meta-scared")));
    assertTrue("yes".equals(retVal.getProperty("x-amz-meta-tired")));
    assertTrue("never".equals(retVal.getProperty("x-amz-meta-hopeless")));
}

From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java

License:Open Source License

/**
 * Lists the contents of a directory/*www.  j  av a 2s  .c o  m*/
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr.endsWith("/")) {
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                if (shape != null)
                    out.printf(",\"Shape\":\"%s\"", shape);
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
        }
        out.print("}");

        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}

From source file:gobblin.data.management.conversion.hive.validation.ValidationJob.java

License:Apache License

/***
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute./*from w w w .  ja  va2s  .co  m*/
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {

    if (null == queries || queries.size() == 0) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }

    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();

    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);
            try {
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
                        "SET hive.auto.convert.join=false", query);
                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }
                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.size() == 0) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    String theString = IOUtils.toString(
                            new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
                    log.info("Found row count: " + theString.trim());
                    if (StringUtils.isBlank(theString.trim())) {
                        rowCounts.add(0l);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(theString.trim()));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        throw new RuntimeException(e);
    } finally {
        try {
            closer.close();
        } catch (IOException e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }

    return rowCounts;
}