Example usage for org.apache.hadoop.fs FileStatus isFile

Introduction

This page collects usage examples for the org.apache.hadoop.fs.FileStatus#isFile method.

Prototype

public boolean isFile() 

Document

Is this a file?
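
isFile() returns true when the status describes a regular file, and false for directories (and symlinks, where the file system supports them). Below is a minimal, hypothetical sketch of a typical call; the configuration and path are illustrative only:

Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
// "/tmp/example.txt" is an illustrative path; getFileStatus throws
// FileNotFoundException if the path does not exist.
FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
if (status.isFile()) {
    System.out.println("Regular file, " + status.getLen() + " bytes");
} else {
    System.out.println("Not a regular file");
}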

Usage

From source file:com.quixey.hadoop.fs.oss.OSSFileSystem.java

License:Apache License
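
Here isFile() guards directory creation: if the target path already resolves to a regular file, mkdir fails with a FileAlreadyExistsException instead of silently shadowing the file.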

/**
 * Creates a single directory.
 *
 * @param f path
 * @return true iff the directory exists, or was created
 */
private boolean mkdir(Path f) throws IOException {
    try {
        FileStatus fileStatus = getFileStatus(f);
        if (fileStatus.isFile()) {
            throw new FileAlreadyExistsException(
                    String.format("Can't make directory for path '%s' since it is a file.", f));
        }
    } catch (FileNotFoundException e) {
        if (LOG.isDebugEnabled())
            LOG.debug("Making dir '" + f + "' in OSS");
        String key = pathToKey(f) + FOLDER_SUFFIX;
        store.storeEmptyFile(key);
    }
    return true;
}

From source file:com.ruizhan.hadoop.hdfs.Ls.java

License:Apache License
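
An ls-style formatter: isFile() decides whether to print a replication factor, since replication does not apply to directories (printed as "-").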

@Override
protected void processPath(PathData item) throws IOException {
    FileStatus stat = item.stat;
    String line = String.format(lineFormat, (stat.isDirectory() ? "d" : "-"), stat.getPermission(),
            (stat.isFile() ? stat.getReplication() : "-"), stat.getOwner(), stat.getGroup(),
            formatSize(stat.getLen()), dateFormat.format(new Date(stat.getModificationTime())), item);
    out.println(line);/*from  ww w  . j  a  va  2  s. c  o  m*/
}

From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java

License:Apache License
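
A StreamSets cluster origin validating its configuration at init time: after confirming each configured location is an existing directory, isFile() filters its listing down to regular files before building preview batches.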

@Override
public List<ConfigIssue> init() {
    List<ConfigIssue> issues = super.init();
    validateHadoopFS(issues);
    // Used to derive the number of input splits, and hence the number of executors
    hadoopConf.set(FileInputFormat.LIST_STATUS_NUM_THREADS, "5"); // Per Hive-on-Spark
    hadoopConf.set(FileInputFormat.SPLIT_MAXSIZE, String.valueOf(750000000)); // Per Hive-on-Spark
    for (Map.Entry<String, String> config : hdfsConfigs.entrySet()) {
        hadoopConf.set(config.getKey(), config.getValue());
    }
    List<Path> hdfsDirPaths = new ArrayList<>();
    if (hdfsDirLocations == null || hdfsDirLocations.isEmpty()) {
        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                Errors.HADOOPFS_18));
    } else if (issues.isEmpty()) {
        for (String hdfsDirLocation : hdfsDirLocations) {
            try {
                FileSystem fs = getFileSystemForInitDestroy();
                Path ph = fs.makeQualified(new Path(hdfsDirLocation));
                hdfsDirPaths.add(ph);
                if (!fs.exists(ph)) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                            Errors.HADOOPFS_10, hdfsDirLocation));
                } else if (!fs.getFileStatus(ph).isDirectory()) {
                    issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                            Errors.HADOOPFS_15, hdfsDirLocation));
                } else {
                    try {
                        FileStatus[] files = fs.listStatus(ph);
                        if (files == null || files.length == 0) {
                            issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(),
                                    "hdfsDirLocations", Errors.HADOOPFS_16, hdfsDirLocation));
                        } else if (getContext().isPreview() && previewBuffer.size() < PREVIEW_SIZE) {
                            for (FileStatus fileStatus : files) {
                                if (fileStatus.isFile()) {
                                    String path = fileStatus.getPath().toString();
                                    try {
                                        List<Map.Entry> buffer;
                                        if (dataFormat == DataFormat.AVRO) {
                                            buffer = previewAvroBatch(fileStatus, PREVIEW_SIZE);
                                        } else {
                                            buffer = previewTextBatch(fileStatus, PREVIEW_SIZE);
                                        }
                                        for (int i = 0; i < buffer.size()
                                                && previewBuffer.size() < PREVIEW_SIZE; i++) {
                                            Map.Entry entry = buffer.get(i);
                                            previewBuffer.put(String.valueOf(entry.getKey()), entry.getValue());
                                        }
                                    } catch (IOException | InterruptedException ex) {
                                        String msg = "Error opening " + path + ": " + ex;
                                        LOG.info(msg, ex);
                                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(),
                                                "hdfsDirLocations", Errors.HADOOPFS_16, fileStatus.getPath()));
                                    }
                                }
                            }
                        }
                    } catch (IOException ex) {
                        issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                                Errors.HADOOPFS_09, hdfsDirLocation, ex.toString(), ex));
                    }
                }
            } catch (IOException ioe) {
                LOG.warn("Error connecting to HDFS filesystem: " + ioe, ioe);
                issues.add(getContext().createConfigIssue(Groups.HADOOP_FS.name(), "hdfsDirLocations",
                        Errors.HADOOPFS_11, hdfsDirLocation, ioe.toString(), ioe));
            }
        }
    }
    hadoopConf.set(FileInputFormat.INPUT_DIR, StringUtils.join(hdfsDirPaths, ","));
    hadoopConf.set(FileInputFormat.INPUT_DIR_RECURSIVE, Boolean.toString(recursive));
    switch (dataFormat) {
    case JSON:
        if (jsonMaxObjectLen < 1) {
            issues.add(
                    getContext().createConfigIssue(Groups.JSON.name(), "jsonMaxObjectLen", Errors.HADOOPFS_04));
        }
        break;
    case TEXT:
        if (textMaxLineLen < 1) {
            issues.add(
                    getContext().createConfigIssue(Groups.TEXT.name(), "textMaxLineLen", Errors.HADOOPFS_05));
        }
        break;
    case LOG:
        logDataFormatValidator = new LogDataFormatValidator(logMode, logMaxObjectLen, retainOriginalLine,
                customLogFormat, regex, grokPatternDefinition, grokPattern, enableLog4jCustomLogFormat,
                log4jCustomLogFormat, OnParseError.ERROR, 0, Groups.LOG.name(),
                getFieldPathToGroupMap(fieldPathsToGroupName));
        logDataFormatValidator.validateLogFormatConfig(issues, getContext());
        break;
    case DELIMITED:
        if (csvMaxObjectLen < 1) {
            issues.add(getContext().createConfigIssue(Groups.DELIMITED.name(), "csvMaxObjectLen",
                    Errors.HADOOPFS_30));
        }
        break;
    case AVRO:
        if (avroSchema != null && !avroSchema.isEmpty()) {
            hadoopConf.set(AvroJob.INPUT_SCHEMA, avroSchema);
            hadoopConf.set(CONF_INPUT_KEY_SCHEMA, avroSchema);
        }
        break;
    default:
        issues.add(getContext().createConfigIssue(Groups.LOG.name(), "dataFormat", Errors.HADOOPFS_06,
                dataFormat));
    }
    validateParserFactoryConfigs(issues);
    LOG.info("Issues: " + issues);
    return issues;
}

From source file:com.tfm.utad.reducerdata.ReducerDataPig.java
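
A MapReduce driver: after the job succeeds, it iterates the glob results and uses isFile() so that only regular input files are deleted during cleanup.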

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss"); // yyyy (calendar year), not YYYY (week-year)
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-pig");
    Path outputDir = new Path("/home/jab/camus/pigdata/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataPig");
    job.setJarByClass(ReducerDataPig.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataPigMapper.class);
    job.setReducerClass(ReducerDataPigReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(ReducerPigKey.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.tfm.utad.reducerdata.ReducerDataVertica.java
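
The Vertica variant of the driver above, with the same isFile() cleanup pattern for the matched input files.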

public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {

    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd-HH-mm-ss"); // yyyy (calendar year), not YYYY (week-year)
    Date date = new Date();

    Path inputPath = new Path("/home/jab/camus/reducer-data-vertica");
    Path outputDir = new Path("/home/jab/camus/verticadb/" + sdf.format(date));

    // Create configuration
    Configuration conf = new Configuration(true);
    conf.set(FS_DEFAULT_FS, HDFS_LOCALHOST_LOCALDOMAIN);
    FileSystem fs = FileSystem.get(conf);
    Path filesPath = new Path(inputPath + "/*");
    FileStatus[] files = fs.globStatus(filesPath);

    // Create job
    Job job = new Job(conf, "ReducerDataVertica");
    job.setJarByClass(ReducerDataVertica.class);

    // Setup MapReduce
    job.setMapperClass(ReducerDataVerticaMapper.class);
    job.setReducerClass(ReducerDataVerticaReducer.class);
    job.setNumReduceTasks(1);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(ReducerVerticaValue.class);

    // Input
    FileInputFormat.addInputPath(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setOutputFormatClass(TextOutputFormat.class);

    // Delete output if exists
    if (fs.exists(outputDir)) {
        fs.delete(outputDir, true);
    }

    // Execute job
    int code = job.waitForCompletion(true) ? 0 : 1;
    if (code == 0) {
        Counters counters = job.getCounters();
        Counter malformedCounter = counters.findCounter(ReducerDataEnum.MALFORMED_DATA);
        LOG.info("Counter malformed data: " + malformedCounter.getValue());
        for (FileStatus fStatus : files) {
            LOG.info("File name:" + fStatus.getPath());
            if (fStatus.isFile()) {
                LOG.info("Removing file in path:" + fStatus.getPath());
                fs.delete(fStatus.getPath(), false);
            }
        }
    }
}

From source file:com.twitter.algebra.matrix.format.MapDir.java

License:Apache License
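
A du-style helper: entries that are regular files (plain sequence files) contribute getLen() directly, while map-file directories are sized recursively.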

/**
 * Disk usage of the MapDir or a dir of sequence files
 * @param mapDirPath the path to MapDir or a directory of sequence files
 * @param fs the file system
 * @return the total size in bytes
 * @throws FileNotFoundException
 * @throws IOException
 */
public static long du(Path mapDirPath, FileSystem fs) throws FileNotFoundException, IOException {
    FileStatus[] dirs = fs.listStatus(mapDirPath, mapFilter);
    if (dirs.length == 0) // not a MapDir; fall back to a plain listing
        dirs = fs.listStatus(mapDirPath);
    long size = 0;
    for (FileStatus dirStatus : dirs) {
        //if it is a sequence file
        if (dirStatus.isFile())
            size += dirStatus.getLen();
        else
            //or if it is a mapfile, which is directory
            size += dirSize(dirStatus, fs);
    }
    return size;
}

From source file:com.uber.hoodie.common.util.FSUtils.java

License:Apache License
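
Recursive traversal of a Hudi base path: top-level regular files go straight to the callback, while directories that pass the filter are walked with listFiles(..., true).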

/**
 * Recursively processes all files in the base-path. If excludeMetaFolder is set, the meta-folder and all its
 * subdirs are skipped.
 * @param fs           File System
 * @param basePathStr  Base-Path
 * @param consumer     Callback for processing
 * @param excludeMetaFolder Exclude .hoodie folder
 * @throws IOException
 */
@VisibleForTesting
static void processFiles(FileSystem fs, String basePathStr, Function<FileStatus, Boolean> consumer,
        boolean excludeMetaFolder) throws IOException {
    PathFilter pathFilter = excludeMetaFolder ? getExcludeMetaPathFilter() : ALLOW_ALL_FILTER;
    FileStatus[] topLevelStatuses = fs.listStatus(new Path(basePathStr));
    for (int i = 0; i < topLevelStatuses.length; i++) {
        FileStatus child = topLevelStatuses[i];
        if (child.isFile()) {
            boolean success = consumer.apply(child);
            if (!success) {
                throw new HoodieException("Failed to process file-status=" + child);
            }
        } else if (pathFilter.accept(child.getPath())) {
            RemoteIterator<LocatedFileStatus> itr = fs.listFiles(child.getPath(), true);
            while (itr.hasNext()) {
                FileStatus status = itr.next();
                boolean success = consumer.apply(status);
                if (!success) {
                    throw new HoodieException("Failed to process file-status=" + status);
                }
            }
        }
    }
}

From source file:com.wandisco.s3hdfs.rewrite.filter.TestMetadata.java

License:Apache License
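
A test for the S3HDFS rewrite filter: it asserts with isFile() that the object's metadata materialized as a regular file in HDFS before fetching it back over WebHDFS.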

@Test
@SuppressWarnings("deprecation")
public void testBasicMetadataRead()
        throws IOException, URISyntaxException, ServiceException, NoSuchAlgorithmException {
    NameNode nn = cluster.getNameNode();
    System.out.println(nn.getHttpAddress().toString());

    S3HdfsPath s3HdfsPath = testUtil.setUpS3HdfsPath("myBucket", "bigFile");

    // Put new object
    byte[] data = new byte[SMALL_SIZE];
    for (int i = 0; i < SMALL_SIZE; i++) {
        data[i] = (byte) (i % 256);
    }
    S3Object object = new S3Object(s3HdfsPath.getObjectName(), data);
    Map<String, Object> metaEntries = new HashMap<String, Object>();
    metaEntries.put("scared", "yes");
    metaEntries.put("tired", "yes");
    metaEntries.put("hopeless", "never");
    object.addAllMetadata(metaEntries);
    object.setMetadataComplete(true);
    s3Service.putObject(s3HdfsPath.getBucketName(), object);

    HttpClient httpClient = new HttpClient();

    // Set up HttpGet and get response
    FileStatus fs = hdfs.getFileStatus(new Path(s3HdfsPath.getFullHdfsMetaPath()));
    assertTrue(fs.isFile());
    assertTrue(fs.getPath().getName().equals(META_FILE_NAME));
    String url = "http://" + hostName + ":" + PROXY_PORT + "/webhdfs/v1/s3hdfs/" + s3HdfsPath.getUserName()
            + "/myBucket/bigFile/" + DEFAULT_VERSION + "/" + META_FILE_NAME + "?op=OPEN";
    GetMethod httpGet = new GetMethod(url);
    httpClient.executeMethod(httpGet);
    InputStream is = httpGet.getResponseBodyAsStream();
    Properties retVal = testUtil.parseMap(is);
    System.out.println(retVal);

    // consume response and re-allocate connection
    httpGet.releaseConnection();
    assert httpGet.getStatusCode() == 200;
    assert retVal.getProperty("x-amz-meta-scared").equals("yes");
    assert retVal.getProperty("x-amz-meta-tired").equals("yes");
    assert retVal.getProperty("x-amz-meta-hopeless").equals("never");
}

From source file:edu.umn.cs.spatialHadoop.visualization.HadoopvizServer.java

License:Open Source License
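
A directory-listing handler: the comparator uses isDirectory()/isFile() to sort directories before files, then emits a WebHDFS-style JSON listing.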

/**
 * Lists the contents of a directory
 * @param request
 * @param response
 */
private void handleListFiles(HttpServletRequest request, HttpServletResponse response) {
    try {
        String pathStr = request.getParameter("path");
        Path path = new Path(pathStr == null || pathStr.isEmpty() ? "/" : pathStr);
        FileSystem fs = path.getFileSystem(commonParams);
        FileStatus[] fileStatuses = fs.listStatus(path, SpatialSite.NonHiddenFileFilter);
        Arrays.sort(fileStatuses, new Comparator<FileStatus>() {
            @Override
            public int compare(FileStatus o1, FileStatus o2) {
                if (o1.isDirectory() && o2.isFile())
                    return -1;
                if (o1.isFile() && o2.isDirectory())
                    return 1;
                return o1.getPath().getName().toLowerCase().compareTo(o2.getPath().getName().toLowerCase());
            }
        });
        response.setContentType("application/json;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_OK);
        PrintWriter out = response.getWriter();
        out.print("{\"FileStatuses\":{");
        if (pathStr != null && pathStr.endsWith("/")) { // pathStr may be null when the parameter was absent
            pathStr = pathStr.substring(0, pathStr.length() - 1);
        }
        out.printf("\"BaseDir\":\"%s\",", pathStr);
        if (path.getParent() != null)
            out.printf("\"ParentDir\":\"%s\",", path.getParent());
        out.print("\"FileStatus\":[");
        for (int i = 0; i < fileStatuses.length; i++) {
            FileStatus fileStatus = fileStatuses[i];
            if (i != 0)
                out.print(',');
            String filename = fileStatus.getPath().getName();
            int idot = filename.lastIndexOf('.');
            String extension = idot == -1 ? "" : filename.substring(idot + 1);
            out.printf(
                    "{\"accessTime\":%d,\"blockSize\":%d,\"childrenNum\":%d,\"fileId\":%d,"
                            + "\"group\":\"%s\",\"length\":%d,\"modificationTime\":%d,"
                            + "\"owner\":\"%s\",\"pathSuffix\":\"%s\",\"permission\":\"%s\","
                            + "\"replication\":%d,\"storagePolicy\":%d,\"type\":\"%s\",\"extension\":\"%s\"}",
                    fileStatus.getAccessTime(), fileStatus.getBlockSize(), 0, 0, fileStatus.getGroup(),
                    fileStatus.getLen(), fileStatus.getModificationTime(), fileStatus.getOwner(),
                    fileStatus.getPath().getName(), fileStatus.getPermission(), fileStatus.getReplication(), 0,
                    fileStatus.isDirectory() ? "DIRECTORY" : "FILE", extension.toLowerCase());
        }
        out.print("]}");
        // Check if there is an image or master file
        FileStatus[] metaFiles = fs.listStatus(path, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().startsWith("_master") || path.getName().equals("_data.png");
            }
        });
        for (FileStatus metaFile : metaFiles) {
            String metaFileName = metaFile.getPath().getName();
            if (metaFileName.startsWith("_master")) {
                out.printf(",\"MasterPath\":\"%s\"", metaFileName);
                String shape = OperationsParams.detectShape(fileStatuses[0].getPath(), commonParams);
                if (shape != null)
                    out.printf(",\"Shape\":\"%s\"", shape);
            } else if (metaFileName.equals("_data.png"))
                out.printf(",\"ImagePath\":\"%s\"", metaFileName);
        }
        out.print("}");

        out.close();
    } catch (Exception e) {
        System.out.println("error happened");
        e.printStackTrace();
        try {
            e.printStackTrace(response.getWriter());
        } catch (IOException ioe) {
            ioe.printStackTrace();
            e.printStackTrace();
        }
        response.setContentType("text/plain;charset=utf-8");
        response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
    }
}

From source file:gobblin.data.management.conversion.hive.validation.ValidationJob.java

License:Apache License
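
Hive validation queries write their results to a temp directory; isFile() selects the regular output files, and the single result file is read back and parsed as a row count.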

/***
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE", justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {

    if (null == queries || queries.size() == 0) {
        log.warn("No queries specified to be executed");
        return Collections.emptyList();
    }

    List<Long> rowCounts = Lists.newArrayList();
    Closer closer = Closer.create();

    try {
        HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
        for (String query : queries) {
            String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
            Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
            query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
            log.info("Executing query: " + query);
            try {
                hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
                        "SET hive.auto.convert.join=false", query);
                FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
                List<FileStatus> files = new ArrayList<>();
                for (FileStatus fileStatus : fileStatusList) {
                    if (fileStatus.isFile()) {
                        files.add(fileStatus);
                    }
                }
                if (files.size() > 1) {
                    log.warn("Found more than one output file. Should have been one.");
                } else if (files.size() == 0) {
                    log.warn("Found no output file. Should have been one.");
                } else {
                    String theString = IOUtils.toString(
                            new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
                    log.info("Found row count: " + theString.trim());
                    if (StringUtils.isBlank(theString.trim())) {
                        rowCounts.add(0L);
                    } else {
                        try {
                            rowCounts.add(Long.parseLong(theString.trim()));
                        } catch (NumberFormatException e) {
                            throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
                        }
                    }
                }
            } finally {
                if (this.fs.exists(hiveTempDir)) {
                    log.debug("Deleting temp dir: " + hiveTempDir);
                    this.fs.delete(hiveTempDir, true);
                }
            }
        }
    } catch (SQLException e) {
        throw new RuntimeException(e);
    } finally {
        try {
            closer.close();
        } catch (IOException e) {
            log.warn("Could not close HiveJdbcConnector", e);
        }
    }

    return rowCounts;
}