Example usage for org.apache.hadoop.fs FileSystem isFile

List of usage examples for org.apache.hadoop.fs FileSystem isFile

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem#isFile.

Prototype

@Deprecated
public boolean isFile(Path f) throws IOException 

Document

True iff the named path is a regular file.
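Because the method is marked @Deprecated, the Hadoop javadoc recommends reusing a FileStatus obtained from getFileStatus() or listStatus() instead. The following is a minimal sketch comparing both approaches; the class name and path are illustrative only and assume a reachable FileSystem:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class IsFileSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative path only; point this at a real file or directory.
        Path p = new Path("/tmp/example.txt");
        FileSystem fs = p.getFileSystem(new Configuration());

        // Deprecated convenience call: returns false if the path does not exist.
        boolean viaIsFile = fs.isFile(p);

        // Recommended replacement: fetch the FileStatus once and reuse it.
        // Note that getFileStatus() throws FileNotFoundException for a missing path.
        if (fs.exists(p)) {
            FileStatus status = fs.getFileStatus(p);
            System.out.println(p + " isFile=" + viaIsFile + ", status.isFile=" + status.isFile());
        }
    }
}

A single getFileStatus() call also exposes isDirectory(), getLen() and getModificationTime(), so reusing the FileStatus avoids repeated round trips to the NameNode.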

Usage

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given lines of text.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected lines in the file (not including terminating end-of-lines).
 */
public static void assertReadFile(String recordDelimiter, FileSystem fs, String path, String... expected)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(path))))) {
            Scanner s = new Scanner(r).useDelimiter(recordDelimiter);
            for (String line : expected) {
                assertThat(s.next(), is(line));
            }
            assertThat(s.hasNext(), is(false));
        }
    } else if (fs.isDirectory(p)) {
        HashSet<String> expect = new HashSet<>(Arrays.asList(expected));
        for (FileStatus fstatus : fs.listStatus(p)) {
            try (BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(fstatus.getPath())))) {
                Scanner s = new Scanner(r).useDelimiter(recordDelimiter);
                String line = null;
                while (s.hasNext()) {
                    line = s.next();
                    if (!expect.remove(line))
                        fail("Unexpected line: " + line);
                }
            }
        }
        // Check before asserting for the message.
        if (expect.size() != 0)
            assertThat("Not all lines found: " + expect.iterator().next(), expect, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given Avro records.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file.
 */
public static void assertReadAvroFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (DataFileStream<GenericRecord> reader = new DataFileStream<GenericRecord>(
                new BufferedInputStream(fs.open(new Path(path))), new GenericDatumReader<GenericRecord>())) {
            IndexedRecord record = null;
            while (reader.hasNext()) {
                record = reader.iterator().next();
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadAvroFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:org.talend.components.test.MiniDfsResource.java

License:Open Source License

/**
 * Tests that a file on the HDFS cluster contains the given Parquet records.
 *
 * @param path the name of the file on the HDFS cluster
 * @param expected the expected Avro records in the file.
 */
public static void assertReadParquetFile(FileSystem fs, String path, Set<IndexedRecord> expected, boolean part)
        throws IOException {
    Path p = new Path(path);
    if (fs.isFile(p)) {
        try (AvroParquetReader<GenericRecord> reader = new AvroParquetReader<GenericRecord>(fs.getConf(),
                new Path(path))) {
            IndexedRecord record = null;
            while (null != (record = reader.read())) {
                IndexedRecord eqRecord = null;
                for (IndexedRecord indexedRecord : expected) {
                    if (indexedRecord.equals(record)) {
                        eqRecord = indexedRecord;
                        break;
                    }
                }
                expected.remove(eqRecord);
            }
        }
        // Check before asserting for the message.
        if (!part && expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else if (fs.isDirectory(p)) {
        for (FileStatus fstatus : FileSystemUtil.listSubFiles(fs, p)) {
            assertReadParquetFile(fs, fstatus.getPath().toString(), expected, true);
        }
        // Check before asserting for the message.
        if (expected.size() != 0)
            assertThat("Not all avro records found: " + expected.iterator().next(), expected, hasSize(0));
    } else {
        fail("No such path: " + path);
    }
}

From source file:org.trustedanalytics.resourceserver.data.InputStreamProvider.java

License:Apache License

/**
 * Gets an InputStream for a path on HDFS.
 *
 * If given path is a directory, it will read files inside that dir and create
 * a SequenceInputStream from them, which emulates reading from directory just like from
 * a regular file. Note that this method is not meant for reading huge datasets
 * (nor is the project as a whole).
 * @param path the HDFS path to read (a file or a directory)
 * @return an InputStream over the file, or a SequenceInputStream over the directory's files
 * @throws IOException if the path cannot be accessed
 */
public InputStream getInputStream(Path path) throws IOException {
    Objects.requireNonNull(path);

    FileSystem fs = hdfsConfig.getFileSystem();
    if (fs.isFile(path)) {
        return fs.open(path);
    } else if (fs.isDirectory(path)) {
        FileStatus[] files = fs.listStatus(path);
        List<InputStream> paths = Arrays.stream(files).map(f -> {
            try {
                return fs.open(f.getPath());
            } catch (IOException e) {
                LOGGER.log(Level.SEVERE, "Cannot read file " + f.getPath().toString(), e);
                return null;
            }
        }).filter(f -> f != null).collect(Collectors.toList());
        return new SequenceInputStream(Collections.enumeration(paths));
    } else {
        throw new IllegalArgumentException("Given path " + path.toString() + " is neither file nor directory");
    }
}

From source file:pl.edu.icm.coansys.statisticsgenerator.tools.ViewTool.java

License:Open Source License

private static void processFileOrDirectory(Path pt, Configuration conf) throws IOException {
    FileSystem fs = pt.getFileSystem(conf);
    if (fs.isDirectory(pt)) {
        for (FileStatus fstat : fs.listStatus(pt)) {
            processFileOrDirectory(fstat.getPath(), conf);
        }
    } else if (fs.isFile(pt)) {
        viewFile(pt, conf);
    } else {
        // log the error
    }
}

From source file:streaming.core.DownloadRunner.java

License:Apache License

public static int getTarFileByPath(HttpServletResponse res, String pathStr) {
    String[] paths = pathStr.split(",");
    try {
        OutputStream outputStream = res.getOutputStream();

        TarOutputStream tarOutputStream = new TarOutputStream(new BufferedOutputStream(outputStream));

        FileSystem fs = FileSystem.get(new Configuration());
        List<FileStatus> files = new ArrayList<FileStatus>();

        for (String path : paths) {
            Path p = new Path(path);
            if (fs.exists(p)) {
                if (fs.isFile(p)) {
                    files.add(fs.getFileStatus(p));
                } else if (fs.isDirectory(p)) {
                    FileStatus[] fileStatusArr = fs.listStatus(p);
                    if (fileStatusArr != null && fileStatusArr.length > 0) {

                        for (FileStatus cur : fileStatusArr) {
                            if (cur.isFile()) {
                                files.add(cur);
                            }
                        }
                    }
                }
            }

        }

        if (files.size() > 0) {
            FSDataInputStream inputStream = null;
            int len = files.size();
            int i = 1;
            for (FileStatus cur : files) {
                logger.info("[" + i++ + "/" + len + "]" + ",?" + cur);
                inputStream = fs.open(cur.getPath());

                tarOutputStream.putNextEntry(new HDFSTarEntry(cur, cur.getPath().getName()));
                org.apache.commons.io.IOUtils.copyLarge(inputStream, tarOutputStream);
                inputStream.close();

            }
            tarOutputStream.flush();
            tarOutputStream.close();
            return 200;
        } else
            return 400;

    } catch (Exception e) {
        e.printStackTrace();
        return 500;

    }
}

From source file:streaming.core.DownloadRunner.java

License:Apache License

public static int getRawFileByPath(HttpServletResponse res, String path, long position) {

    try {
        FileSystem fs = FileSystem.get(new Configuration());

        Path p = new Path(path);
        if (fs.exists(p)) {

            List<FileStatus> files = new ArrayList<FileStatus>();

            // Collect the target files (a single file or a directory's children).
            if (fs.isFile(p)) {
                files.add(fs.getFileStatus(p));
            } else if (fs.isDirectory(p)) {

                FileStatus[] fileStatusArr = fs.listStatus(p);
                if (fileStatusArr != null && fileStatusArr.length > 0) {

                    for (FileStatus cur : fileStatusArr) {
                        files.add(cur);
                    }
                }
            }

            // Stream the collected files back to the client.
            if (files.size() > 0) {

                logger.info(path + " contains " + files.size() + " file(s)");

                FSDataInputStream inputStream = null;
                OutputStream outputStream = res.getOutputStream();

                int len = files.size();
                int i = 1;
                long allPosition = 0;
                for (FileStatus cur : files) {

                    logger.info("[" + i++ + "/" + len + "]" + path + ",?" + cur);
                    inputStream = fs.open(cur.getPath());

                    if (position > 0) {

                        if (allPosition + cur.getLen() > position) {
                            inputStream.seek(position - allPosition);
                            logger.info("seek position " + (position - allPosition));
                            position = -1;
                        }
                        allPosition += cur.getLen();
                    }
                    org.apache.commons.io.IOUtils.copyLarge(inputStream, outputStream);
                    inputStream.close();

                }
                outputStream.flush();
                outputStream.close();
                return 200;

            } else {
                logger.info(path + " contains " + files.size() + " file(s)");
            }

        } else {

            return 400;
        }

    } catch (Exception e) {
        e.printStackTrace();

    }

    return 500;
}

From source file:uk.bl.wa.hadoop.mapreduce.hash.HdfsFileHasher.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    // Options:
    String[] otherArgs = new GenericOptionsParser(args).getRemainingArgs();

    // Process the remaining args:
    Options options = new Options();
    options.addOption("i", true, "a local file containing a list of HDFS paths to process");
    options.addOption("o", true, "output directory");
    options.addOption("m", false, "use MD5 rather than SHA-512");
    options.addOption("r", true, "number of reducers (defaults to 1)");

    CommandLineParser parser = new PosixParser();
    CommandLine cmd = parser.parse(options, otherArgs);
    if (!cmd.hasOption("i") || !cmd.hasOption("o")) {
        HelpFormatter helpFormatter = new HelpFormatter();
        helpFormatter.setWidth(80);
        helpFormatter.printHelp(CLI_USAGE, CLI_HEADER, options, "");
        System.exit(1);
    }
    String input_file = cmd.getOptionValue("i");
    String output_path = cmd.getOptionValue("o");
    String algorithm = null;
    int numReducers = 1;
    if (cmd.hasOption("m")) {
        algorithm = "MD5";
    }
    if (cmd.hasOption("r")) {
        numReducers = Integer.parseInt(cmd.getOptionValue("r"));
    }

    // When implementing tool, choose algorithm:
    Configuration conf = this.getConf();
    if (algorithm != null)
        conf.set(MessageDigestMapper.CONFIG_DIGEST_ALGORITHM, algorithm);

    // Create job
    Job job = new Job(conf, "HDFS File Checksummer");
    job.setJarByClass(HdfsFileHasher.class);

    // Set up the MapReduce job; the base Reducer class acts as an identity reducer.
    job.setMapperClass(MessageDigestMapper.class);
    job.setReducerClass(Reducer.class);

    // One output file per reducer (defaults to a single reducer):
    job.setNumReduceTasks(numReducers);

    // Specify key / value
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Input
    log.info("Reading input files...");
    String line = null;
    long line_count = 0;
    BufferedReader br = new BufferedReader(new FileReader(input_file));
    while ((line = br.readLine()) != null) {
        if (StringUtils.isEmpty(line))
            continue;
        line_count++;
        Path path = new Path(line);
        FileSystem fs = path.getFileSystem(conf);
        if (fs.isFile(path)) {
            FileInputFormat.addInputPath(job, path);
        } else if (fs.isDirectory(path)) {
            FileStatus[] listing = fs.listStatus(path);
            int list_count = 0;
            for (FileStatus fstat : listing) {
                list_count++;
                log.info("Checking " + list_count + "/" + listing.length + " " + fstat.getPath());
                if (!fstat.isDir()) {
                    FileInputFormat.addInputPath(job, fstat.getPath());
                }
            }
        }
    }
    br.close();
    log.info("Read " + FileInputFormat.getInputPaths(job).length + " input files from " + line_count
            + " paths.");
    job.setInputFormatClass(UnsplittableInputFileFormat.class);

    // Output
    FileOutputFormat.setOutputPath(job, new Path(output_path));
    job.setOutputFormatClass(TextOutputFormat.class);

    // Execute job and return status
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:voldemort.store.readonly.fetcher.HdfsFetcher.java

License:Apache License

private boolean fetch(FileSystem fs, Path source, File dest, CopyStats stats) throws IOException {
    if (!fs.isFile(source)) {
        Utils.mkdirs(dest);
        FileStatus[] statuses = fs.listStatus(source);
        if (statuses != null) {
            // sort the files so that index files come last. Maybe
            // this will help keep them cached until the swap
            Arrays.sort(statuses, new IndexFileLastComparator());
            byte[] origCheckSum = null;
            CheckSumType checkSumType = CheckSumType.NONE;

            // Do a checksum of checksum - Similar to HDFS
            CheckSum checkSumGenerator = null;
            CheckSum fileCheckSumGenerator = null;

            for (FileStatus status : statuses) {

                // Kept for backwards compatibility
                if (status.getPath().getName().contains("checkSum.txt")) {

                    // Ignore old checksum files

                } else if (status.getPath().getName().contains(".metadata")) {

                    logger.debug("Reading .metadata");
                    // Read metadata into local file
                    File copyLocation = new File(dest, status.getPath().getName());
                    copyFileWithCheckSum(fs, status.getPath(), copyLocation, stats, null);

                    // Open the local file to initialize checksum
                    ReadOnlyStorageMetadata metadata;
                    try {
                        metadata = new ReadOnlyStorageMetadata(copyLocation);
                    } catch (IOException e) {
                        logger.error("Error reading metadata file ", e);
                        throw new VoldemortException(e);
                    }

                    // Read checksum
                    String checkSumTypeString = (String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM_TYPE);
                    String checkSumString = (String) metadata.get(ReadOnlyStorageMetadata.CHECKSUM);

                    if (checkSumTypeString != null && checkSumString != null) {

                        try {
                            origCheckSum = Hex.decodeHex(checkSumString.toCharArray());
                        } catch (DecoderException e) {
                            logger.error("Exception reading checksum file. Ignoring checksum ", e);
                            continue;
                        }

                        logger.debug("Checksum from .metadata " + new String(Hex.encodeHex(origCheckSum)));
                        checkSumType = CheckSum.fromString(checkSumTypeString);
                        checkSumGenerator = CheckSum.getInstance(checkSumType);
                        fileCheckSumGenerator = CheckSum.getInstance(checkSumType);
                    }

                } else if (!status.getPath().getName().startsWith(".")) {

                    // Read other (.data , .index files)
                    File copyLocation = new File(dest, status.getPath().getName());
                    copyFileWithCheckSum(fs, status.getPath(), copyLocation, stats, fileCheckSumGenerator);

                    if (fileCheckSumGenerator != null && checkSumGenerator != null) {
                        byte[] checkSum = fileCheckSumGenerator.getCheckSum();
                        logger.debug("Checksum for " + status.getPath() + " - "
                                + new String(Hex.encodeHex(checkSum)));
                        checkSumGenerator.update(checkSum);
                    }
                }

            }

            logger.info(
                    "Completed reading all files from " + source.toString() + " to " + dest.getAbsolutePath());
            // Check checksum
            if (checkSumType != CheckSumType.NONE) {
                byte[] newCheckSum = checkSumGenerator.getCheckSum();
                boolean checkSumComparison = (ByteUtils.compare(newCheckSum, origCheckSum) == 0);

                logger.info("Checksum generated from streaming - " + new String(Hex.encodeHex(newCheckSum)));
                logger.info("Checksum on file - " + new String(Hex.encodeHex(origCheckSum)));
                logger.info("Check-sum verification - " + checkSumComparison);

                return checkSumComparison;
            } else {
                logger.info("No check-sum verification required");
                return true;
            }
        }
    }
    logger.error("Source " + source.toString() + " should be a directory");
    return false;

}

From source file:voldemort.store.readonly.mr.utils.AvroUtils.java

License:Apache License

/**
 * Pull the schema off of the given file (if it is a file). If it is a
 * directory, then pull schemas off of all subfiles, and check that they are
 * all the same schema. If so, return that schema, otherwise throw an
 * exception.
 * 
 * @param fs The filesystem to use
 * @param path The path from which to get the schema
 * @param checkSameSchema boolean flag to check all files in directory for
 *        same schema
 * @return The schema of this file or all its subfiles
 * @throws IOException
 */

@SuppressWarnings({ "unchecked", "rawtypes" })
private static Schema getSchemaFromPath(FileSystem fs, Path path, boolean checkSameSchema) {

    try {
        if (fs.isFile(path)) {
            BufferedInputStream inStream = null;
            try {
                inStream = new BufferedInputStream(fs.open(path));
            } catch (IOException e1) {
                // TODO Auto-generated catch block
                e1.printStackTrace();
            }
            GenericDatumReader datum = new GenericDatumReader();

            DataFileStream reader = null;
            try {
                reader = new DataFileStream(inStream, datum);
            } catch (IOException e) {
                // TODO Auto-generated catch block
                e.printStackTrace();
            }
            return reader.getSchema();
        } else {
            FileStatus[] statuses = null;
            if (fs.isDirectory(path)) {
                // this is a directory, get schemas from all subfiles
                statuses = fs.listStatus(path);
            } else {
                // this is wildcard path, get schemas from all matched files
                statuses = fs.globStatus(path);
            }
            if (statuses == null || statuses.length == 0)
                throw new IllegalArgumentException("No files found in path pattern " + path.toUri().getPath());
            List<Schema> schemas = new ArrayList<Schema>();
            for (FileStatus status : statuses) {
                if (!HadoopUtils.shouldPathBeIgnored(status.getPath())) {
                    if (!checkSameSchema) {
                        // return first valid schema w/o checking all files
                        return getSchemaFromPath(fs, status.getPath(), checkSameSchema);
                    }
                    schemas.add(getSchemaFromPath(fs, status.getPath(), checkSameSchema));
                }
            }

            // now check that all the schemas are the same
            if (schemas.size() > 0) {
                Schema schema = schemas.get(0);
                for (int i = 1; i < schemas.size(); i++)
                    if (!schema.equals(schemas.get(i)))
                        throw new IllegalArgumentException("The directory " + path.toString()
                                + " contains heterogenous schemas: found both '" + schema.toString() + "' and '"
                                + schemas.get(i).toString() + "'.");

                return schema;
            } else {
                throw new IllegalArgumentException("No Valid metadata file found for Path:" + path.toString());
            }
        }
    } catch (Exception e) {
        // logger.error("failed to get metadata from path:" + path);
        throw new RuntimeException(e);
    }

}