Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

On this page you can find example usage for org.apache.hadoop.fs.FileStatus.getPath.

Prototype

public Path getPath() 
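A minimal sketch of the call itself, ahead of the real-world examples below; the directory and the printed URI are illustrative assumptions:

// Minimal sketch: list a directory and print each entry's fully qualified Path.
// FileSystem, FileStatus, Path, and Configuration come from org.apache.hadoop.
FileSystem fs = FileSystem.get(new Configuration());
for (FileStatus status : fs.listStatus(new Path("/tmp"))) {
    System.out.println(status.getPath()); // e.g. hdfs://namenode:8020/tmp/part-00000
}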

Usage

From source file:com.conductor.s3.S3InputFormatUtils.java

License:Apache License

/**
 * Converts the {@link org.apache.hadoop.fs.FileStatus}s to {@link org.apache.hadoop.mapred.InputSplit}s (MRV1 API).
 * <p>
 * This is taken directly from {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}, less any file system
 * operations that do not make sense when using {@code S3}.
 * 
 * @param files
 *            the files to convert
 * @param minSize
 *            the minimum size of the splits
 * @param maxSize
 *            the maximum size of the splits
 * @return the splits.
 */
static List<InputSplit> convertToInputSplitsMRV1(final Iterable<FileStatus> files, final long minSize,
        final long maxSize) {
    final List<InputSplit> splits = Lists.newArrayList();
    for (final FileStatus file : files) {
        // check for valid data for this input format
        checkArgument(!file.isDirectory(), "Cannot pass directories to this method!");
        final String path = file.getPath().toString();
        checkArgument(path.startsWith("s3:") || path.startsWith("s3n:"), "Expected S3 input");

        // create splits out of file
        final long length = file.getLen();
        if (length > 0) {
            long blockSize = file.getBlockSize();
            long splitSize = computeSplitSize(blockSize, minSize, maxSize);
            long bytesRemaining = length;
            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
                splits.add(new FileSplit(file.getPath(), length - bytesRemaining, splitSize, S3_SPLIT_HOST));
                bytesRemaining -= splitSize;
            }
            if (bytesRemaining != 0) {
                splits.add(
                        new FileSplit(file.getPath(), length - bytesRemaining, bytesRemaining, S3_SPLIT_HOST));
            }
        }
    }
    return splits;
}
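The computeSplitSize helper and SPLIT_SLOP constant are not shown above. In Hadoop's FileInputFormat, SPLIT_SLOP is 1.1 (the last split may run up to 10% over the target size), and the split size is the block size clamped to the [minSize, maxSize] range, along these lines:

// As defined in org.apache.hadoop.mapreduce.lib.input.FileInputFormat:
// clamp the block size into the [minSize, maxSize] range.
protected static long computeSplitSize(long blockSize, long minSize, long maxSize) {
    return Math.max(minSize, Math.min(maxSize, blockSize));
}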

From source file:com.constellio.sdk.tests.FactoriesTestFeatures.java

private void deleteFromHadoop(String user, String url) {
    if (url == null || user == null) {
        throw new RuntimeException("No config");
    }

    System.setProperty("HADOOP_USER_NAME", user);
    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("fs.defaultFS", url);
    hadoopConfig.set("hadoop.job.ugi", user);

    try {
        FileSystem hdfs = FileSystem.get(hadoopConfig);
        for (FileStatus file : hdfs.globStatus(new Path("*"))) {
            hdfs.delete(file.getPath(), true);
        }

    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
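A usage sketch; the user name and namenode URL are illustrative assumptions. The glob matches entries in the user's home directory on the target file system and each match is deleted recursively, so this belongs only in disposable test environments:

// Illustrative call; "testuser" and the URL are assumptions.
deleteFromHadoop("testuser", "hdfs://localhost:8020");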

From source file:com.conversantmedia.mapreduce.example.distribute.DistributedObjectExample.java

License:Apache License

/**
 * Distributed {@link Set} containing the prepared list of blacklisted words.
 *
 * @return the set of blacklisted words
 * @throws IOException   if it fails to read in the file
 */
@Distribute
public Set<String> getBlacklist() throws IOException {
    Set<String> blacklist = null;
    if (StringUtils.isNotBlank(context.blacklist)) {
        blacklist = new HashSet<>();
        InputStreamReader reader = null;
        try {
            FileSystem fs = FileSystem.get(new Configuration());
            FileStatus file = fs.getFileStatus(new Path(context.blacklist));
            reader = new InputStreamReader(fs.open(file.getPath()));
            for (String line : IOUtils.readLines(reader)) {
                blacklist.add(line);
            }
        } finally {
            IOUtils.closeQuietly(reader);
        }
    }
    return blacklist;
}
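For comparison, the same read could be written with try-with-resources, which closes the stream even on failure; this is a sketch under the same context.blacklist assumption, not part of the original example:

FileSystem fs = FileSystem.get(new Configuration());
Set<String> blacklist = new HashSet<>();
try (BufferedReader reader = new BufferedReader(
        new InputStreamReader(fs.open(new Path(context.blacklist)), StandardCharsets.UTF_8))) {
    String line;
    while ((line = reader.readLine()) != null) {
        blacklist.add(line);
    }
}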

From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java

License:Apache License

@DriverInit
public void copyFilesToWorking() throws IOException {
    // Copy the input files into the 'workingDir'
    FileSystem fs = FileSystem.get(getConf());

    this.workingDirectory = new Path("/tmp/" + UUID.randomUUID().toString());
    fs.mkdirs(workingDirectory);

    FileStatus[] files = fs.globStatus(new Path(context.getInput()));
    for (FileStatus file : files) {
        Path dest = new Path(workingDirectory, file.getPath().getName());
        FileUtil.copy(fs, file.getPath(), fs, dest, false, getConf());
    }
}

From source file:com.conversantmedia.mapreduce.io.avro.CombineAvroKeyFileInputFormat.java

License:Apache License

@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    // Get the list from our parent...
    List<FileStatus> result = new ArrayList<>();

    // Loop through and remove any that are empty
    for (FileStatus file : super.listStatus(job)) {
        if (file.getLen() < 1) {
            logger().error("Skipping Empty file: " + file.getPath());
        } else {
            result.add(file);
        }
    }

    return result;
}
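A sketch of wiring this input format into a job; the job name and input path are illustrative assumptions:

// Hypothetical job setup using the combine input format above.
Job job = Job.getInstance(getConf(), "avro-combine-example");
job.setInputFormatClass(CombineAvroKeyFileInputFormat.class);
FileInputFormat.addInputPath(job, new Path("/data/avro")); // illustrative input path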

From source file:com.conversantmedia.mapreduce.tool.BaseTool.java

License:Apache License

/**
 * Moves our inputs into the 'archive' path for
 * long term storage, or perhaps further processing.
 * @param context      the job's driver context bean
 * @throws IOException   if the inputs cannot be moved to
 *          the archive path.
 */
protected void archiveInputs(T context) throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(context.getArchive());

    for (Path input : context.getInput()) {
        List<FileStatus> status = getInputFiles(input);
        for (FileStatus file : status) {
            Path dest = new Path(context.getArchive(), file.getPath().getName());
            fs.rename(file.getPath(), dest);
            logger().debug("Moved [" + input + "] to [" + dest + "]");
        }
    }
}
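One caveat worth noting: FileSystem.rename reports many failures by returning false rather than throwing, so a stricter variant of the move above would check the result; a sketch, assuming source and destination live on the same file system:

// Fail loudly if the archive move did not happen.
if (!fs.rename(file.getPath(), dest)) {
    throw new IOException("Failed to archive " + file.getPath() + " to " + dest);
}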

From source file:com.curiousby.baoyou.cn.hadoop.HDFSUtils.java

License:Open Source License

/**
 * Lists the entries under the given directory path.
 * @param dirPath the directory to list
 */
public List<String> dir(String dirPath) throws IOException {
    List<String> fileList = null;
    Path path = new Path(dirPath);
    if (fileSystem.exists(path)) {
        fileList = new ArrayList<String>();
        FileStatus[] list = this.getFileStatus(path);
        for (FileStatus fileStatus : list) {
            fileList.add(fileStatus.getPath().toString());
        }
    } else {
        System.out.println("?");
    }
    return fileList;
}
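A minimal usage sketch; hdfsUtils stands for an initialized HDFSUtils instance and the path is an assumption:

// Illustrative: list everything under /user/data and print each full URI.
List<String> entries = hdfsUtils.dir("/user/data");
if (entries != null) { // dir() returns null when the path does not exist
    for (String entry : entries) {
        System.out.println(entry);
    }
}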

From source file:com.dasasian.chok.operation.master.IndexDeployOperation.java

License:Apache License

protected static List<Shard> readShardsFromFs(final String indexName, final String indexPathString)
        throws IndexDeployException {
    // get shard folders from source
    URI uri;
    try {
        uri = new URI(indexPathString);
    } catch (final URISyntaxException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "unable to parse index path uri '"
                + indexPathString + "', make sure it starts with file:// or hdfs:// ", e);
    }
    FileSystem fileSystem;
    try {
        fileSystem = HadoopUtil.getFileSystem(new Path(uri.toString()));
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "unable to retrive file system for index path '" + indexPathString
                        + "', make sure your path starts with hadoop support prefix like file:// or hdfs://",
                e);
    }

    List<Shard> shards = new ArrayList<>();
    try {
        final Path indexPath = new Path(indexPathString);
        if (!fileSystem.exists(indexPath)) {
            throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                    "index path '" + uri + "' does not exists");
        }
        final FileStatus[] listStatus = fileSystem.listStatus(indexPath, new PathFilter() {
            public boolean accept(final Path aPath) {
                return !aPath.getName().startsWith(".");
            }
        });
        for (final FileStatus fileStatus : listStatus) {
            String shardPath = fileStatus.getPath().toString();
            if (fileStatus.isDir() || shardPath.endsWith(".zip")) {
                shards.add(new Shard(createShardName(indexName, shardPath), shardPath));
            }
        }
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "could not access index path: " + indexPathString, e);
    }

    if (shards.size() == 0) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index does not contain any shard");
    }
    return shards;
}

From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java

License:Apache License

public static void createTables(String database, TajoClient client) throws Exception {
    String dataDir = getDataDir();
    if (dataDir == null || dataDir.isEmpty()) {
        throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
    }

    if (dataDir.startsWith("hdfs://")) {
        Path path = new Path(dataDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        for (String eachTable : tableNames) {
            Path tableDataDir = new Path(path, eachTable);
            if (!fs.exists(tableDataDir)) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] does not exist.");
            }
        }
    } else {
        File dataDirFile = new File(dataDir);
        if (!dataDirFile.exists()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
        }
        if (dataDirFile.isFile()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
        }

        for (String eachTable : tableNames) {
            File tableDataDir = new File(dataDirFile, eachTable);
            if (!tableDataDir.exists()) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] does not exist.");
            }
        }
    }

    KeyValueSet opt = new KeyValueSet();
    opt.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

    LOG.info("Create database: " + database);
    client.executeQuery("create database if not exists " + database);

    Path tpcdsResourceURL = new Path(ClassLoader.getSystemResource("tpcds").toString());

    Path ddlPath = new Path(tpcdsResourceURL, "ddl");
    FileSystem localFs = FileSystem.getLocal(new Configuration());

    FileStatus[] files = localFs.listStatus(ddlPath);

    String dataDirWithPrefix = dataDir;
    if (dataDir.indexOf("://") < 0) {
        dataDirWithPrefix = "file://" + dataDir;
    }

    for (FileStatus eachFile : files) {
        if (eachFile.isFile()) {
            String tableName = eachFile.getPath().getName().split("\\.")[0];
            String query = FileUtil.readTextFile(new File(eachFile.getPath().toUri()));
            query = query.replace("${DB}", database);
            query = query.replace("${DATA_LOCATION}", dataDirWithPrefix + "/" + tableName);

            LOG.info("Create table:" + tableName + "," + query);
            client.executeQuery(query);
        }
    }
}

From source file:com.datasalt.pangool.solr.SolrRecordWriter.java

License:Apache License

/**
 * Write a file to a zip output stream, removing leading path name components from the actual file name when creating
 * the zip file entry.
 * 
 * The entry placed in the zip file is <code>baseName</code>/ <code>relativePath</code>, where
 * <code>relativePath</code> is constructed by removing a leading <code>root</code> from the path for
 * <code>itemToZip</code>.
 * 
 * If <code>itemToZip</code> is an empty directory, it is ignored. If <code>itemToZip</code> is a directory, the
 * contents of the directory are added recursively.
 * 
 * @param zos
 *          The zip output stream
 * @param baseName
 *          The base name to use for the file name entry in the zip file
 * @param root
 *          The path to remove from <code>itemToZip</code> to make a relative path name
 * @param itemToZip
 *          The path to the file to be added to the zip file
 * @return the number of entries added
 * @throws IOException
 */
static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName,
        final String root, final Path itemToZip) throws IOException {
    LOG.info(String.format("zipDirectory: %s %s %s", baseName, root, itemToZip));
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    int count = 0;

    final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
    if (itemStatus.isDir()) {
        final FileStatus[] statai = localFs.listStatus(itemToZip);

        // Add a directory entry to the zip file
        final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
        final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
        LOG.info(String.format("Adding directory %s to zip", zipDirName));
        zos.putNextEntry(dirZipEntry);
        zos.closeEntry();
        count++;

        if (statai == null || statai.length == 0) {
            LOG.info(String.format("Skipping empty directory %s", itemToZip));
            return count;
        }
        for (FileStatus status : statai) {
            count += zipDirectory(conf, zos, baseName, root, status.getPath());
        }
        LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
        return count;
    }

    final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);

    if (inZipPath.length() == 0) {
        LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
        return 0;
    }

    // Include empty files in case a placeholder entry is needed
    FSDataInputStream in = null;
    try {
        in = localFs.open(itemToZip);
        final ZipEntry ze = new ZipEntry(inZipPath);
        ze.setTime(itemStatus.getModificationTime());
        // Comments clutter the zip file listing
        // ze.setComment(itemToZip.toString());
        zos.putNextEntry(ze);

        IOUtils.copyBytes(in, zos, conf, false);
        zos.closeEntry();
        LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
        return 1;
    } finally {
        if (in != null) {
            in.close();
        }
    }

}
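A usage sketch for zipDirectory; the output file and directory are illustrative assumptions:

// Illustrative: zip a local configuration directory into /tmp/solr.zip.
// "conf" is an existing Hadoop Configuration; the paths are assumptions.
try (ZipOutputStream zos = new ZipOutputStream(new FileOutputStream("/tmp/solr.zip"))) {
    int entries = zipDirectory(conf, zos, "solr", "/tmp/solrconf", new Path("/tmp/solrconf"));
    LOG.info("Added " + entries + " zip entries");
}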