Example usage for org.apache.hadoop.fs FileStatus getPath

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileStatus.getPath() method.

Prototype

public Path getPath()

Source Link

Usage

From source file:com.conductor.s3.S3InputFormatUtils.java

License:Apache License

/**
 * Builds MRV1 {@link org.apache.hadoop.mapred.InputSplit}s for the supplied
 * {@link org.apache.hadoop.fs.FileStatus} entries.
 * <p>
 * The splitting loop follows {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}, minus the file system
 * operations that have no meaning for {@code S3}. Every split is created with {@code S3_SPLIT_HOST} as its
 * location, and files whose length is not positive contribute no splits.
 *
 * @param files
 *            the files to convert; each must be a non-directory whose path starts with {@code s3:} or
 *            {@code s3n:}, otherwise the argument check fails
 * @param minSize
 *            the minimum size of the splits
 * @param maxSize
 *            the maximum size of the splits
 * @return the splits.
 */
static List<InputSplit> convertToInputSplitsMRV1(final Iterable<FileStatus> files, final long minSize,
        final long maxSize) {
    final List<InputSplit> result = Lists.newArrayList();
    for (final FileStatus status : files) {
        // reject anything this input format cannot handle
        checkArgument(!status.isDirectory(), "Cannot pass directories to this method!");
        final String location = status.getPath().toString();
        final boolean isS3 = location.startsWith("s3:") || location.startsWith("s3n:");
        checkArgument(isS3, "Expected S3 input");

        final long total = status.getLen();
        if (total <= 0) {
            // nothing to split
            continue;
        }

        final long splitSize = computeSplitSize(status.getBlockSize(), minSize, maxSize);

        // emit full-sized splits while the remainder is still larger than the slop factor allows
        long offset = 0;
        while (((double) (total - offset)) / splitSize > SPLIT_SLOP) {
            result.add(new FileSplit(status.getPath(), offset, splitSize, S3_SPLIT_HOST));
            offset += splitSize;
        }

        // whatever is left becomes the final (possibly over- or under-sized) split
        final long tail = total - offset;
        if (tail != 0) {
            result.add(new FileSplit(status.getPath(), offset, tail, S3_SPLIT_HOST));
        }
    }
    return result;
}

From source file:com.constellio.sdk.tests.FactoriesTestFeatures.java

/**
 * Recursively deletes every entry matched by the relative glob {@code "*"} on the Hadoop file system
 * configured from the given URL.
 * <p>
 * The user name is published through the {@code HADOOP_USER_NAME} system property and the
 * {@code hadoop.job.ugi} configuration key; the URL is set as {@code fs.defaultFS}.
 *
 * @param user the Hadoop user name to act as; must not be {@code null}
 * @param url  the default file system URL; must not be {@code null}
 * @throws RuntimeException with message {@code "No config"} if either argument is {@code null}, or wrapping
 *                          the {@link IOException} raised by the file system
 */
private void deleteFromHadoop(String user, String url) {
    // Validate before touching any state: System.setProperty rejects a null value with a
    // NullPointerException, which previously pre-empted the intended "No config" error.
    if (url == null || user == null) {
        throw new RuntimeException("No config");
    }

    System.setProperty("HADOOP_USER_NAME", user);
    Configuration hadoopConfig = new Configuration();
    hadoopConfig.set("fs.defaultFS", url);
    hadoopConfig.set("hadoop.job.ugi", user);

    try {
        FileSystem hdfs = FileSystem.get(hadoopConfig);
        for (FileStatus file : hdfs.globStatus(new Path("*"))) {
            // second argument requests recursive deletion
            hdfs.delete(file.getPath(), true);
        }

    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}

From source file:com.conversantmedia.mapreduce.example.distribute.DistributedObjectExample.java

License:Apache License

/**
 * Loads the blacklisted words that are distributed as a {@link Set}.
 * <p>
 * Every line of the file named by {@code context.blacklist} becomes one entry of the set.
 *
 * @return Set         the lines of the blacklist file, or {@code null} when
 *                     {@code context.blacklist} is blank
 * @throws IOException   if it fails to read in the file
 */
@Distribute
public Set<String> getBlacklist() throws IOException {
    if (!StringUtils.isNotBlank(context.blacklist)) {
        // no blacklist configured
        return null;
    }

    final Set<String> words = new HashSet<>();
    InputStreamReader in = null;
    try {
        final FileSystem fileSystem = FileSystem.get(new Configuration());
        // look the file up first so the path that gets opened is the one the file system reports
        final Path resolved = fileSystem.getFileStatus(new Path(context.blacklist)).getPath();
        in = new InputStreamReader(fileSystem.open(resolved));
        words.addAll(IOUtils.readLines(in));
    } finally {
        IOUtils.closeQuietly(in);
    }
    return words;
}

From source file:com.conversantmedia.mapreduce.example.PrepareInputsExample.java

License:Apache License

/**
 * Copies every file matching the configured input pattern into a newly created working directory.
 * <p>
 * The working directory is {@code /tmp/<random UUID>} on the file system obtained from {@code getConf()}
 * and is stored in {@code workingDirectory}. Source files are copied, not moved.
 *
 * @throws IOException if the input pattern resolves to nothing because the path does not exist, or if
 *                     a file system operation fails
 */
@DriverInit
public void copyFilesToWorking() throws IOException {
    // Copy the input files into the 'workingDir'
    FileSystem fs = FileSystem.get(getConf());

    this.workingDirectory = new Path("/tmp/" + UUID.randomUUID().toString());
    fs.mkdirs(workingDirectory);

    Path inputPattern = new Path(context.getInput());
    FileStatus[] files = fs.globStatus(inputPattern);
    if (files == null) {
        // globStatus yields null (not an empty array) for a non-glob path that does not exist;
        // fail with a clear message instead of a NullPointerException in the loop below.
        throw new IOException("Input path does not exist: " + inputPattern);
    }

    for (FileStatus file : files) {
        Path dest = new Path(workingDirectory, file.getPath().getName());
        // 'false' keeps the source file in place after the copy
        FileUtil.copy(fs, file.getPath(), fs, dest, false, getConf());
    }
}

From source file:com.conversantmedia.mapreduce.io.avro.CombineAvroKeyFileInputFormat.java

License:Apache License

/**
 * Returns the parent's input listing with every file shorter than one byte left out.
 * Each omitted file is reported through the logger at error level.
 *
 * @param job the job context handed to the parent implementation
 * @return the files from the parent listing whose length is at least one byte
 * @throws IOException propagated from the parent listing
 */
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    final List<FileStatus> nonEmpty = new ArrayList<>();

    for (final FileStatus candidate : super.listStatus(job)) {
        if (candidate.getLen() >= 1) {
            nonEmpty.add(candidate);
            continue;
        }
        logger().error("Skipping Empty file: " + candidate.getPath());
    }

    return nonEmpty;
}

From source file:com.conversantmedia.mapreduce.tool.BaseTool.java

License:Apache License

/**
 * Moves our inputs into the 'archive' path for
 * long term storage, or perhaps further processing.
 * @param context      the job's driver context bean
 * @throws IOException   if the inputs cannot be moved to
 *          the archive path./*from w w w .  ja v  a2s. co m*/
 */
protected void archiveInputs(T context) throws IOException {
    FileSystem fs = FileSystem.get(getConf());
    fs.mkdirs(context.getArchive());

    for (Path input : context.getInput()) {
        List<FileStatus> status = getInputFiles(input);
        for (FileStatus file : status) {
            Path dest = new Path(context.getArchive(), file.getPath().getName());
            fs.rename(file.getPath(), dest);
            logger().debug("Moved [" + input + "] to [" + dest + "]");
        }
    }
}

From source file:com.curiousby.baoyou.cn.hadoop.HDFSUtils.java

License:Open Source License

/**
 * ?/*w  w w.j ava 2 s  .  c om*/
 * @param dirPath
 */
public List<String> dir(String dirPath) throws IOException {
    List<String> fileList = null;
    Path path = new Path(dirPath);
    if (fileSystem.exists(path)) {
        fileList = new ArrayList<String>();
        FileStatus[] list = this.getFileStatus(path);
        for (FileStatus fileStatus : list) {
            fileList.add(fileStatus.getPath().toString());
        }
    } else {
        System.out.println("?");
    }
    return fileList;
}

From source file:com.dasasian.chok.operation.master.IndexDeployOperation.java

License:Apache License

/**
 * Discovers the shards of an index by listing the entries directly under the index path.
 * <p>
 * Entries whose name starts with {@code "."} are filtered out. Of the remaining entries, every directory
 * and every path ending in {@code ".zip"} becomes one {@link Shard}.
 *
 * @param indexName       name of the index, used to derive each shard name
 * @param indexPathString location of the index
 * @return the shards found; never empty
 * @throws IndexDeployException with {@code ErrorType.INDEX_NOT_ACCESSIBLE} if the path is not a valid URI,
 *                              the file system cannot be obtained, the path does not exist or cannot be
 *                              listed, or no shard is found
 */
protected static List<Shard> readShardsFromFs(final String indexName, final String indexPathString)
        throws IndexDeployException {
    // the index location must at least be a syntactically valid URI
    final URI indexUri;
    try {
        indexUri = new URI(indexPathString);
    } catch (final URISyntaxException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "unable to parse index path uri '"
                + indexPathString + "', make sure it starts with file:// or hdfs:// ", e);
    }

    final FileSystem fs;
    try {
        fs = HadoopUtil.getFileSystem(new Path(indexUri.toString()));
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "unable to retrive file system for index path '" + indexPathString
                        + "', make sure your path starts with hadoop support prefix like file:// or hdfs://",
                e);
    }

    final List<Shard> found = new ArrayList<>();
    try {
        final Path root = new Path(indexPathString);
        if (!fs.exists(root)) {
            throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                    "index path '" + indexUri + "' does not exists");
        }

        // skip entries whose name starts with a dot
        final PathFilter visibleOnly = new PathFilter() {
            @Override
            public boolean accept(final Path aPath) {
                return !aPath.getName().startsWith(".");
            }
        };

        for (final FileStatus entry : fs.listStatus(root, visibleOnly)) {
            final String location = entry.getPath().toString();
            final boolean isShard = entry.isDir() || location.endsWith(".zip");
            if (isShard) {
                found.add(new Shard(createShardName(indexName, location), location));
            }
        }
    } catch (final IOException e) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE,
                "could not access index path: " + indexPathString, e);
    }

    if (found.isEmpty()) {
        throw new IndexDeployException(ErrorType.INDEX_NOT_ACCESSIBLE, "index does not contain any shard");
    }
    return found;
}

From source file:com.datamoin.tajo.tpcds.TpcDSTestUtil.java

License:Apache License

/**
 * Creates the given database (if absent) and runs every DDL script found in the {@code tpcds/ddl}
 * classpath resource directory against it.
 * <p>
 * Before anything is executed, the data directory returned by {@code getDataDir()} is checked to contain
 * one sub-directory per entry of {@code tableNames}, either on HDFS (when it starts with {@code hdfs://})
 * or on the local disk. In each script, {@code ${DB}} is replaced by the database name and
 * {@code ${DATA_LOCATION}} by the table's data directory; the table name is the script's file name up to
 * its first dot.
 *
 * @param database the database to create the tables in
 * @param client   the client used to execute the statements
 * @throws IOException if the data directory is not configured, is missing, is not a directory, lacks a
 *                     table sub-directory, or if the {@code tpcds} resource is not on the classpath
 * @throws Exception   any other failure raised by the file system or the client
 */
public static void createTables(String database, TajoClient client) throws Exception {
    String dataDir = getDataDir();
    if (dataDir == null || dataDir.isEmpty()) {
        throw new IOException("No TPCDS_DATA_DIR property. Use -DTPCDS_DATA_DIR=<data dir>");
    }

    // every table must have its data directory in place before any DDL runs
    if (dataDir.startsWith("hdfs://")) {
        Path path = new Path(dataDir);
        FileSystem fs = path.getFileSystem(new Configuration());
        for (String eachTable : tableNames) {
            Path tableDataDir = new Path(path, eachTable);
            if (!fs.exists(tableDataDir)) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    } else {
        File dataDirFile = new File(dataDir);
        if (!dataDirFile.exists()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] not exists.");
        }
        if (dataDirFile.isFile()) {
            throw new IOException("TPCDS_DATA_DIR [" + dataDir + "] is not a directory.");
        }

        for (String eachTable : tableNames) {
            File tableDataDir = new File(dataDirFile, eachTable);
            if (!tableDataDir.exists()) {
                throw new IOException(eachTable + " data dir [" + tableDataDir + "] not exists.");
            }
        }
    }

    // NOTE(review): 'opt' is populated but never read below; kept in case construction matters.
    KeyValueSet opt = new KeyValueSet();
    opt.set(StorageConstants.TEXT_DELIMITER, StorageConstants.DEFAULT_FIELD_DELIMITER);

    LOG.info("Create database: " + database);
    client.executeQuery("create database if not exists " + database);

    // getSystemResource returns null when the resource is absent; report that explicitly
    // instead of failing with a NullPointerException on toString().
    java.net.URL tpcdsResource = ClassLoader.getSystemResource("tpcds");
    if (tpcdsResource == null) {
        throw new IOException("Cannot find the 'tpcds' resource directory on the classpath.");
    }
    Path tpcdsResourceURL = new Path(tpcdsResource.toString());

    Path ddlPath = new Path(tpcdsResourceURL, "ddl");
    FileSystem localFs = FileSystem.getLocal(new Configuration());

    FileStatus[] files = localFs.listStatus(ddlPath);

    // a data dir without a scheme is treated as a local path
    String dataDirWithPrefix = dataDir;
    if (!dataDir.contains("://")) {
        dataDirWithPrefix = "file://" + dataDir;
    }

    for (FileStatus eachFile : files) {
        if (eachFile.isFile()) {
            String tableName = eachFile.getPath().getName().split("\\.")[0];
            String query = FileUtil.readTextFile(new File(eachFile.getPath().toUri()));
            query = query.replace("${DB}", database);
            query = query.replace("${DATA_LOCATION}", dataDirWithPrefix + "/" + tableName);

            LOG.info("Create table:" + tableName + "," + query);
            client.executeQuery(query);
        }
    }
}

From source file:com.datasalt.pangool.solr.SolrRecordWriter.java

License:Apache License

/**
 * Write a file to a zip output stream, removing leading path name components from the actual file name when creating
 * the zip file entry./*from   w w  w . j  a v  a2s  .c  o  m*/
 * 
 * The entry placed in the zip file is <code>baseName</code>/ <code>relativePath</code>, where
 * <code>relativePath</code> is constructed by removing a leading <code>root</code> from the path for
 * <code>itemToZip</code>.
 * 
 * If <code>itemToZip</code> is an empty directory, it is ignored. If <code>itemToZip</code> is a directory, the
 * contents of the directory are added recursively.
 * 
 * @param zos
 *          The zip output stream
 * @param baseName
 *          The base name to use for the file name entry in the zip file
 * @param root
 *          The path to remove from <code>itemToZip</code> to make a relative path name
 * @param itemToZip
 *          The path to the file to be added to the zip file
 * @return the number of entries added
 * @throws IOException
 */
static public int zipDirectory(final Configuration conf, final ZipOutputStream zos, final String baseName,
        final String root, final Path itemToZip) throws IOException {
    LOG.info(String.format("zipDirectory: %s %s %s", baseName, root, itemToZip));
    LocalFileSystem localFs = FileSystem.getLocal(conf);
    int count = 0;

    final FileStatus itemStatus = localFs.getFileStatus(itemToZip);
    if (itemStatus.isDir()) {
        final FileStatus[] statai = localFs.listStatus(itemToZip);

        // Add a directory entry to the zip file
        final String zipDirName = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);
        final ZipEntry dirZipEntry = new ZipEntry(zipDirName + Path.SEPARATOR_CHAR);
        LOG.info(String.format("Adding directory %s to zip", zipDirName));
        zos.putNextEntry(dirZipEntry);
        zos.closeEntry();
        count++;

        if (statai == null || statai.length == 0) {
            LOG.info(String.format("Skipping empty directory %s", itemToZip));
            return count;
        }
        for (FileStatus status : statai) {
            count += zipDirectory(conf, zos, baseName, root, status.getPath());
        }
        LOG.info(String.format("Wrote %d entries for directory %s", count, itemToZip));
        return count;
    }

    final String inZipPath = relativePathForZipEntry(itemToZip.toUri().getPath(), baseName, root);

    if (inZipPath.length() == 0) {
        LOG.warn(String.format("Skipping empty zip file path for %s (%s %s)", itemToZip, root, baseName));
        return 0;
    }

    // Take empty files in case the place holder is needed
    FSDataInputStream in = null;
    try {
        in = localFs.open(itemToZip);
        final ZipEntry ze = new ZipEntry(inZipPath);
        ze.setTime(itemStatus.getModificationTime());
        // Comments confuse looking at the zip file
        // ze.setComment(itemToZip.toString());
        zos.putNextEntry(ze);

        IOUtils.copyBytes(in, zos, conf, false);
        zos.closeEntry();
        LOG.info(String.format("Wrote %d entries for file %s", count, itemToZip));
        return 1;
    } finally {
        in.close();
    }

}