Example usage for org.apache.hadoop.fs Path toString

Introduction

This page collects example usages of the toString() method of org.apache.hadoop.fs.Path.

Prototype

@Override
    public String toString()

Source Link

Usage

From source file:cascading.tap.hadoop.io.MultiInputSplit.java

License:Open Source License

/**
 * Restores this split from the given stream.
 * <p>
 * Reads, in order: the class name of the wrapped split, an array of config keys, an array of
 * config values, and finally the wrapped split's own fields. The key/value pairs are merged
 * with {@code jobConf}, and the merged configuration is used to instantiate the wrapped split.
 * When the wrapped split is a {@code FileSplit} with a non-null path, that path is stored on
 * {@code jobConf} under {@code CASCADING_SOURCE_PATH}.
 *
 * @param in the stream to read the serialized split from
 * @throws IOException if reading fails or the wrapped split class cannot be found
 */
public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++)
        config.put(keys[i], values[i]);

    if (LOG.isDebugEnabled()) {
        LOG.debug("current split config diff:");
        for (Map.Entry<String, String> entry : config.entrySet())
            LOG.debug("key: {}, value: {}", entry.getKey(), entry.getValue());
    }

    JobConf currentConf = HadoopUtil.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType),
                currentConf);
    } catch (ClassNotFoundException exp) {
        // keep the original exception as the cause so the class loading failure stays diagnosable
        throw new IOException("split class " + splitType + " not found", exp);
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set(CASCADING_SOURCE_PATH, path.toString());

            LOG.info("current split input path: {}", path);
        }
    }
}

From source file:cascading.tap.hadoop.io.StreamedFileSystem.java

License:Open Source License

/**
 * Convenience overload that forwards to the String based variant, using the string form of
 * the given path.
 *
 * @param conf the configuration passed through to the String based variant
 * @param path the path whose string form is passed on
 * @return whatever the String based variant returns
 */
public static String getMD5SumFor(Configuration conf, Path path) {
    String pathString = path.toString();

    return getMD5SumFor(conf, pathString);
}

From source file:cascading.tap.hadoop.io.StreamedFileSystem.java

License:Open Source License

/**
 * Convenience overload that forwards to the String based variant, using the string form of
 * the given path.
 *
 * @param conf   the configuration passed through to the String based variant
 * @param path   the path whose string form is passed on
 * @param md5Hex the value passed through unchanged
 */
public static void setMD5SumFor(Configuration conf, Path path, String md5Hex) {
    String pathString = path.toString();

    setMD5SumFor(conf, pathString, md5Hex);
}

From source file:cascading.tap.hadoop.MultiInputSplit.java

License:Open Source License

/**
 * Restores this split from the given stream.
 * <p>
 * Reads, in order: the class name of the wrapped split, an array of config keys, an array of
 * config values, and finally the wrapped split's own fields. The key/value pairs are merged
 * with {@code jobConf}, and the merged configuration is used to instantiate the wrapped split.
 * When the wrapped split is a {@code FileSplit} with a non-null path, that path is stored on
 * {@code jobConf} under {@code "cascading.source.path"}.
 *
 * @param in the stream to read the serialized split from
 * @throws IOException if reading fails or the wrapped split class cannot be found
 */
public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++)
        config.put(keys[i], values[i]);

    JobConf currentConf = MultiInputFormat.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType),
                currentConf);
    } catch (ClassNotFoundException exp) {
        // keep the original exception as the cause so the class loading failure stays diagnosable
        throw new IOException("split class " + splitType + " not found", exp);
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set("cascading.source.path", path.toString());

            if (LOG.isInfoEnabled())
                LOG.info("current split input path: " + path.toString());
        }
    }
}

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Copies all files from the task output path to the job output path.
 * <p>
 * The task output path is read from {@code "mapred.work.output.dir"}. Nothing happens when no
 * file system is available for it, or while the counter kept for this path in
 * {@code pathCounts} has not yet reached zero after being decremented. The job output path is
 * taken to be the grandparent of the task output path; after the move the task output
 * directory is deleted recursively.
 *
 * @param conf the configuration providing the task output path and the task id
 * @throws IOException if a file system operation fails
 */
public static void commitTask(Configuration conf) throws IOException {
    Path taskOutputPath = new Path(conf.get("mapred.work.output.dir"));

    FileSystem fs = getFSSafe(conf, taskOutputPath);

    if (fs == null)
        return;

    // NOTE(review): presumably a counter was registered for this path before commit is called;
    // a missing entry would raise a NullPointerException below -- confirm against the code
    // that populates pathCounts
    AtomicInteger integer = pathCounts.get(taskOutputPath.toString());

    if (integer.decrementAndGet() != 0)
        return;

    String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id"));

    LOG.info("committing task: '{}' - {}", taskId, taskOutputPath);

    // taskOutputPath was created with 'new' above and has already been dereferenced,
    // so no null check is needed here
    if (writeDirectlyToWorkingPath(conf, taskOutputPath))
        return;

    if (!fs.exists(taskOutputPath))
        return;

    Path jobOutputPath = taskOutputPath.getParent().getParent();

    // Move the task outputs to their final place
    moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath);

    // Delete the temporary task-specific output directory
    if (!fs.delete(taskOutputPath, true))
        LOG.info("failed to delete the temporary output directory of task: '{}' - {}", taskId,
                taskOutputPath);

    LOG.info("saved output of task '{}' to {}", taskId, jobOutputPath);
}

From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java

License:Open Source License

/**
 * Resolves the given output directory against the working directory of the job configuration
 * and stores the result under {@code "mapred.work.output.dir"}.
 *
 * @param conf      the configuration to read the working directory from and to write to
 * @param outputDir the output directory to resolve
 */
static void setWorkOutputPath(Configuration conf, Path outputDir) {
    Path resolved = new Path(asJobConfInstance(conf).getWorkingDirectory(), outputDir);
    String resolvedString = resolved.toString();

    conf.set("mapred.work.output.dir", resolvedString);
}

From source file:cascading.tap.hadoop.ZipInputFormat.java

License:Open Source License

/**
 * Reports whether the given file may be split, which requires that splits are allowed on the
 * file system and that a first ZIP entry can be read from the file.
 *
 * @param fs   the file system that the file is on
 * @param file the path that represents this file
 * @return {@code true} if splits are allowed and the file yields at least one ZIP entry,
 *         {@code false} otherwise, including when opening or reading the file fails
 */
protected boolean isSplitable(FileSystem fs, Path file) {
    if (!isAllowSplits(fs))
        return false;

    if (LOG.isDebugEnabled())
        LOG.debug("verifying ZIP format for file: " + file.toString());

    ZipInputStream zipStream = null;

    try {
        zipStream = new ZipInputStream(fs.open(file));

        // an archive without a first entry is reported through the same path as a read failure
        if (zipStream.getNextEntry() == null)
            throw new IOException("no entries found, empty zip file");

        if (LOG.isDebugEnabled())
            LOG.debug("ZIP format verification successful");

        return true;
    } catch (IOException exception) {
        LOG.error("exception encountered while trying to open and read ZIP input stream", exception);

        return false;
    } finally {
        safeClose(zipStream);
    }
}

From source file:cascading.tap.hadoop.ZipInputFormat.java

License:Open Source License

/**
 * Creates the input splits for the files returned by {@link #listPathsInternal(JobConf)}.
 * <p>
 * All paths are validated first: each one must be an existing file. Afterwards every file is
 * handed to {@code makeSplits} when its file system allows splits, and to {@code makeSplit}
 * otherwise.
 *
 * @param job       the JobConf data structure, see {@link JobConf}
 * @param numSplits the requested number of splits; only used as the initial capacity of the
 *                  list collecting the splits
 * @return the collected splits
 * @throws IOException if one of the listed paths is not an existing file
 */
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    if (LOG.isDebugEnabled())
        LOG.debug("start splitting input ZIP files");

    Path[] files = listPathsInternal(job);

    // validate every path before any split is generated
    for (Path candidate : files) {
        FileSystem candidateFs = candidate.getFileSystem(job);

        if (!(candidateFs.isFile(candidate) && candidateFs.exists(candidate)))
            throw new IOException("not a file: " + candidate);
    }

    // generate splits
    ArrayList<ZipSplit> splits = new ArrayList<ZipSplit>(numSplits);

    for (Path zipFile : files) {
        FileSystem zipFs = zipFile.getFileSystem(job);

        if (LOG.isDebugEnabled())
            LOG.debug("opening zip file: " + zipFile.toString());

        if (!isAllowSplits(zipFs))
            makeSplit(job, splits, zipFile);
        else
            makeSplits(job, splits, zipFs, zipFile);
    }

    if (LOG.isDebugEnabled())
        LOG.debug("end splitting input ZIP files");

    ZipSplit[] result = new ZipSplit[splits.size()];

    return splits.toArray(result);
}

From source file:cascading.tap.hive.HiveTableDescriptor.java

License:Open Source License

/**
 * Constructs a new HiveTableDescriptor object.
 *
 * @param databaseName     The database name. When null or empty, the default database name is used.
 * @param tableName        The table name. Must not be null or empty.
 * @param columnNames      Names of the columns
 * @param columnTypes      Hive types of the columns
 * @param partitionKeys    The partition keys; they are verified when the descriptor reports
 *                         itself as partitioned.
 * @param delimiter        The field delimiter of the Hive table. When null and the default
 *                         serialization library is used, the default delimiter is applied.
 * @param serializationLib Hive serialization library.
 * @param location         Optional location of the table. Must be absolute when given.
 * @throws IllegalArgumentException if tableName is null or empty, if columnNames and
 *                                  columnTypes are empty or differ in size, or if location
 *                                  is not absolute
 */
public HiveTableDescriptor(String databaseName, String tableName, String[] columnNames, String[] columnTypes,
        String[] partitionKeys, String delimiter, String serializationLib, Path location) {
    if (tableName == null || tableName.isEmpty())
        throw new IllegalArgumentException("tableName cannot be null or empty");
    // the emptiness check must look at databaseName itself; tableName is known to be non-empty here
    if (databaseName == null || databaseName.isEmpty())
        this.databaseName = HIVE_DEFAULT_DATABASE_NAME;
    else
        this.databaseName = databaseName.toLowerCase();
    this.tableName = tableName.toLowerCase();
    this.columnNames = columnNames;
    this.columnTypes = columnTypes;
    this.partitionKeys = partitionKeys;
    this.serializationLib = serializationLib;
    //Only set the delimiter if the serialization lib is Delimited.
    // compare by value: an equal library name held in a different String instance must match too
    if (delimiter == null && HIVE_DEFAULT_SERIALIZATION_LIB_NAME.equals(this.serializationLib))
        this.delimiter = HIVE_DEFAULT_DELIMITER;
    else
        this.delimiter = delimiter;
    if (isPartitioned())
        verifyPartitionKeys();
    if (columnNames.length == 0 || columnTypes.length == 0 || columnNames.length != columnTypes.length)
        throw new IllegalArgumentException(
                "columnNames and columnTypes cannot be empty and must have the same size");

    if (location != null) {
        if (!location.isAbsolute())
            throw new IllegalArgumentException("location must be a fully qualified absolute path");

        // Store as string since path is not serialisable
        this.location = location.toString();
    }
}

From source file:cc.solr.lucene.store.hdfs.ChangeFileExt.java

License:Apache License

/**
 * Appends {@code ".lf"} to the path of every entry listed for the given path.
 *
 * @param args {@code args[0]} is the path whose listed entries are renamed
 * @throws IOException if the file system cannot be obtained, listed or modified
 * @throws IllegalArgumentException if no path argument is given
 */
public static void main(String[] args) throws IOException {
    if (args.length < 1)
        throw new IllegalArgumentException("usage: ChangeFileExt <path>");

    Path p = new Path(args[0]);
    FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration());
    FileStatus[] listStatus = fileSystem.listStatus(p);
    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        Path renamed = new Path(path.toString() + ".lf");

        // rename reports failure through its return value; surface it and keep going
        if (!fileSystem.rename(path, renamed))
            System.err.println("could not rename " + path + " to " + renamed);
    }
}