List of usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
From source file:cascading.tap.hadoop.io.MultiInputSplit.java
License:Open Source License
/**
 * Restores this split from the given stream.
 *
 * <p>Reads, in order: the wrapped split's class name, an array of config keys and an array of
 * config values. The key/value pairs are merged with {@code jobConf} via
 * {@code HadoopUtil.mergeConf}, the wrapped split is instantiated reflectively from the merged
 * configuration, and the wrapped split then reads its own fields from the same stream.
 *
 * <p>When the wrapped split is a {@link FileSplit} with a non-null path, that path is stored in
 * {@code jobConf} under {@code CASCADING_SOURCE_PATH}.
 *
 * @param in the stream to read the serialized split from
 * @throws IOException if reading fails, or if the wrapped split class cannot be loaded (the
 *         {@link ClassNotFoundException} is attached as the cause)
 */
public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++) {
        config.put(keys[i], values[i]);
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("current split config diff:");

        for (Map.Entry<String, String> entry : config.entrySet()) {
            LOG.debug("key: {}, value: {}", entry.getKey(), entry.getValue());
        }
    }

    JobConf currentConf = HadoopUtil.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType), currentConf);
    } catch (ClassNotFoundException exp) {
        // Keep the original exception as the cause so the class-loading failure stays diagnosable.
        throw new IOException("split class " + splitType + " not found", exp);
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set(CASCADING_SOURCE_PATH, path.toString());

            LOG.info("current split input path: {}", path);
        }
    }
}
From source file:cascading.tap.hadoop.io.StreamedFileSystem.java
License:Open Source License
/**
 * Path-based convenience overload: converts {@code path} to its string form and delegates to the
 * String-based {@code getMD5SumFor}.
 *
 * @param conf the configuration passed through to the String-based overload
 * @param path the path whose string form is used for the lookup
 * @return whatever the String-based overload returns for that path string
 */
public static String getMD5SumFor(Configuration conf, Path path) {
    final String location = path.toString();

    return getMD5SumFor(conf, location);
}
From source file:cascading.tap.hadoop.io.StreamedFileSystem.java
License:Open Source License
/**
 * Path-based convenience overload: converts {@code path} to its string form and delegates to the
 * String-based {@code setMD5SumFor}.
 *
 * @param conf the configuration passed through to the String-based overload
 * @param path the path whose string form is used
 * @param md5Hex the MD5 value, passed through unchanged
 */
public static void setMD5SumFor(Configuration conf, Path path, String md5Hex) {
    final String location = path.toString();

    setMD5SumFor(conf, location, md5Hex);
}
From source file:cascading.tap.hadoop.MultiInputSplit.java
License:Open Source License
/**
 * Restores this split from the given stream.
 *
 * <p>Reads, in order: the wrapped split's class name, an array of config keys and an array of
 * config values. The key/value pairs are merged with {@code jobConf} via
 * {@code MultiInputFormat.mergeConf}, the wrapped split is instantiated reflectively from the
 * merged configuration, and the wrapped split then reads its own fields from the same stream.
 *
 * <p>When the wrapped split is a {@link FileSplit} with a non-null path, that path is stored in
 * {@code jobConf} under the key {@code "cascading.source.path"}.
 *
 * @param in the stream to read the serialized split from
 * @throws IOException if reading fails, or if the wrapped split class cannot be loaded (the
 *         {@link ClassNotFoundException} is attached as the cause)
 */
public void readFields(DataInput in) throws IOException {
    String splitType = in.readUTF();
    config = new HashMap<String, String>();

    String[] keys = WritableUtils.readStringArray(in);
    String[] values = WritableUtils.readStringArray(in);

    for (int i = 0; i < keys.length; i++) {
        config.put(keys[i], values[i]);
    }

    JobConf currentConf = MultiInputFormat.mergeConf(jobConf, config, false);

    try {
        inputSplit = (InputSplit) ReflectionUtils.newInstance(currentConf.getClassByName(splitType), currentConf);
    } catch (ClassNotFoundException exp) {
        // Keep the original exception as the cause so the class-loading failure stays diagnosable.
        throw new IOException("split class " + splitType + " not found", exp);
    }

    inputSplit.readFields(in);

    if (inputSplit instanceof FileSplit) {
        Path path = ((FileSplit) inputSplit).getPath();

        if (path != null) {
            jobConf.set("cascading.source.path", path.toString());

            if (LOG.isInfoEnabled()) {
                LOG.info("current split input path: " + path.toString());
            }
        }
    }
}
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
/** * copies all files from the taskoutputpath to the outputpath * * @param conf//from w w w . j av a 2 s . co m */ public static void commitTask(Configuration conf) throws IOException { Path taskOutputPath = new Path(conf.get("mapred.work.output.dir")); FileSystem fs = getFSSafe(conf, taskOutputPath); if (fs == null) return; AtomicInteger integer = pathCounts.get(taskOutputPath.toString()); if (integer.decrementAndGet() != 0) return; String taskId = conf.get("mapred.task.id", conf.get("mapreduce.task.id")); LOG.info("committing task: '{}' - {}", taskId, taskOutputPath); if (taskOutputPath != null) { if (writeDirectlyToWorkingPath(conf, taskOutputPath)) return; if (fs.exists(taskOutputPath)) { Path jobOutputPath = taskOutputPath.getParent().getParent(); // Move the task outputs to their final place moveTaskOutputs(conf, fs, jobOutputPath, taskOutputPath); // Delete the temporary task-specific output directory if (!fs.delete(taskOutputPath, true)) LOG.info("failed to delete the temporary output directory of task: '{}' - {}", taskId, taskOutputPath); LOG.info("saved output of task '{}' to {}", taskId, jobOutputPath); } } }
From source file:cascading.tap.hadoop.util.Hadoop18TapUtil.java
License:Open Source License
/**
 * Stores the work output directory in {@code conf} under {@code mapred.work.output.dir}.
 *
 * <p>The stored value is the string form of a {@link Path} built with the job's working directory
 * as parent and {@code outputDir} as child.
 *
 * @param conf the configuration to update
 * @param outputDir the output directory, combined with the working directory before storing
 */
static void setWorkOutputPath(Configuration conf, Path outputDir) {
    Path workingDirectory = asJobConfInstance(conf).getWorkingDirectory();
    Path resolvedOutputDir = new Path(workingDirectory, outputDir);

    conf.set("mapred.work.output.dir", resolvedOutputDir.toString());
}
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
/**
 * Decides whether the given file may be split.
 *
 * <p>Returns false immediately when {@code isAllowSplits} rejects the file system. Otherwise the
 * file is opened as a ZIP stream and its first entry is read; the file counts as splitable only
 * if that succeeds and an entry is found. Any {@link IOException} on the way (including the
 * "empty zip file" case) is logged and results in false.
 *
 * @param fs the file system that the file is on
 * @param file the path that represents this file
 * @return true if splits are allowed and the file has at least one readable ZIP entry
 */
protected boolean isSplitable(FileSystem fs, Path file) {
    if (!isAllowSplits(fs)) {
        return false;
    }

    if (LOG.isDebugEnabled()) {
        LOG.debug("verifying ZIP format for file: " + file.toString());
    }

    ZipInputStream zipStream = null;

    try {
        zipStream = new ZipInputStream(fs.open(file));

        if (zipStream.getNextEntry() == null) {
            throw new IOException("no entries found, empty zip file");
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("ZIP format verification successful");
        }

        return true;
    } catch (IOException exception) {
        LOG.error("exception encountered while trying to open and read ZIP input stream", exception);

        return false;
    } finally {
        // Runs on every exit path, including the early returns above.
        safeClose(zipStream);
    }
}
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
/** * Splits files returned by {@link #listPathsInternal(JobConf)}. Each file is * expected to be in zip format and each split corresponds to * {@link ZipEntry}.//w w w.java 2 s. c o m * * @param job the JobConf data structure, see {@link JobConf} * @param numSplits the number of splits required. Ignored here * @throws IOException if input files are not in zip format */ public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { if (LOG.isDebugEnabled()) LOG.debug("start splitting input ZIP files"); Path[] files = listPathsInternal(job); for (int i = 0; i < files.length; i++) { // check we have valid files Path file = files[i]; FileSystem fs = file.getFileSystem(job); if (!fs.isFile(file) || !fs.exists(file)) throw new IOException("not a file: " + files[i]); } // generate splits ArrayList<ZipSplit> splits = new ArrayList<ZipSplit>(numSplits); for (int i = 0; i < files.length; i++) { Path file = files[i]; FileSystem fs = file.getFileSystem(job); if (LOG.isDebugEnabled()) LOG.debug("opening zip file: " + file.toString()); if (isAllowSplits(fs)) makeSplits(job, splits, fs, file); else makeSplit(job, splits, file); } if (LOG.isDebugEnabled()) LOG.debug("end splitting input ZIP files"); return splits.toArray(new ZipSplit[splits.size()]); }
From source file:cascading.tap.hive.HiveTableDescriptor.java
License:Open Source License
/** * Constructs a new HiveTableDescriptor object. * * @param databaseName The database name. * @param tableName The table name * @param columnNames Names of the columns * @param columnTypes Hive types of the columns * @param delimiter The field delimiter of the Hive table * @param serializationLib Hive serialization library. */// w w w. j a v a 2 s .c o m public HiveTableDescriptor(String databaseName, String tableName, String[] columnNames, String[] columnTypes, String[] partitionKeys, String delimiter, String serializationLib, Path location) { if (tableName == null || tableName.isEmpty()) throw new IllegalArgumentException("tableName cannot be null or empty"); if (databaseName == null || tableName.isEmpty()) this.databaseName = HIVE_DEFAULT_DATABASE_NAME; else this.databaseName = databaseName.toLowerCase(); this.tableName = tableName.toLowerCase(); this.columnNames = columnNames; this.columnTypes = columnTypes; this.partitionKeys = partitionKeys; this.serializationLib = serializationLib; //Only set the delimiter if the serialization lib is Delimited. if (delimiter == null && this.serializationLib == HIVE_DEFAULT_SERIALIZATION_LIB_NAME) this.delimiter = HIVE_DEFAULT_DELIMITER; else this.delimiter = delimiter; if (isPartitioned()) verifyPartitionKeys(); if (columnNames.length == 0 || columnTypes.length == 0 || columnNames.length != columnTypes.length) throw new IllegalArgumentException( "columnNames and columnTypes cannot be empty and must have the same size"); if (location != null) { if (!location.isAbsolute()) throw new IllegalArgumentException("location must be a fully qualified absolute path"); // Store as string since path is not serialisable this.location = location.toString(); } }
From source file:cc.solr.lucene.store.hdfs.ChangeFileExt.java
License:Apache License
/**
 * Appends the suffix {@code ".lf"} to the name of every entry listed directly under the given
 * path.
 *
 * @param args {@code args[0]} is the path whose entries are listed and renamed
 * @throws IllegalArgumentException if no path argument is supplied
 * @throws IOException if the file system cannot be obtained, or listing or renaming fails
 */
public static void main(String[] args) throws IOException {
    if (args.length < 1) {
        throw new IllegalArgumentException("usage: ChangeFileExt <path>");
    }

    Path p = new Path(args[0]);
    FileSystem fileSystem = FileSystem.get(p.toUri(), new Configuration());
    FileStatus[] listStatus = fileSystem.listStatus(p);

    // NOTE(review): some FileSystem versions appear to return null rather than throw for a
    // missing path -- confirm against the Hadoop version in use.
    if (listStatus == null) {
        System.err.println("nothing to rename, could not list: " + p);
        return;
    }

    for (FileStatus fileStatus : listStatus) {
        Path path = fileStatus.getPath();
        Path renamed = new Path(path.toString() + ".lf");

        // rename() signals failure through its return value; report it and keep going.
        if (!fileSystem.rename(path, renamed)) {
            System.err.println("failed to rename " + path + " to " + renamed);
        }
    }
}