List of usage examples for org.apache.hadoop.fs.Path makeQualified
@InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" }) public Path makeQualified(URI defaultUri, Path workingDir)
From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.SparkDataSetContext.java
License:Apache License
/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Get configuration
    final Configuration conf = delegate.getHadoopConfiguration(client);
    final String highWaterMarkName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK),
            SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(conf);

    final String highWaterMarkValue = client.getHighWaterMarks().get(highWaterMarkName);
    if (highWaterMarkValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(highWaterMarkValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException(
                    "Invalid " + HIGH_WATER_MARK_OPTION + " value: " + highWaterMarkValue, e);
        }
    }

    final String maxFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE),
            SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxFileAge, e);
        }
    }

    final String minFileAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE),
            SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minFileAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minFileAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minFileAge, e);
        }
    }

    // Convert URIs to Paths
    final Path[] paths = new Path[uris.size()];
    for (int i = 0; i < uris.size(); ++i) {
        final Path path = new Path(uris.get(i));
        final FileSystem fs = path.getFileSystem(conf);
        paths[i] = path.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    }
    HighWaterMarkInputFormat.setInputPaths(job, paths);

    // Get high water mark paths
    final HighWaterMarkInputFormat inputFormat = new HighWaterMarkInputFormat();
    final List<FileStatus> files = inputFormat.listStatus(job);
    client.setHighWaterMarks(
            Collections.singletonMap(highWaterMarkName, Long.toString(inputFormat.getLastHighWaterMark())));

    // Return resolved paths
    final List<String> resolvedPaths = new ArrayList<>(files.size());
    if (files.isEmpty()) {
        resolvedPaths.add("file:/dev/null");
    } else {
        for (final FileStatus file : files) {
            resolvedPaths.add(file.getPath().toString());
        }
    }
    return resolvedPaths;
}
From source file:io.dstream.tez.utils.HdfsSerializerUtils.java
License:Apache License
/**
 * Serializes the given object to HDFS and returns its fully qualified {@link Path}.
 *
 * @param source the object to serialize; must not be null
 * @param fs the target file system; must not be null
 * @param targetPath the path to write to; must not be null
 * @return the qualified path of the serialized object
 */
public static Path serialize(Object source, FileSystem fs, Path targetPath) {
    Assert.notNull(targetPath, "'targetPath' must not be null");
    Assert.notNull(fs, "'fs' must not be null");
    Assert.notNull(source, "'source' must not be null");
    Path resultPath = targetPath.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    OutputStream targetOutputStream = null;
    try {
        targetOutputStream = fs.create(targetPath);
        SerializationUtils.serialize(source, targetOutputStream);
    } catch (Exception e) {
        throw new IllegalStateException("Failed to serialize " + source + " to " + resultPath, e);
    }
    return resultPath;
}
From source file:org.apache.accumulo.core.file.rfile.PrintInfo.java
License:Apache License
@Override
public void execute(final String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(PrintInfo.class.getName(), args);
    if (opts.files.isEmpty()) {
        System.err.println("No files were given");
        System.exit(-1);
    }

    Configuration conf = new Configuration();
    for (String confFile : opts.configFiles) {
        log.debug("Adding Hadoop configuration file " + confFile);
        conf.addResource(new Path(confFile));
    }

    FileSystem hadoopFs = FileSystem.get(conf);
    FileSystem localFs = FileSystem.getLocal(conf);

    LogHistogram kvHistogram = new LogHistogram();
    KeyStats dataKeyStats = new KeyStats();
    KeyStats indexKeyStats = new KeyStats();

    for (String arg : opts.files) {
        Path path = new Path(arg);
        FileSystem fs;
        if (arg.contains(":"))
            fs = path.getFileSystem(conf);
        else {
            log.warn("Attempting to find file across filesystems. Consider providing URI instead of path");
            fs = hadoopFs.exists(path) ? hadoopFs : localFs; // fall back to local
        }
        System.out.println(
                "Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());

        CachableBlockFile.Reader _rdr = new CachableBlockFile.Reader(fs, path, conf, null, null,
                SiteConfiguration.getInstance(DefaultConfiguration.getInstance()));
        Reader iter = new RFile.Reader(_rdr);
        MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();

        if (opts.vis || opts.hash)
            iter.registerMetrics(vmg);

        iter.printInfo();
        System.out.println();
        org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(new String[] { arg });

        Map<String, ArrayList<ByteSequence>> localityGroupCF = null;
        if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats) {
            localityGroupCF = iter.getLocalityGroupCF();

            FileSKVIterator dataIter;
            if (opts.useSample) {
                dataIter = iter.getSample();
                if (dataIter == null) {
                    System.out.println("ERROR : This rfile has no sample data");
                    return;
                }
            } else {
                dataIter = iter;
            }

            if (opts.keyStats) {
                FileSKVIterator indexIter = iter.getIndex();
                while (indexIter.hasTop()) {
                    indexKeyStats.add(indexIter.getTopKey());
                    indexIter.next();
                }
            }

            for (Entry<String, ArrayList<ByteSequence>> cf : localityGroupCF.entrySet()) {
                dataIter.seek(new Range((Key) null, (Key) null), cf.getValue(), true);
                while (dataIter.hasTop()) {
                    Key key = dataIter.getTopKey();
                    Value value = dataIter.getTopValue();
                    if (opts.dump) {
                        System.out.println(key + " -> " + value);
                        if (System.out.checkError())
                            return;
                    }
                    if (opts.histogram) {
                        kvHistogram.add(key.getSize() + value.getSize());
                    }
                    if (opts.keyStats) {
                        dataKeyStats.add(key);
                    }
                    dataIter.next();
                }
            }
        }

        iter.close();

        if (opts.vis || opts.hash) {
            System.out.println();
            vmg.printMetrics(opts.hash, "Visibility", System.out);
        }
        if (opts.histogram) {
            System.out.println();
            kvHistogram.print("");
        }
        if (opts.keyStats) {
            System.out.println();
            System.out.println("Statistics for keys in data :");
            dataKeyStats.print("\t");
            System.out.println();
            System.out.println("Statistics for keys in index :");
            indexKeyStats.print("\t");
        }
        // If the output stream has closed, there is no reason to keep going.
        if (System.out.checkError())
            return;
    }
}
From source file:org.apache.accumulo.server.util.TabletOperations.java
License:Apache License
public static String createTabletDirectory(VolumeManager fs, String tableId, Text endRow) {
    String lowDirectory;

    UniqueNameAllocator namer = UniqueNameAllocator.getInstance();
    String volume = fs.choose(ServerConstants.getTablesDirs());

    while (true) {
        try {
            if (endRow == null) {
                lowDirectory = Constants.DEFAULT_TABLET_LOCATION;
                Path lowDirectoryPath = new Path(volume + "/" + tableId + "/" + lowDirectory);
                if (fs.exists(lowDirectoryPath) || fs.mkdirs(lowDirectoryPath)) {
                    FileSystem pathFs = fs.getVolumeByPath(lowDirectoryPath).getFileSystem();
                    return lowDirectoryPath.makeQualified(pathFs.getUri(), pathFs.getWorkingDirectory())
                            .toString();
                }
                log.warn("Failed to create " + lowDirectoryPath + " for unknown reason");
            } else {
                lowDirectory = "/" + Constants.GENERATED_TABLET_DIRECTORY_PREFIX + namer.getNextName();
                Path lowDirectoryPath = new Path(volume + "/" + tableId + "/" + lowDirectory);
                if (fs.exists(lowDirectoryPath))
                    throw new IllegalStateException("Dir exist when it should not " + lowDirectoryPath);
                if (fs.mkdirs(lowDirectoryPath)) {
                    FileSystem lowDirectoryFs = fs.getVolumeByPath(lowDirectoryPath).getFileSystem();
                    return lowDirectoryPath
                            .makeQualified(lowDirectoryFs.getUri(), lowDirectoryFs.getWorkingDirectory())
                            .toString();
                }
            }
        } catch (IOException e) {
            log.warn(e);
        }
        log.warn("Failed to create dir for tablet in table " + tableId + " in volume " + volume
                + " + will retry ...");
        UtilWaitThread.sleep(3000);
    }
}
From source file:org.apache.accumulo.tserver.tablet.Tablet.java
License:Apache License
private static String createTabletDirectory(VolumeManager fs, String tableId, Text endRow) {
    String lowDirectory;

    UniqueNameAllocator namer = UniqueNameAllocator.getInstance();
    String volume = fs.choose(Optional.of(tableId), ServerConstants.getBaseUris()) + Constants.HDFS_TABLES_DIR
            + Path.SEPARATOR;

    while (true) {
        try {
            if (endRow == null) {
                lowDirectory = Constants.DEFAULT_TABLET_LOCATION;
                Path lowDirectoryPath = new Path(volume + "/" + tableId + "/" + lowDirectory);
                if (fs.exists(lowDirectoryPath) || fs.mkdirs(lowDirectoryPath)) {
                    FileSystem pathFs = fs.getVolumeByPath(lowDirectoryPath).getFileSystem();
                    return lowDirectoryPath.makeQualified(pathFs.getUri(), pathFs.getWorkingDirectory())
                            .toString();
                }
                log.warn("Failed to create " + lowDirectoryPath + " for unknown reason");
            } else {
                lowDirectory = "/" + Constants.GENERATED_TABLET_DIRECTORY_PREFIX + namer.getNextName();
                Path lowDirectoryPath = new Path(volume + "/" + tableId + "/" + lowDirectory);
                if (fs.exists(lowDirectoryPath))
                    throw new IllegalStateException("Dir exist when it should not " + lowDirectoryPath);
                if (fs.mkdirs(lowDirectoryPath)) {
                    FileSystem lowDirectoryFs = fs.getVolumeByPath(lowDirectoryPath).getFileSystem();
                    return lowDirectoryPath
                            .makeQualified(lowDirectoryFs.getUri(), lowDirectoryFs.getWorkingDirectory())
                            .toString();
                }
            }
        } catch (IOException e) {
            log.warn(e);
        }
        log.warn("Failed to create dir for tablet in table " + tableId + " in volume " + volume
                + " + will retry ...");
        sleepUninterruptibly(3, TimeUnit.SECONDS);
    }
}
From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java
License:Apache License
/**
 * Adds all the jars in the same path as the blur jar files.
 *
 * @param conf the job configuration whose "tmpjars" property receives the jars
 * @throws IOException
 */
public static void addAllJarsInBlurLib(Configuration conf) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    jars.addAll(conf.getStringCollection("tmpjars"));

    String property = System.getProperty("java.class.path");
    String[] files = property.split("\\:");

    String blurLibPath = getPath("blur-", files);
    if (blurLibPath == null) {
        return;
    }

    List<String> pathes = getPathes(blurLibPath, files);
    for (String pathStr : pathes) {
        Path path = new Path(pathStr);
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path);
            continue;
        }
        jars.add(path.makeQualified(localFs.getUri(), localFs.getWorkingDirectory()).toString());
    }
    if (jars.isEmpty()) {
        return;
    }
    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java
License:Apache License
/**
 * Add the jars containing the given classes to the job's configuration such
 * that JobClient will ship them to the cluster and add them to the
 * DistributedCache.
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    // Add jars that are already in the tmpjars variable
    jars.addAll(conf.getStringCollection("tmpjars"));

    // Add jars containing the specified classes
    for (Class<?> clazz : classes) {
        if (clazz == null) {
            continue;
        }
        String pathStr = findOrCreateJar(clazz);
        if (pathStr == null) {
            LOG.warn("Could not find jar for class " + clazz + " in order to ship it to the cluster.");
            continue;
        }
        Path path = new Path(pathStr);
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path + " for class " + clazz);
            continue;
        }
        jars.add(path.makeQualified(localFs.getUri(), localFs.getWorkingDirectory()).toString());
    }
    if (jars.isEmpty()) {
        return;
    }
    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Configuration configuration = context.getConfiguration();
    _autoGenerateRecordIdAsHashOfData = isAutoGenerateRecordIdAsHashOfData(configuration);
    _autoGenerateRowIdAsHashOfData = isAutoGenerateRowIdAsHashOfData(configuration);
    if (_autoGenerateRecordIdAsHashOfData || _autoGenerateRowIdAsHashOfData) {
        try {
            _digest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IOException(e);
        }
    }
    _columnNameMap = getFamilyAndColumnNameMap(configuration);
    _separator = new String(Base64.decodeBase64(configuration.get(BLUR_CSV_SEPARATOR_BASE64, _separator)),
            UTF_8);
    _splitter = Splitter.on(_separator);

    Path fileCurrentlyProcessing = getCurrentFile(context);
    Collection<String> families = configuration.getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILIES);
    OUTER: for (String family : families) {
        Collection<String> pathStrCollection = configuration
                .getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILY_PREFIX + family);
        for (String pathStr : pathStrCollection) {
            Path path = new Path(pathStr);
            FileSystem fileSystem = path.getFileSystem(configuration);
            path = path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
            if (isParent(path, fileCurrentlyProcessing)) {
                _familyFromPath = family;
                _familyNotInFile = true;
                break OUTER;
            }
        }
    }
}
From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java
License:Apache License
protected Path getCurrentFile(Context context) throws IOException {
    InputSplit split = context.getInputSplit();
    if (split != null && split instanceof FileSplit) {
        FileSplit inputSplit = (FileSplit) split;
        Path path = inputSplit.getPath();
        FileSystem fileSystem = path.getFileSystem(context.getConfiguration());
        return path.makeQualified(fileSystem.getUri(), fileSystem.getWorkingDirectory());
    }
    return null;
}
From source file:org.apache.falcon.entity.FileSystemStorage.java
License:Apache License
public String getUriTemplate(LocationType locationType, List<Location> locationList) {
    Location locationForType = null;
    for (Location location : locationList) {
        if (location.getType() == locationType) {
            locationForType = location;
            break;
        }
    }

    if (locationForType == null || StringUtils.isEmpty(locationForType.getPath())) {
        return null;
    }

    // normalize the path so trailing and double '/' are removed
    Path locationPath = new Path(locationForType.getPath());
    locationPath = locationPath.makeQualified(getDefaultUri(), getWorkingDir());

    if (isRelativePath(locationPath)) {
        locationPath = new Path(storageUrl + locationPath);
    }

    return locationPath.toString();
}