Example usage for org.apache.hadoop.fs Path makeQualified

List of usage examples for org.apache.hadoop.fs Path makeQualified

Introduction

This page collects example usages of the org.apache.hadoop.fs.Path.makeQualified method.

Prototype

@InterfaceAudience.LimitedPrivate({ "HDFS", "MapReduce" })
public Path makeQualified(URI defaultUri, Path workingDir) 

Source Link

Document

Returns a qualified path object.

Usage

From source file:com.thinkbiganalytics.kylo.catalog.spark.sources.spark.SparkDataSetContext.java

License:Apache License

/**
 * Resolves the specified URIs by removing files that have been previously read.
 *
 * <p>A Hadoop {@link Job} is configured with the stored high water mark and the optional
 * minimum / maximum file ages, the qualified input paths are listed through a
 * {@link HighWaterMarkInputFormat}, and the resulting high water mark is stored back on the
 * client. When no file is selected, the single placeholder {@code file:/dev/null} is returned.</p>
 *
 * @param uris the locations to scan
 * @return the paths of the selected files, or a single placeholder path if there are none
 * @throws KyloCatalogException if a data set option is invalid
 * @throws IOException          if an I/O error occurs
 */
@Nonnull
@SuppressWarnings({ "squid:HiddenFieldCheck", "squid:S1192" })
private List<String> resolveHighWaterMarkPaths(@Nonnull final List<String> uris) throws IOException {
    // Build the job that carries the input format settings
    final Configuration hadoopConf = delegate.getHadoopConfiguration(client);
    final String markName = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.HIGH_WATER_MARK),
            SparkUtil.getOrElse(getOption(HIGH_WATER_MARK_OPTION), null));
    final Job job = Job.getInstance(hadoopConf);

    // Previously recorded high water mark, if any
    final String markValue = client.getHighWaterMarks().get(markName);
    if (markValue != null) {
        try {
            HighWaterMarkInputFormat.setHighWaterMark(job, Long.parseLong(markValue));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + HIGH_WATER_MARK_OPTION + " value: " + markValue, e);
        }
    }

    // Upper bound on file age
    final String maxAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MAX_FILE_AGE),
            SparkUtil.getOrElse(getOption(MAX_AGE_OPTION), null));
    if (maxAge != null) {
        try {
            HighWaterMarkInputFormat.setMaxFileAge(job, Long.parseLong(maxAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MAX_AGE_OPTION + " value: " + maxAge, e);
        }
    }

    // Lower bound on file age
    final String minAge = SparkUtil.getOrElse(getOption(HighWaterMarkInputFormat.MIN_FILE_AGE),
            SparkUtil.getOrElse(getOption(MIN_AGE_OPTION), null));
    if (minAge != null) {
        try {
            HighWaterMarkInputFormat.setMinFileAge(job, Long.parseLong(minAge));
        } catch (final NumberFormatException e) {
            throw new KyloCatalogException("Invalid " + MIN_AGE_OPTION + " value: " + minAge, e);
        }
    }

    // Qualify every URI against its own file system
    final Path[] inputPaths = new Path[uris.size()];
    int next = 0;
    for (final String uri : uris) {
        final Path unqualified = new Path(uri);
        final FileSystem pathFs = unqualified.getFileSystem(hadoopConf);
        inputPaths[next++] = unqualified.makeQualified(pathFs.getUri(), pathFs.getWorkingDirectory());
    }
    HighWaterMarkInputFormat.setInputPaths(job, inputPaths);

    // List the files and remember the new high water mark
    final HighWaterMarkInputFormat format = new HighWaterMarkInputFormat();
    final List<FileStatus> selected = format.listStatus(job);
    client.setHighWaterMarks(
            Collections.singletonMap(markName, Long.toString(format.getLastHighWaterMark())));

    // Convert the selected files to strings
    final List<String> result = new ArrayList<>(selected.size());
    if (selected.isEmpty()) {
        // Placeholder path returned when nothing was selected
        result.add("file:/dev/null");
        return result;
    }
    for (final FileStatus status : selected) {
        result.add(status.getPath().toString());
    }
    return result;
}

From source file:io.dstream.tez.utils.HdfsSerializerUtils.java

License:Apache License

/**
 * Serializes an object to a file on the given file system and returns the fully qualified
 * {@link Path} of that file.
 *
 * @param source     the object to serialize; must not be null
 * @param fs         the file system to write to; must not be null
 * @param targetPath the file to create; must not be null
 * @return {@code targetPath} qualified against the URI and working directory of {@code fs}
 * @throws IllegalStateException if the file cannot be created, written or closed
 */
public static Path serialize(Object source, FileSystem fs, Path targetPath) {
    Assert.notNull(targetPath, "'targetPath' must not be null");
    Assert.notNull(fs, "'fs' must not be null");
    Assert.notNull(source, "'source' must not be null");

    Path resultPath = targetPath.makeQualified(fs.getUri(), fs.getWorkingDirectory());
    // try-with-resources guarantees the stream is closed on every path, including when
    // serialization fails; a failure while closing (i.e. flushing) is reported like any
    // other write failure instead of being lost.
    try (OutputStream targetOutputStream = fs.create(targetPath)) {
        SerializationUtils.serialize(source, targetOutputStream);
    } catch (Exception e) {
        throw new IllegalStateException("Failed to serialize " + source + " to " + resultPath, e);
    }
    return resultPath;
}

From source file:org.apache.accumulo.core.file.rfile.PrintInfo.java

License:Apache License

/**
 * Prints information about every file named in the parsed options.
 *
 * <p>For each file the method picks a file system, prints the qualified path, opens an RFile
 * reader, prints its info and delegates to the bcfile {@code PrintInfo}. Depending on the
 * options it then walks the data (or the sample data) to dump key/value pairs and to feed the
 * size histogram and key statistics, and finally prints the requested reports.</p>
 *
 * <p>The histogram and key statistics objects are created once, before the file loop, so the
 * values printed after each file include the data of all files processed so far.</p>
 *
 * @param args command line arguments, parsed into {@code Opts}
 * @throws Exception if parsing, file system access or reading a file fails
 */
@Override
public void execute(final String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(PrintInfo.class.getName(), args);
    if (opts.files.isEmpty()) {
        System.err.println("No files were given");
        System.exit(-1);
    }

    // Hadoop configuration, extended with any extra configuration files from the options
    Configuration conf = new Configuration();
    for (String confFile : opts.configFiles) {
        log.debug("Adding Hadoop configuration file " + confFile);
        conf.addResource(new Path(confFile));
    }

    FileSystem hadoopFs = FileSystem.get(conf);
    FileSystem localFs = FileSystem.getLocal(conf);

    // Shared across all files: these are never reset inside the loop below
    LogHistogram kvHistogram = new LogHistogram();

    KeyStats dataKeyStats = new KeyStats();
    KeyStats indexKeyStats = new KeyStats();

    for (String arg : opts.files) {
        Path path = new Path(arg);
        FileSystem fs;
        // An argument containing ':' is treated as a URI and resolved by its own file system
        if (arg.contains(":"))
            fs = path.getFileSystem(conf);
        else {
            log.warn("Attempting to find file across filesystems. Consider providing URI instead of path");
            fs = hadoopFs.exists(path) ? hadoopFs : localFs; // fall back to local
        }
        System.out.println(
                "Reading file: " + path.makeQualified(fs.getUri(), fs.getWorkingDirectory()).toString());

        CachableBlockFile.Reader _rdr = new CachableBlockFile.Reader(fs, path, conf, null, null,
                SiteConfiguration.getInstance(DefaultConfiguration.getInstance()));
        Reader iter = new RFile.Reader(_rdr);
        MetricsGatherer<Map<String, ArrayList<VisibilityMetric>>> vmg = new VisMetricsGatherer();

        // Visibility metrics are only gathered when one of the two reports that use them is requested
        if (opts.vis || opts.hash)
            iter.registerMetrics(vmg);

        iter.printInfo();
        System.out.println();
        org.apache.accumulo.core.file.rfile.bcfile.PrintInfo.main(new String[] { arg });

        Map<String, ArrayList<ByteSequence>> localityGroupCF = null;

        // The data is only scanned when at least one option needs the individual entries
        if (opts.histogram || opts.dump || opts.vis || opts.hash || opts.keyStats) {
            localityGroupCF = iter.getLocalityGroupCF();

            FileSKVIterator dataIter;
            if (opts.useSample) {
                dataIter = iter.getSample();

                if (dataIter == null) {
                    System.out.println("ERROR : This rfile has no sample data");
                    // NOTE(review): returns without reaching iter.close() below
                    return;
                }
            } else {
                dataIter = iter;
            }

            if (opts.keyStats) {
                // Index keys are collected separately from data keys
                FileSKVIterator indexIter = iter.getIndex();
                while (indexIter.hasTop()) {
                    indexKeyStats.add(indexIter.getTopKey());
                    indexIter.next();
                }
            }

            for (Entry<String, ArrayList<ByteSequence>> cf : localityGroupCF.entrySet()) {

                // Unbounded range, limited to the column families of this locality group
                dataIter.seek(new Range((Key) null, (Key) null), cf.getValue(), true);
                while (dataIter.hasTop()) {
                    Key key = dataIter.getTopKey();
                    Value value = dataIter.getTopValue();
                    if (opts.dump) {
                        System.out.println(key + " -> " + value);
                        // Stop as soon as standard output reports an error
                        // NOTE(review): returns without reaching iter.close() below
                        if (System.out.checkError())
                            return;
                    }
                    if (opts.histogram) {
                        kvHistogram.add(key.getSize() + value.getSize());
                    }
                    if (opts.keyStats) {
                        dataKeyStats.add(key);
                    }
                    dataIter.next();
                }
            }
        }

        iter.close();

        if (opts.vis || opts.hash) {
            System.out.println();
            vmg.printMetrics(opts.hash, "Visibility", System.out);
        }

        if (opts.histogram) {
            System.out.println();
            kvHistogram.print("");
        }

        if (opts.keyStats) {
            System.out.println();
            System.out.println("Statistics for keys in data :");
            dataKeyStats.print("\t");
            System.out.println();
            System.out.println("Statistics for keys in index :");
            indexKeyStats.print("\t");
        }
        // If the output stream has closed, there is no reason to keep going.
        if (System.out.checkError())
            return;
    }
}

From source file:org.apache.accumulo.server.util.TabletOperations.java

License:Apache License

/**
 * Creates the directory for a tablet and returns its fully qualified location.
 *
 * <p>A tablet without an end row uses the default tablet location and accepts a directory
 * that already exists; any other tablet gets a freshly allocated directory name that must not
 * exist yet. The attempt is repeated every three seconds until a directory is available.</p>
 *
 * @param fs      volume manager used to choose the volume and create the directory
 * @param tableId id of the table owning the tablet
 * @param endRow  end row of the tablet, or {@code null} to use the default tablet location
 * @return the qualified path of the tablet directory, as a string
 * @throws IllegalStateException if a freshly allocated directory name already exists
 */
public static String createTabletDirectory(VolumeManager fs, String tableId, Text endRow) {
    final UniqueNameAllocator allocator = UniqueNameAllocator.getInstance();
    final String volume = fs.choose(ServerConstants.getTablesDirs());
    final boolean useDefaultLocation = (endRow == null);

    for (;;) {
        try {
            final String dirName = useDefaultLocation ? Constants.DEFAULT_TABLET_LOCATION
                    : "/" + Constants.GENERATED_TABLET_DIRECTORY_PREFIX + allocator.getNextName();
            final Path dirPath = new Path(volume + "/" + tableId + "/" + dirName);

            final boolean available;
            if (useDefaultLocation) {
                // The default location may be left over from an earlier attempt
                available = fs.exists(dirPath) || fs.mkdirs(dirPath);
            } else {
                if (fs.exists(dirPath)) {
                    throw new IllegalStateException("Dir exist when it should not " + dirPath);
                }
                available = fs.mkdirs(dirPath);
            }

            if (available) {
                final FileSystem dirFs = fs.getVolumeByPath(dirPath).getFileSystem();
                return dirPath.makeQualified(dirFs.getUri(), dirFs.getWorkingDirectory()).toString();
            }
            if (useDefaultLocation) {
                log.warn("Failed to create " + dirPath + " for unknown reason");
            }
        } catch (IOException e) {
            log.warn(e);
        }

        log.warn("Failed to create dir for tablet in table " + tableId + " in volume " + volume
                + " + will retry ...");
        UtilWaitThread.sleep(3000);
    }
}

From source file:org.apache.accumulo.tserver.tablet.Tablet.java

License:Apache License

/**
 * Creates the directory for a tablet and returns its fully qualified location.
 *
 * <p>A tablet without an end row uses the default tablet location and accepts a directory
 * that already exists; any other tablet gets a freshly allocated directory name that must not
 * exist yet. The attempt is repeated every three seconds until a directory is available.</p>
 *
 * @param fs      volume manager used to choose the volume and create the directory
 * @param tableId id of the table owning the tablet
 * @param endRow  end row of the tablet, or {@code null} to use the default tablet location
 * @return the qualified path of the tablet directory, as a string
 * @throws IllegalStateException if a freshly allocated directory name already exists
 */
private static String createTabletDirectory(VolumeManager fs, String tableId, Text endRow) {
    final UniqueNameAllocator allocator = UniqueNameAllocator.getInstance();
    final String volume = fs.choose(Optional.of(tableId), ServerConstants.getBaseUris())
            + Constants.HDFS_TABLES_DIR + Path.SEPARATOR;
    final boolean useDefaultLocation = (endRow == null);

    for (;;) {
        try {
            final String dirName = useDefaultLocation ? Constants.DEFAULT_TABLET_LOCATION
                    : "/" + Constants.GENERATED_TABLET_DIRECTORY_PREFIX + allocator.getNextName();
            final Path dirPath = new Path(volume + "/" + tableId + "/" + dirName);

            final boolean available;
            if (useDefaultLocation) {
                // The default location may be left over from an earlier attempt
                available = fs.exists(dirPath) || fs.mkdirs(dirPath);
            } else {
                if (fs.exists(dirPath)) {
                    throw new IllegalStateException("Dir exist when it should not " + dirPath);
                }
                available = fs.mkdirs(dirPath);
            }

            if (available) {
                final FileSystem dirFs = fs.getVolumeByPath(dirPath).getFileSystem();
                return dirPath.makeQualified(dirFs.getUri(), dirFs.getWorkingDirectory()).toString();
            }
            if (useDefaultLocation) {
                log.warn("Failed to create " + dirPath + " for unknown reason");
            }
        } catch (IOException e) {
            log.warn(e);
        }

        log.warn("Failed to create dir for tablet in table " + tableId + " in volume " + volume
                + " + will retry ...");
        sleepUninterruptibly(3, TimeUnit.SECONDS);
    }
}

From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java

License:Apache License

/**
 * Adds all the jars in the same path as the blur jar files.
 * /*from  ww  w  . ja v  a2s . c  om*/
 * @param conf
 * @throws IOException
 */
public static void addAllJarsInBlurLib(Configuration conf) throws IOException {
    FileSystem localFs = FileSystem.getLocal(conf);
    Set<String> jars = new HashSet<String>();
    jars.addAll(conf.getStringCollection("tmpjars"));

    String property = System.getProperty("java.class.path");
    String[] files = property.split("\\:");

    String blurLibPath = getPath("blur-", files);
    if (blurLibPath == null) {
        return;
    }
    List<String> pathes = getPathes(blurLibPath, files);
    for (String pathStr : pathes) {
        Path path = new Path(pathStr);
        if (!localFs.exists(path)) {
            LOG.warn("Could not validate jar file " + path);
            continue;
        }
        jars.add(path.makeQualified(localFs.getUri(), localFs.getWorkingDirectory()).toString());
    }
    if (jars.isEmpty()) {
        return;
    }
    conf.set("tmpjars", StringUtils.arrayToString(jars.toArray(new String[0])));
}

From source file:org.apache.blur.mapreduce.lib.BlurMapReduceUtil.java

License:Apache License

/**
 * Registers the jars that contain the given classes in the {@code tmpjars} property of the
 * configuration, in addition to the jars already listed there.
 *
 * <p>{@code null} classes are skipped. A class whose jar cannot be located, or whose jar does
 * not exist on the local file system, is skipped with a warning.</p>
 *
 * @param conf    the configuration whose {@code tmpjars} property is extended
 * @param classes the classes whose jars should be registered
 * @throws IOException if the local file system cannot be accessed
 */
public static void addDependencyJars(Configuration conf, Class<?>... classes) throws IOException {
    final FileSystem local = FileSystem.getLocal(conf);

    // Start from whatever is already registered
    final Set<String> jarSet = new HashSet<String>();
    jarSet.addAll(conf.getStringCollection("tmpjars"));

    for (Class<?> candidate : classes) {
        if (candidate != null) {
            final String jarLocation = findOrCreateJar(candidate);
            if (jarLocation == null) {
                LOG.warn("Could not find jar for class " + candidate + " in order to ship it to the cluster.");
            } else {
                final Path jarPath = new Path(jarLocation);
                if (local.exists(jarPath)) {
                    jarSet.add(jarPath.makeQualified(local.getUri(), local.getWorkingDirectory()).toString());
                } else {
                    LOG.warn("Could not validate jar file " + jarPath + " for class " + candidate);
                }
            }
        }
    }

    // Leave the configuration untouched when there is nothing to register
    if (!jarSet.isEmpty()) {
        conf.set("tmpjars", StringUtils.arrayToString(jarSet.toArray(new String[0])));
    }
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

/**
 * Initializes the mapper from the job configuration.
 *
 * <p>Reads the id generation flags (creating an MD5 digest when either is set), the column name
 * mapping and the Base64 encoded separator, then checks whether the file currently being
 * processed lies under one of the configured family paths. The first family with a matching
 * path is recorded as the family of the file.</p>
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    final Configuration conf = context.getConfiguration();

    _autoGenerateRecordIdAsHashOfData = isAutoGenerateRecordIdAsHashOfData(conf);
    _autoGenerateRowIdAsHashOfData = isAutoGenerateRowIdAsHashOfData(conf);
    final boolean digestRequired = _autoGenerateRecordIdAsHashOfData || _autoGenerateRowIdAsHashOfData;
    if (digestRequired) {
        try {
            _digest = MessageDigest.getInstance("MD5");
        } catch (NoSuchAlgorithmException e) {
            throw new IOException(e);
        }
    }

    _columnNameMap = getFamilyAndColumnNameMap(conf);

    // The separator is stored Base64 encoded in the configuration
    final String encodedSeparator = conf.get(BLUR_CSV_SEPARATOR_BASE64, _separator);
    _separator = new String(Base64.decodeBase64(encodedSeparator), UTF_8);
    _splitter = Splitter.on(_separator);

    final Path currentFile = getCurrentFile(context);
    for (String family : conf.getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILIES)) {
        for (String mapping : conf.getStringCollection(BLUR_CSV_FAMILY_PATH_MAPPINGS_FAMILY_PREFIX + family)) {
            final Path mappedPath = new Path(mapping);
            final FileSystem mappedFs = mappedPath.getFileSystem(conf);
            final Path qualified = mappedPath.makeQualified(mappedFs.getUri(), mappedFs.getWorkingDirectory());
            if (isParent(qualified, currentFile)) {
                _familyFromPath = family;
                _familyNotInFile = true;
                // First match wins; nothing else is left to set up
                return;
            }
        }
    }
}

From source file:org.apache.blur.mapreduce.lib.CsvBlurMapper.java

License:Apache License

/**
 * Returns the qualified path of the file behind the current input split.
 *
 * @param context the task context providing the input split and the configuration
 * @return the qualified file path, or {@code null} if the split is not a {@code FileSplit}
 * @throws IOException if the file system of the path cannot be obtained
 */
protected Path getCurrentFile(Context context) throws IOException {
    final InputSplit current = context.getInputSplit();
    // instanceof is false for null, so this also covers a missing split
    if (!(current instanceof FileSplit)) {
        return null;
    }
    final Path splitPath = ((FileSplit) current).getPath();
    final FileSystem splitFs = splitPath.getFileSystem(context.getConfiguration());
    return splitPath.makeQualified(splitFs.getUri(), splitFs.getWorkingDirectory());
}

From source file:org.apache.falcon.entity.FileSystemStorage.java

License:Apache License

/**
 * Builds the URI template for the first location of the requested type.
 *
 * @param locationType the type of location to look for
 * @param locationList the locations to search
 * @return the qualified location path as a string, prefixed with the storage URL when the
 *         qualified path is still relative; {@code null} if no location of that type exists
 *         or its path is empty
 */
public String getUriTemplate(LocationType locationType, List<Location> locationList) {
    Location matched = null;
    for (Location candidate : locationList) {
        if (candidate.getType() != locationType) {
            continue;
        }
        matched = candidate;
        break;
    }

    if (matched == null || StringUtils.isEmpty(matched.getPath())) {
        return null;
    }

    // Going through Path normalizes the location: trailing and double '/' are removed
    final Path qualified = new Path(matched.getPath()).makeQualified(getDefaultUri(), getWorkingDir());
    final Path resolved = isRelativePath(qualified) ? new Path(storageUrl + qualified) : qualified;
    return resolved.toString();
}