Example usage for org.apache.hadoop.fs Path toUri

List of usage examples for org.apache.hadoop.fs Path toUri

Introduction

On this page you can find usage examples for org.apache.hadoop.fs Path toUri.

Prototype

public URI toUri() 

Document

Convert this Path to a URI.
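
Before the project examples below, here is a minimal standalone sketch of what toUri() exposes; the namenode host and file name are made-up illustration values, not taken from any of the sources on this page.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriExample {
    public static void main(String[] args) {
        // A fully qualified HDFS path (hypothetical host and file).
        Path path = new Path("hdfs://namenode:8020/user/alice/data.txt");
        URI uri = path.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:8020
        System.out.println(uri.getPath());      // /user/alice/data.txt
    }
}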

Usage

From source file:co.cask.cdap.data2.util.hbase.CoprocessorUtil.java

License:Apache License

/**
 * Returns information for all coprocessors configured for the table.
 *
 * @return a Map from coprocessor class name to {@link CoprocessorDescriptor}
 */
public static Map<String, CoprocessorDescriptor> getCoprocessors(HTableDescriptor tableDescriptor) {
    Map<String, CoprocessorDescriptor> info = Maps.newHashMap();

    // Extract information about existing data janitor coprocessor
    // The following logic is copied from RegionCoprocessorHost in HBase
    for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> entry : tableDescriptor.getValues()
            .entrySet()) {
        String key = Bytes.toString(entry.getKey().get()).trim();
        String spec = Bytes.toString(entry.getValue().get()).trim();

        if (!HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(key).matches()) {
            continue;
        }

        try {
            Matcher matcher = HConstants.CP_HTD_ATTR_VALUE_PATTERN.matcher(spec);
            if (!matcher.matches()) {
                continue;
            }

            String className = matcher.group(2).trim();
            Path path = matcher.group(1).trim().isEmpty() ? null : new Path(matcher.group(1).trim());
            int priority = matcher.group(3).trim().isEmpty() ? Coprocessor.PRIORITY_USER
                    : Integer.valueOf(matcher.group(3));
            String cfgSpec = null;
            try {
                cfgSpec = matcher.group(4);
            } catch (IndexOutOfBoundsException ex) {
                // ignore
            }

            Map<String, String> properties = Maps.newHashMap();
            if (cfgSpec != null) {
                cfgSpec = cfgSpec.substring(cfgSpec.indexOf('|') + 1);
                // do an explicit deep copy of the passed configuration
                Matcher m = HConstants.CP_HTD_ATTR_VALUE_PARAM_PATTERN.matcher(cfgSpec);
                while (m.find()) {
                    properties.put(m.group(1), m.group(2));
                }
            }
            String pathStr = path == null ? null : path.toUri().getPath();
            info.put(className, new CoprocessorDescriptor(className, pathStr, priority, properties));
        } catch (Exception ex) {
            LOG.warn("Coprocessor attribute '{}' has invalid coprocessor specification '{}'", key, spec, ex);
        }
    }

    return info;
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java

License:Apache License

/**
 * given two paths as input:
 *    base: /my/base/path
 *    file: /my/base/path/some/other/file
 * return "some/other/file"
 */
private String getRelative(Path base, Path file) {
    return base.toUri().relativize(file.toUri()).getPath();
}
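
As a quick illustration of the relativize() idiom above, here is a hypothetical standalone sketch (not part of the original source) using the same example paths from the comment:

import org.apache.hadoop.fs.Path;

public class RelativizeExample {
    public static void main(String[] args) {
        Path base = new Path("/my/base/path");
        Path file = new Path("/my/base/path/some/other/file");
        // relativize() strips the base URI's path from the file URI,
        // so getPath() returns the relative string "some/other/file".
        System.out.println(base.toUri().relativize(file.toUri()).getPath());
    }
}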

From source file:co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper.java

License:Apache License

/**
 * Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
 *
 * @param hConf the job configuration
 * @return the framework URI or {@code null} if not present or if the URI in the config is invalid.
 */
@Nullable
public static URI getFrameworkURI(Configuration hConf) {
    String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
    if (framework == null) {
        return null;
    }

    try {
        // Parse the path. It can contain '#' to represent the localized file name.
        URI uri = new URI(framework);
        String linkName = uri.getFragment();

        // The following resolution logic is copied from JobSubmitter in MR.
        FileSystem fs = FileSystem.get(hConf);
        Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
        FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
        frameworkPath = fc.resolvePath(frameworkPath);
        uri = frameworkPath.toUri();

        // If there is no localized name (in the URI fragment), use the last part of the URI path as the name
        if (linkName == null) {
            linkName = uri.getPath();
            int idx = linkName.lastIndexOf('/');
            if (idx >= 0) {
                linkName = linkName.substring(idx + 1);
            }
        }
        return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, linkName);
    } catch (URISyntaxException e) {
        LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    } catch (IOException e) {
        LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.",
                framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    }
    return null;
}

From source file:co.cask.hydrator.plugin.batch.CopybookRecordReader.java

License:Apache License

@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // Get configuration
    Configuration conf = context.getConfiguration();
    int fileStructure = net.sf.JRecord.Common.Constants.IO_FIXED_LENGTH;
    Path path = new Path(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH));
    FileSystem fs = FileSystem.get(path.toUri(), conf);
    // Create input stream for the COBOL copybook contents
    InputStream inputStream = IOUtils
            .toInputStream(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_CBL_CONTENTS), "UTF-8");
    BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);
    try {
        externalRecord = CopybookIOUtils.getExternalRecord(bufferedInputStream);
        recordByteLength = CopybookIOUtils.getRecordLength(externalRecord, fileStructure);

        LineProvider lineProvider = LineIOProvider.getInstance().getLineProvider(fileStructure,
                CopybookIOUtils.FONT);
        reader = LineIOProvider.getInstance().getLineReader(fileStructure, lineProvider);
        LayoutDetail copybook = CopybookIOUtils.getLayoutDetail(externalRecord);

        org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;

        start = fileSplit.getStart();
        end = start + fileSplit.getLength();

        BufferedInputStream fileIn = new BufferedInputStream(fs.open(fileSplit.getPath()));
        // Jump to the point in the split at which the first complete record of the split starts,
        // if not the first InputSplit
        if (start != 0) {
            position = start - (start % recordByteLength) + recordByteLength;
            fileIn.skip(position);
        }
        reader.open(fileIn, copybook);

    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:co.cask.hydrator.plugin.batch.source.ExcelReaderRegexFilter.java

License:Apache License

@Override
public boolean accept(Path path) {
    try {
        fs = FileSystem.get(path.toUri(), conf);
        if (fs.isDirectory(path)) {
            return true;
        }

        boolean patternMatch = true;
        Matcher matcher = pattern.matcher(path.toString());
        patternMatch = matcher.find();
        if (patternMatch && !conf.getBoolean(RE_PROCESS, false)
                && CollectionUtils.isNotEmpty(preProcessedFileList)) {
            patternMatch = !preProcessedFileList.contains(path.toString());
        }

        return patternMatch;
    } catch (IOException e) {
        return false;
    }
}

From source file:co.cask.hydrator.plugin.HDFSSinkTest.java

License:Apache License

@Test
public void testHDFSSink() throws Exception {
    String inputDatasetName = "input-hdfssinktest";
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputDatasetName));

    Path outputDir = dfsCluster.getFileSystem().getHomeDirectory();
    ETLStage sink = new ETLStage("HDFS",
            new ETLPlugin("HDFS", BatchSink.PLUGIN_TYPE,
                    ImmutableMap.<String, String>builder().put("path", outputDir.toUri().toString())
                            .put(Constants.Reference.REFERENCE_NAME, "HDFSinkTest").build(),
                    null));
    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *").addStage(source).addStage(sink)
            .addConnection(source.getName(), sink.getName()).build();

    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
    Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "HDFSTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);

    DataSetManager<Table> inputManager = getDataset(inputDatasetName);
    List<StructuredRecord> input = ImmutableList.of(
            StructuredRecord.builder(SCHEMA).set("ticker", "AAPL").set("num", 10).set("price", 400.23).build(),
            StructuredRecord.builder(SCHEMA).set("ticker", "CDAP").set("num", 13).set("price", 123.23).build());
    MockSource.writeInput(inputManager, input);

    MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
    mrManager.start();
    mrManager.waitForFinish(5, TimeUnit.MINUTES);

    Path[] outputFiles = FileUtil.stat2Paths(
            dfsCluster.getFileSystem().listStatus(outputDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    Assert.assertNotNull(outputFiles);
    Assert.assertTrue(outputFiles.length > 0);
    int count = 0;
    List<String> lines = new ArrayList<>();
    for (Path path : outputFiles) {
        InputStream in = dfsCluster.getFileSystem().open(path);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {
            lines.add(line);
            if (line.contains("AAPL") || line.contains("CDAP")) {
                count++;
            }
        }
        reader.close();
    }
    Assert.assertEquals(2, lines.size());
    Assert.assertEquals(2, count);
}

From source file:com.addthis.hydra.task.output.HDFSOutputWrapperFactory.java

License:Apache License

/**
 * Opens a write stream for an HDFS output. Most of the complexity in this
 * method is related to determining the correct file name based on the given
 * {@code target} parameter. If the file already exists and we are appending to it,
 * that file is renamed and a new stream is opened that appends to it. If the file
 * does not exist, a new file is created with a .tmp extension. When the stream is
 * closed, the file is renamed to remove the .tmp extension.
 *
 * @param target - the base file name of the target output stream
 * @param outputFlags - output flags setting various options about the output stream
 * @param streamEmitter - the emitter that can convert bundles into the desired byte arrays for output
 * @return a OutputWrapper which can be used to write bytes to the new stream
 * @throws IOException propagated from underlying components
 */
@Override
public OutputWrapper openWriteStream(String target, OutputStreamFlags outputFlags,
        OutputStreamEmitter streamEmitter) throws IOException {
    log.debug("[open] {}target={} hdfs", outputFlags, target);
    String modifiedTarget = getModifiedTarget(target, outputFlags);
    Path targetPath = new Path(dir, modifiedTarget);
    Path targetPathTmp = new Path(dir, modifiedTarget.concat(".tmp"));
    boolean exists = fileSystem.exists(targetPath);
    FSDataOutputStream outputStream;
    if (exists) {
        log.debug("[open.append]{}/ renaming to {}/{}", targetPath, targetPathTmp,
                fileSystem.exists(targetPathTmp));
        if (!fileSystem.rename(targetPath, targetPathTmp)) {
            throw new IOException("Unable to rename " + targetPath.toUri() + " to " + targetPathTmp.toUri());
        }
        outputStream = fileSystem.append(targetPathTmp);
    } else {
        outputStream = fileSystem.create(targetPathTmp, false);
    }
    OutputStream wrappedStream = wrapOutputStream(outputFlags, exists, outputStream);
    return new HDFSOutputWrapper(wrappedStream, streamEmitter, outputFlags.isCompress(),
            outputFlags.getCompressType(), target, targetPath, targetPathTmp, fileSystem);
}

From source file:com.alexholmes.hdfsslurper.Configurator.java

License:Apache License

public static void checkScheme(Path p, ConfigNames config) throws ConfigSettingException {
    if (StringUtils.isBlank(p.toUri().getScheme())) {
        throw new ConfigSettingException("The " + config.name() + " scheme cannot be null."
                + " An example of a valid scheme is 'hdfs://localhost:8020/tmp' or 'file:/tmp'");
    }
}
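
To make the scheme check concrete, here is a hypothetical sketch (not from the original source): toUri().getScheme() returns the scheme of a fully qualified path and null for a bare path, which checkScheme() would reject.

import org.apache.hadoop.fs.Path;

public class SchemeCheckExample {
    public static void main(String[] args) {
        // Fully qualified path: scheme is "hdfs".
        System.out.println(new Path("hdfs://localhost:8020/tmp").toUri().getScheme());
        // Bare path: scheme is null, so checkScheme() would throw.
        System.out.println(new Path("/tmp").toUri().getScheme());
    }
}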

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {

    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // run a script which can change the name of the file as well as
        // write out a new version of the file
        //
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // get the target HDFS file
        //
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // get the staging HDFS file
        //
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
                srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
                + "$batchId#" + batchId);

        // if the directory of the target file doesn't exist, attempt to
        // create it
        //
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // if the staging directory doesn't exist, attempt to create it
        //
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destParentDir.toUri());
            }
        }

        // copy the file
        //
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);

            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }

            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                    "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }

        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize + "$batchId#"
                + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
                + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);

    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // delete the staging file if it still exists
        //
        try {
            if (destFs != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }

}

From source file:com.alexholmes.hdfsslurper.WorkerThread.java

License:Apache License

private Path stageSource(FileStatus srcFile) throws IOException {
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();
    Path p = new Path(ScriptExecutor.getStdOutFromScript(config.getWorkScript(), srcFile.getPath().toString(),
            60, TimeUnit.SECONDS, config.getFileNameBatchIdDelimiter()));
    String batchId = p.toString().substring(p.toString().lastIndexOf(filenameBatchidDelimiter) + 1,
            p.toString().length());
    if (p.toUri().getScheme() == null) {
        throw new IOException(
                "event#Work path from script must be a URI with a scheme: '" + p + "'" + "$batchId#" + batchId);
    }
    log.info("event#Staging script returned new file '" + p + " for old " + srcFile.getPath() + "$batchId#"
            + batchId);
    return p;
}