List of usage examples for org.apache.hadoop.fs.Path.toUri()
public URI toUri()
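Before the per-project examples, a minimal illustrative sketch (not taken from any of the sources below) of what toUri() yields for common Path constructions; the printed values assume standard Hadoop Path normalization:

    import java.net.URI;
    import org.apache.hadoop.fs.Path;

    public class PathToUriDemo {
        public static void main(String[] args) {
            // A scheme-less path yields a URI with null scheme and authority.
            URI local = new Path("/tmp/data").toUri();
            System.out.println(local.getScheme());   // null
            System.out.println(local.getPath());     // /tmp/data

            // A fully qualified path keeps scheme, authority, and path.
            URI hdfs = new Path("hdfs://localhost:8020/tmp/data").toUri();
            System.out.println(hdfs.getScheme());    // hdfs
            System.out.println(hdfs.getAuthority()); // localhost:8020
            System.out.println(hdfs.getPath());      // /tmp/data
        }
    }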
From source file:co.cask.cdap.data2.util.hbase.CoprocessorUtil.java
License:Apache License
/**
 * Returns information for all coprocessors configured for the table.
 *
 * @return a Map from coprocessor class name to {@link CoprocessorDescriptor}
 */
public static Map<String, CoprocessorDescriptor> getCoprocessors(HTableDescriptor tableDescriptor) {
    Map<String, CoprocessorDescriptor> info = Maps.newHashMap();

    // Extract information about the existing data janitor coprocessor.
    // The following logic is copied from RegionCoprocessorHost in HBase.
    for (Map.Entry<ImmutableBytesWritable, ImmutableBytesWritable> entry :
            tableDescriptor.getValues().entrySet()) {
        String key = Bytes.toString(entry.getKey().get()).trim();
        String spec = Bytes.toString(entry.getValue().get()).trim();

        if (!HConstants.CP_HTD_ATTR_KEY_PATTERN.matcher(key).matches()) {
            continue;
        }

        try {
            Matcher matcher = HConstants.CP_HTD_ATTR_VALUE_PATTERN.matcher(spec);
            if (!matcher.matches()) {
                continue;
            }

            String className = matcher.group(2).trim();
            Path path = matcher.group(1).trim().isEmpty() ? null : new Path(matcher.group(1).trim());
            int priority = matcher.group(3).trim().isEmpty()
                ? Coprocessor.PRIORITY_USER
                : Integer.valueOf(matcher.group(3));

            String cfgSpec = null;
            try {
                cfgSpec = matcher.group(4);
            } catch (IndexOutOfBoundsException ex) {
                // ignore
            }

            Map<String, String> properties = Maps.newHashMap();
            if (cfgSpec != null) {
                cfgSpec = cfgSpec.substring(cfgSpec.indexOf('|') + 1);
                // Do an explicit deep copy of the passed configuration.
                Matcher m = HConstants.CP_HTD_ATTR_VALUE_PARAM_PATTERN.matcher(cfgSpec);
                while (m.find()) {
                    properties.put(m.group(1), m.group(2));
                }
            }

            String pathStr = path == null ? null : path.toUri().getPath();
            info.put(className, new CoprocessorDescriptor(className, pathStr, priority, properties));
        } catch (Exception ex) {
            LOG.warn("Coprocessor attribute '{}' has invalid coprocessor specification '{}'", key, spec, ex);
        }
    }
    return info;
}
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.partitioned.DynamicPartitioningOutputCommitter.java
License:Apache License
/**
 * Given two paths as input:
 *   base: /my/base/path
 *   file: /my/base/path/some/other/file
 * return "some/other/file".
 */
private String getRelative(Path base, Path file) {
    return base.toUri().relativize(file.toUri()).getPath();
}
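A quick illustrative check of that helper's behavior, with hypothetical paths; it relies on standard java.net.URI#relativize semantics, which return the argument unchanged when it is not under the base:

    import org.apache.hadoop.fs.Path;

    public class RelativizeDemo {
        public static void main(String[] args) {
            Path base = new Path("/my/base/path");
            Path file = new Path("/my/base/path/some/other/file");
            // Prints "some/other/file": relativize strips the common prefix.
            System.out.println(base.toUri().relativize(file.toUri()).getPath());

            // Caveat: a path outside the base is returned unchanged (still absolute),
            // so callers should not assume the result is always relative.
            Path outside = new Path("/elsewhere/file");
            System.out.println(base.toUri().relativize(outside.toUri()).getPath()); // /elsewhere/file
        }
    }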
From source file:co.cask.cdap.internal.app.runtime.batch.distributed.MapReduceContainerHelper.java
License:Apache License
/**
 * Gets the MapReduce framework URI based on the {@code mapreduce.application.framework.path} setting.
 *
 * @param hConf the job configuration
 * @return the framework URI, or {@code null} if not present or if the URI in the config is invalid.
 */
@Nullable
public static URI getFrameworkURI(Configuration hConf) {
    String framework = hConf.get(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
    if (framework == null) {
        return null;
    }

    try {
        // Parse the path. It can contain '#' to represent the localized file name.
        URI uri = new URI(framework);
        String linkName = uri.getFragment();

        // The following resolution logic is copied from JobSubmitter in MR.
        FileSystem fs = FileSystem.get(hConf);
        Path frameworkPath = fs.makeQualified(new Path(uri.getScheme(), uri.getAuthority(), uri.getPath()));
        FileContext fc = FileContext.getFileContext(frameworkPath.toUri(), hConf);
        frameworkPath = fc.resolvePath(frameworkPath);
        uri = frameworkPath.toUri();

        // If there is no localized name (in the URI fragment), use the last part of the URI path as the name.
        if (linkName == null) {
            linkName = uri.getPath();
            int idx = linkName.lastIndexOf('/');
            if (idx >= 0) {
                linkName = linkName.substring(idx + 1);
            }
        }
        return new URI(uri.getScheme(), uri.getAuthority(), uri.getPath(), null, linkName);
    } catch (URISyntaxException e) {
        LOG.warn("Failed to parse {} as a URI. MapReduce framework path is not used. Check the setting for {}.",
                 framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    } catch (IOException e) {
        LOG.warn("Failed to resolve {} URI. MapReduce framework path is not used. Check the setting for {}.",
                 framework, MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH, e);
    }
    return null;
}
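A small illustrative sketch (hypothetical namenode address; standard java.net.URI behavior) of the fragment handling this method relies on:

    import java.net.URI;
    import java.net.URISyntaxException;

    public class FrameworkUriDemo {
        public static void main(String[] args) throws URISyntaxException {
            // The framework path may carry a '#fragment' naming the localized file.
            URI withLink = new URI("hdfs://namenode:8020/mr/framework.tar.gz#mr-framework");
            System.out.println(withLink.getFragment()); // mr-framework

            // Without a fragment, getFrameworkURI falls back to the last path segment.
            URI noLink = new URI("hdfs://namenode:8020/mr/framework.tar.gz");
            System.out.println(noLink.getFragment());   // null
        }
    }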
From source file:co.cask.hydrator.plugin.batch.CopybookRecordReader.java
License:Apache License
@Override
public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException {
    // Get configuration.
    Configuration conf = context.getConfiguration();
    int fileStructure = net.sf.JRecord.Common.Constants.IO_FIXED_LENGTH;
    Path path = new Path(conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_DATA_HDFS_PATH));
    FileSystem fs = FileSystem.get(path.toUri(), conf);

    // Create an input stream for the COBOL copybook contents.
    InputStream inputStream = IOUtils.toInputStream(
        conf.get(CopybookInputFormat.COPYBOOK_INPUTFORMAT_CBL_CONTENTS), "UTF-8");
    BufferedInputStream bufferedInputStream = new BufferedInputStream(inputStream);

    try {
        externalRecord = CopybookIOUtils.getExternalRecord(bufferedInputStream);
        recordByteLength = CopybookIOUtils.getRecordLength(externalRecord, fileStructure);

        LineProvider lineProvider = LineIOProvider.getInstance().getLineProvider(fileStructure, CopybookIOUtils.FONT);
        reader = LineIOProvider.getInstance().getLineReader(fileStructure, lineProvider);
        LayoutDetail copybook = CopybookIOUtils.getLayoutDetail(externalRecord);

        org.apache.hadoop.mapreduce.lib.input.FileSplit fileSplit =
            (org.apache.hadoop.mapreduce.lib.input.FileSplit) split;
        start = fileSplit.getStart();
        end = start + fileSplit.getLength();

        BufferedInputStream fileIn = new BufferedInputStream(fs.open(fileSplit.getPath()));

        // Jump to the point in the split at which the first complete record starts,
        // if this is not the first InputSplit.
        if (start != 0) {
            position = start - (start % recordByteLength) + recordByteLength;
            fileIn.skip(position);
        }
        reader.open(fileIn, copybook);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
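The FileSystem.get(path.toUri(), conf) call above selects the filesystem implementation from the URI's scheme and authority. A minimal sketch of that pattern, with hypothetical paths:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class FsFromUriDemo {
        public static void main(String[] args) throws Exception {
            Configuration conf = new Configuration();

            // The URI's scheme and authority pick the FileSystem implementation:
            // an hdfs:// path returns a client bound to that namenode...
            FileSystem hdfs = FileSystem.get(new Path("hdfs://namenode:8020/data").toUri(), conf);
            System.out.println(hdfs.getUri());  // hdfs://namenode:8020

            // ...while a file:/ path returns the local filesystem.
            FileSystem local = FileSystem.get(new Path("file:/tmp/data").toUri(), conf);
            System.out.println(local.getUri()); // file:///
        }
    }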
From source file:co.cask.hydrator.plugin.batch.source.ExcelReaderRegexFilter.java
License:Apache License
@Override
public boolean accept(Path path) {
    try {
        fs = FileSystem.get(path.toUri(), conf);
        if (fs.isDirectory(path)) {
            return true;
        }
        Matcher matcher = pattern.matcher(path.toString());
        boolean patternMatch = matcher.find();
        if (patternMatch && !conf.getBoolean(RE_PROCESS, false)
            && CollectionUtils.isNotEmpty(preProcessedFileList)) {
            patternMatch = !preProcessedFileList.contains(path.toString());
        }
        return patternMatch;
    } catch (IOException e) {
        return false;
    }
}
From source file:co.cask.hydrator.plugin.HDFSSinkTest.java
License:Apache License
@Test
public void testHDFSSink() throws Exception {
    String inputDatasetName = "input-hdfssinktest";
    ETLStage source = new ETLStage("source", MockSource.getPlugin(inputDatasetName));

    Path outputDir = dfsCluster.getFileSystem().getHomeDirectory();
    ETLStage sink = new ETLStage("HDFS",
        new ETLPlugin("HDFS", BatchSink.PLUGIN_TYPE,
            ImmutableMap.<String, String>builder()
                .put("path", outputDir.toUri().toString())
                .put(Constants.Reference.REFERENCE_NAME, "HDFSinkTest").build(),
            null));

    ETLBatchConfig etlConfig = ETLBatchConfig.builder("* * * * *")
        .addStage(source)
        .addStage(sink)
        .addConnection(source.getName(), sink.getName())
        .build();

    AppRequest<ETLBatchConfig> appRequest = new AppRequest<>(ETLBATCH_ARTIFACT, etlConfig);
    Id.Application appId = Id.Application.from(Id.Namespace.DEFAULT, "HDFSTest");
    ApplicationManager appManager = deployApplication(appId, appRequest);

    DataSetManager<Table> inputManager = getDataset(inputDatasetName);
    List<StructuredRecord> input = ImmutableList.of(
        StructuredRecord.builder(SCHEMA).set("ticker", "AAPL").set("num", 10).set("price", 400.23).build(),
        StructuredRecord.builder(SCHEMA).set("ticker", "CDAP").set("num", 13).set("price", 123.23).build());
    MockSource.writeInput(inputManager, input);

    MapReduceManager mrManager = appManager.getMapReduceManager(ETLMapReduce.NAME);
    mrManager.start();
    mrManager.waitForFinish(5, TimeUnit.MINUTES);

    Path[] outputFiles = FileUtil.stat2Paths(
        dfsCluster.getFileSystem().listStatus(outputDir, new Utils.OutputFileUtils.OutputFilesFilter()));
    Assert.assertNotNull(outputFiles);
    Assert.assertTrue(outputFiles.length > 0);

    int count = 0;
    List<String> lines = new ArrayList<>();
    for (Path path : outputFiles) {
        InputStream in = dfsCluster.getFileSystem().open(path);
        BufferedReader reader = new BufferedReader(new InputStreamReader(in));
        String line;
        while ((line = reader.readLine()) != null) {
            lines.add(line);
            if (line.contains("AAPL") || line.contains("CDAP")) {
                count++;
            }
        }
        reader.close();
    }
    Assert.assertEquals(2, lines.size());
    Assert.assertEquals(2, count);
}
From source file:com.addthis.hydra.task.output.HDFSOutputWrapperFactory.java
License:Apache License
/**
 * Opens a write stream for an HDFS output. Most of the complexity in this
 * method is related to determining the correct file name based on the given
 * {@code target} parameter. If the file already exists and we are appending
 * to an existing file, then we rename that file and open a new stream which
 * appends data to it. If the file does not exist, a new file is created
 * with a .tmp extension. When the stream is closed, the file is renamed to
 * remove the .tmp extension.
 *
 * @param target the base file name of the target output stream
 * @param outputFlags output flags setting various options about the output stream
 * @param streamEmitter the emitter that can convert bundles into the desired byte arrays for output
 * @return an OutputWrapper which can be used to write bytes to the new stream
 * @throws IOException propagated from underlying components
 */
@Override
public OutputWrapper openWriteStream(String target, OutputStreamFlags outputFlags,
                                     OutputStreamEmitter streamEmitter) throws IOException {
    log.debug("[open] {}target={} hdfs", outputFlags, target);
    String modifiedTarget = getModifiedTarget(target, outputFlags);
    Path targetPath = new Path(dir, modifiedTarget);
    Path targetPathTmp = new Path(dir, modifiedTarget.concat(".tmp"));
    boolean exists = fileSystem.exists(targetPath);
    FSDataOutputStream outputStream;
    if (exists) {
        log.debug("[open.append] renaming {} to {} (tmp exists: {})",
                  targetPath, targetPathTmp, fileSystem.exists(targetPathTmp));
        if (!fileSystem.rename(targetPath, targetPathTmp)) {
            throw new IOException("Unable to rename " + targetPath.toUri() + " to " + targetPathTmp.toUri());
        }
        outputStream = fileSystem.append(targetPathTmp);
    } else {
        outputStream = fileSystem.create(targetPathTmp, false);
    }
    OutputStream wrappedStream = wrapOutputStream(outputFlags, exists, outputStream);
    return new HDFSOutputWrapper(wrappedStream, streamEmitter, outputFlags.isCompress(),
        outputFlags.getCompressType(), target, targetPath, targetPathTmp, fileSystem);
}
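The append-by-rename idiom described in the javadoc can be sketched on its own. This is a minimal sketch (hypothetical helper name openForTarget, hypothetical paths; only standard FileSystem calls), not the class's actual implementation:

    // Move an existing file aside to a .tmp name and append to it, or
    // create a fresh .tmp file; the caller renames it back on close.
    FSDataOutputStream openForTarget(FileSystem fs, Path target) throws IOException {
        Path tmp = new Path(target.toString() + ".tmp");
        if (fs.exists(target)) {
            if (!fs.rename(target, tmp)) {
                throw new IOException("Unable to rename " + target.toUri() + " to " + tmp.toUri());
            }
            return fs.append(tmp);
        }
        return fs.create(tmp, false);
    }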
From source file:com.alexholmes.hdfsslurper.Configurator.java
License:Apache License
public static void checkScheme(Path p, ConfigNames config) throws ConfigSettingException {
    if (StringUtils.isBlank(p.toUri().getScheme())) {
        throw new ConfigSettingException("The " + config.name() + " scheme cannot be null."
            + " An example of a valid scheme is 'hdfs://localhost:8020/tmp' or 'file:/tmp'");
    }
}
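This check works because toUri() preserves whatever scheme (or lack of one) the Path was built with; a quick illustration with hypothetical paths:

    import org.apache.hadoop.fs.Path;

    public class SchemeDemo {
        public static void main(String[] args) {
            // A path without an explicit scheme yields a URI whose scheme is null,
            // which StringUtils.isBlank(null) treats as blank.
            System.out.println(new Path("/tmp").toUri().getScheme());                      // null

            // Fully qualified paths keep their scheme.
            System.out.println(new Path("hdfs://localhost:8020/tmp").toUri().getScheme()); // hdfs
            System.out.println(new Path("file:/tmp").toUri().getScheme());                 // file
        }
    }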
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private void process(FileStatus srcFileStatus) throws IOException, InterruptedException {
    Path stagingFile = null;
    FileSystem destFs = null;
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();

    try {
        FileSystem srcFs = srcFileStatus.getPath().getFileSystem(config.getConfig());

        // Run a script which can change the name of the file as well as
        // write out a new version of the file.
        if (config.getWorkScript() != null) {
            Path newSrcFile = stageSource(srcFileStatus);
            srcFileStatus = srcFileStatus.getPath().getFileSystem(config.getConfig()).getFileStatus(newSrcFile);
        }

        Path srcFile = srcFileStatus.getPath();

        // Get the target HDFS file.
        Path destFile = getHdfsTargetPath(srcFileStatus);

        if (config.getCodec() != null) {
            String ext = config.getCodec().getDefaultExtension();
            if (!destFile.getName().endsWith(ext)) {
                destFile = new Path(destFile.toString() + ext);
            }
        }

        destFs = destFile.getFileSystem(config.getConfig());

        // Get the staging HDFS file.
        stagingFile = fileSystemManager.getStagingFile(srcFileStatus, destFile);
        String batchId = srcFile.toString().substring(
            srcFile.toString().lastIndexOf(filenameBatchidDelimiter) + 1, srcFile.toString().length());

        log.info("event#Copying source file '" + srcFile + "' to staging destination '" + stagingFile + "'"
            + "$batchId#" + batchId);

        // If the directory of the target file doesn't exist, attempt to create it.
        Path destParentDir = destFile.getParent();
        if (!destFs.exists(destParentDir)) {
            log.info("event#Attempting creation of target directory: " + destParentDir.toUri());
            if (!destFs.mkdirs(destParentDir)) {
                throw new IOException("event#Failed to create target directory: " + destParentDir.toUri());
            }
        }

        // If the staging directory doesn't exist, attempt to create it.
        Path destStagingParentDir = stagingFile.getParent();
        if (!destFs.exists(destStagingParentDir)) {
            log.info("event#Attempting creation of staging directory: " + destStagingParentDir.toUri());
            if (!destFs.mkdirs(destStagingParentDir)) {
                throw new IOException("event#Failed to create staging directory: " + destStagingParentDir.toUri());
            }
        }

        // Copy the file.
        InputStream is = null;
        OutputStream os = null;
        CRC32 crc = new CRC32();
        try {
            is = new BufferedInputStream(srcFs.open(srcFile));
            if (config.isVerify()) {
                is = new CheckedInputStream(is, crc);
            }
            os = destFs.create(stagingFile);
            if (config.getCodec() != null) {
                os = config.getCodec().createOutputStream(os);
            }
            IOUtils.copyBytes(is, os, 4096, false);
        } finally {
            IOUtils.closeStream(is);
            IOUtils.closeStream(os);
        }

        long srcFileSize = srcFs.getFileStatus(srcFile).getLen();
        long destFileSize = destFs.getFileStatus(stagingFile).getLen();
        if (config.getCodec() == null && srcFileSize != destFileSize) {
            throw new IOException(
                "event#File sizes don't match, source = " + srcFileSize + ", dest = " + destFileSize);
        }
        log.info("event#Local file size = " + srcFileSize + ", HDFS file size = " + destFileSize
            + "$batchId#" + batchId);

        if (config.isVerify()) {
            verify(stagingFile, crc.getValue());
        }

        if (destFs.exists(destFile)) {
            destFs.delete(destFile, false);
        }

        log.info("event#Moving staging file '" + stagingFile + "' to destination '" + destFile + "'"
            + "$batchId#" + batchId);
        if (!destFs.rename(stagingFile, destFile)) {
            throw new IOException("event#Failed to rename file");
        }

        if (config.isCreateLzopIndex() && destFile.getName().endsWith(lzopExt)) {
            Path lzoIndexPath = new Path(destFile.toString() + LzoIndex.LZO_INDEX_SUFFIX);
            if (destFs.exists(lzoIndexPath)) {
                log.info("event#Deleting index file as it already exists");
                destFs.delete(lzoIndexPath, false);
            }
            indexer.index(destFile);
        }

        fileSystemManager.fileCopyComplete(srcFileStatus);
    } catch (Throwable t) {
        log.error("event#Caught exception working on file " + srcFileStatus.getPath(), t);

        // Delete the staging file if it still exists.
        try {
            if (destFs != null && stagingFile != null && destFs.exists(stagingFile)) {
                destFs.delete(stagingFile, false);
            }
        } catch (Throwable t2) {
            log.error("event#Failed to delete staging file " + stagingFile, t2);
        }

        fileSystemManager.fileCopyError(srcFileStatus);
    }
}
From source file:com.alexholmes.hdfsslurper.WorkerThread.java
License:Apache License
private Path stageSource(FileStatus srcFile) throws IOException {
    String filenameBatchidDelimiter = config.getFileNameBatchIdDelimiter();
    Path p = new Path(ScriptExecutor.getStdOutFromScript(config.getWorkScript(), srcFile.getPath().toString(),
        60, TimeUnit.SECONDS, config.getFileNameBatchIdDelimiter()));
    String batchId = p.toString().substring(p.toString().lastIndexOf(filenameBatchidDelimiter) + 1,
        p.toString().length());
    if (p.toUri().getScheme() == null) {
        throw new IOException(
            "event#Work path from script must be a URI with a scheme: '" + p + "'" + "$batchId#" + batchId);
    }
    log.info("event#Staging script returned new file '" + p + "' for old " + srcFile.getPath()
        + "$batchId#" + batchId);
    return p;
}