List of usage examples for org.apache.hadoop.fs.Path.mergePaths
public static Path mergePaths(Path path1, Path path2)
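Before the project-specific examples, a minimal sketch of the method's contract may help: mergePaths appends the path component of path2 onto path1 and keeps path1's scheme and authority. The URIs below are illustrative assumptions, not taken from any of the listed projects.

import org.apache.hadoop.fs.Path;

public class MergePathsDemo {
    public static void main(String[] args) {
        // path2's path component is appended to path1; path1's scheme and authority are preserved
        Path merged = Path.mergePaths(new Path("hdfs://namenode/user/alice"),
                new Path("/data/part-00000"));
        System.out.println(merged); // hdfs://namenode/user/alice/data/part-00000
    }
}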
From source file:com.cloudera.impala.planner.S3PlannerTest.java
License:Apache License
/**
 * Remove any non-constant components of the given file path. For S3, the
 * actual bucket name, which will be unique to the tester's setup, needs to
 * be replaced with a fixed bucket name.
 */
@Override
protected Path cleanseFilePath(Path path) {
    path = super.cleanseFilePath(path);
    URI fsURI = fsName.toUri();
    URI pathURI = path.toUri();
    Assert.assertTrue("error: " + path + " is not on filesystem " + fsName,
            fsURI.getScheme().equals(pathURI.getScheme())
                    && fsURI.getAuthority().equals(pathURI.getAuthority()));
    return Path.mergePaths(S3A_CANONICAL_BUCKET, path);
}
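A rough illustration of the cleansing effect (the constant S3A_CANONICAL_BUCKET is defined elsewhere in the test; the bucket names below are assumptions): because mergePaths keeps the first argument's scheme and authority, the tester-specific bucket is replaced by the fixed one while the rest of the path is preserved.

// assuming S3A_CANONICAL_BUCKET were new Path("s3a://fake-bucket")
Path cleansed = Path.mergePaths(new Path("s3a://fake-bucket"),
        new Path("s3a://testers-real-bucket/tpch/customer"));
// -> s3a://fake-bucket/tpch/customer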
From source file:com.facebook.presto.hdfs.HDFSConfig.java
License:Apache License
public static Path formPath(String dirOrFile) {
    String base = getMetaserverStore();
    String path = dirOrFile;
    // strip trailing '/' characters from the base, one at a time
    while (base.endsWith("/")) {
        base = base.substring(0, base.length() - 1);
    }
    if (!path.startsWith("/")) {
        path = "/" + path;
    }
    return Path.mergePaths(new Path(base), new Path(path));
}
From source file:com.facebook.presto.hdfs.HDFSConfig.java
License:Apache License
public static Path formPath(String dirOrFile1, String dirOrFile2) {
    String base = getMetaserverStore();
    String path1 = dirOrFile1;
    String path2 = dirOrFile2;
    // strip trailing '/' characters from the base, one at a time
    while (base.endsWith("/")) {
        base = base.substring(0, base.length() - 1);
    }
    if (!path1.startsWith("/")) {
        path1 = "/" + path1;
    }
    if (path1.endsWith("/")) {
        path1 = path1.substring(0, path1.length() - 1);
    }
    if (!path2.startsWith("/")) {
        path2 = "/" + path2;
    }
    return Path.mergePaths(Path.mergePaths(new Path(base), new Path(path1)), new Path(path2));
}
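As a hedged sketch of what the two overloads above produce (the metaserver store value is an assumption, not taken from the project): with a store of hdfs://ns1/presto-meta/, formPath("warehouse", "orders") normalizes the slashes and merges the pieces roughly like this.

// equivalent merging, with the assumed store value already stripped of its trailing '/'
Path p = Path.mergePaths(Path.mergePaths(new Path("hdfs://ns1/presto-meta"), new Path("/warehouse")),
        new Path("/orders"));
// -> hdfs://ns1/presto-meta/warehouse/orders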
From source file:com.ikanow.aleph2.analytics.hadoop.assets.BeFileInputReader.java
License:Open Source License
/** For input files (pure enrichment, not when used for analytics), deletes or archives the files following completion */
private void archiveOrDeleteFile() {
    try {
        final Path currentPath = _fileSplit.getPath(_currFile);
        // First check - only do anything if this is an internal job:
        if (!currentPath.toString().contains(IStorageService.TO_IMPORT_DATA_SUFFIX)) {
            return; // (not your file to modify....)
        }
        final boolean storage_enabled = Optional.ofNullable(_dataBucket.data_schema())
                .map(ds -> ds.storage_schema()).map(ss -> Optional.ofNullable(ss.enabled()).orElse(true))
                .orElse(false);
        final boolean archive_enabled = storage_enabled
                && Optionals.of(() -> _dataBucket.data_schema().storage_schema().raw())
                        .map(raw -> Optional.ofNullable(raw.enabled()).orElse(true)).orElse(false);
        if (archive_enabled) {
            Path newPath = createArchivePath(currentPath);
            _fs.mkdirs(newPath);
            @SuppressWarnings("unused")
            final boolean success = _fs.rename(currentPath,
                    Path.mergePaths(newPath, new Path("/" + currentPath.getName())));
        } else {
            _fs.delete(currentPath, false);
        }
    } catch (Exception e) {
        logger.error(ErrorUtils.getLongForm(HadoopErrorUtils.EXCEPTION_CAUGHT, e));
        // We're just going to move on if we can't delete the file, it's
        // probably a permissions error
    }
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.BeFileInputReader.java
License:Open Source License
/** Returns the temporal (or not) directory in which to place raw files
 * @param currentPath
 * @return
 * @throws Exception
 */
private Path createArchivePath(Path currentPath) throws Exception {

    final String timeGroupingFormat = TimeUtils.getTimePeriod(Optionals
            .of(() -> _dataBucket.data_schema().storage_schema().processed().grouping_time_period()).orElse(""))
            .validation(fail -> "",
                    success -> TimeUtils.getTimeBasedSuffix(success, Optional.of(ChronoUnit.MINUTES)));

    final String timeGroup = timeGroupingFormat.isEmpty() ? IStorageService.NO_TIME_SUFFIX
            : (new SimpleDateFormat(timeGroupingFormat)).format(start);

    Path storedPath = Path.mergePaths(
            currentPath.getParent().getParent().getParent().getParent()
            // (ie up 3 to the root, ie managed_bucket==first subdir)
            , new Path(IStorageService.STORED_DATA_SUFFIX_RAW + timeGroup));

    return storedPath;
}
From source file:jadoop.HadoopGridJob.java
License:Open Source License
/**
 * Creates a temporary working directory on the Hadoop HDFS for the job that
 * will be running. The name of this temporary directory will be the name
 * given to the job. If there is an existing directory with the same name as
 * the job's name, this method generates a new name to use so that the
 * temporary directory does not share a name with another directory on the
 * HDFS.
 *
 * @return the path of the new temporary working directory on the HDFS
 *
 * @throws IOException
 *             if there is a problem creating the temporary working
 *             directory.
 */
private Path createTemporaryDirectory(FileSystem fs) throws IOException {
    // path to the HDFS system
    Path hdfsHome = fs.getHomeDirectory();

    // base name of the temporary working directory.
    Path newHDFSDir = new Path("/" + jobName);

    // full path to the temporary working directory on the HDFS.
    Path tempHDFSWorkingDir = Path.mergePaths(hdfsHome, newHDFSDir);

    // append numbers to the job name until there is no conflict...
    int number = 1;
    while (fs.exists(tempHDFSWorkingDir)) {
        Path jobNum = new Path("/" + jobName + number);
        tempHDFSWorkingDir = Path.mergePaths(hdfsHome, jobNum);
        number++;
    }

    // make the directory on the HDFS and return the path to it.
    fs.mkdirs(tempHDFSWorkingDir);
    return tempHDFSWorkingDir;
}
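A small sketch of the collision handling above (home directory and job name are assumed values): the first candidate is the home directory plus the job name, and the loop appends 1, 2, ... until the path is free.

Path home = new Path("hdfs://nn/user/bob");                  // assumed fs.getHomeDirectory()
Path first = Path.mergePaths(home, new Path("/wordcount"));  // hdfs://nn/user/bob/wordcount
Path retry = Path.mergePaths(home, new Path("/wordcount1")); // tried if the first already exists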
From source file:org.apache.drill.exec.planner.logical.partition.PruneScanRule.java
License:Apache License
protected void doOnMatch(RelOptRuleCall call, Filter filterRel, Project projectRel, TableScan scanRel) {
  final String pruningClassName = getClass().getName();
  logger.debug("Beginning partition pruning, pruning class: {}", pruningClassName);
  Stopwatch totalPruningTime = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;

  final PlannerSettings settings = PrelUtil.getPlannerSettings(call.getPlanner());
  PartitionDescriptor descriptor = getPartitionDescriptor(settings, scanRel);
  final BufferAllocator allocator = optimizerContext.getAllocator();

  final Object selection = getDrillTable(scanRel).getSelection();
  MetadataContext metaContext = null;
  if (selection instanceof FormatSelection) {
    metaContext = ((FormatSelection) selection).getSelection().getMetaContext();
  }

  RexNode condition;
  if (projectRel == null) {
    condition = filterRel.getCondition();
  } else {
    // get the filter as if it were below the projection.
    condition = RelOptUtil.pushPastProject(filterRel.getCondition(), projectRel);
  }

  RewriteAsBinaryOperators visitor = new RewriteAsBinaryOperators(true, filterRel.getCluster().getRexBuilder());
  condition = condition.accept(visitor);

  Map<Integer, String> fieldNameMap = new HashMap<>();
  List<String> fieldNames = scanRel.getRowType().getFieldNames();
  BitSet columnBitset = new BitSet();
  BitSet partitionColumnBitSet = new BitSet();
  Map<Integer, Integer> partitionMap = new HashMap<>();

  int relColIndex = 0;
  for (String field : fieldNames) {
    final Integer partitionIndex = descriptor.getIdIfValid(field);
    if (partitionIndex != null) {
      fieldNameMap.put(partitionIndex, field);
      partitionColumnBitSet.set(partitionIndex);
      columnBitset.set(relColIndex);
      // mapping between the relColIndex and partitionIndex
      partitionMap.put(relColIndex, partitionIndex);
    }
    relColIndex++;
  }

  if (partitionColumnBitSet.isEmpty()) {
    if (totalPruningTime != null) {
      logger.debug("No partition columns are projected from the scan..continue. Total pruning elapsed time: {} ms",
          totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
    }
    setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
    return;
  }

  // stop watch to track how long we spend in different phases of pruning
  // first track how long we spend building the filter tree
  Stopwatch miscTimer = logger.isDebugEnabled() ? Stopwatch.createStarted() : null;

  FindPartitionConditions c = new FindPartitionConditions(columnBitset, filterRel.getCluster().getRexBuilder());
  c.analyze(condition);
  RexNode pruneCondition = c.getFinalCondition();
  BitSet referencedDirsBitSet = c.getReferencedDirs();

  if (miscTimer != null) {
    logger.debug("Total elapsed time to build and analyze filter tree: {} ms",
        miscTimer.elapsed(TimeUnit.MILLISECONDS));
    miscTimer.reset();
  }

  if (pruneCondition == null) {
    if (totalPruningTime != null) {
      logger.debug("No conditions were found eligible for partition pruning. Total pruning elapsed time: {} ms",
          totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
    }
    setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
    return;
  }

  // set up the partitions
  List<PartitionLocation> newPartitions = new ArrayList<>();
  long numTotal = 0; // total number of partitions
  int batchIndex = 0;
  PartitionLocation firstLocation = null;
  LogicalExpression materializedExpr = null;
  String[] spInfo = null;
  int maxIndex = -1;
  BitSet matchBitSet = new BitSet();

  // Outer loop: iterate over a list of batches of PartitionLocations
  for (List<PartitionLocation> partitions : descriptor) {
    numTotal += partitions.size();
    logger.debug("Evaluating partition pruning for batch {}", batchIndex);
    if (batchIndex == 0) { // save the first location in case everything is pruned
      firstLocation = partitions.get(0);
    }
    final NullableBitVector output = new NullableBitVector(
        MaterializedField.create("", Types.optional(MinorType.BIT)), allocator);
    final VectorContainer container = new VectorContainer();

    try {
      final ValueVector[] vectors = new ValueVector[descriptor.getMaxHierarchyLevel()];
      for (int partitionColumnIndex : BitSets.toIter(partitionColumnBitSet)) {
        SchemaPath column = SchemaPath.getSimplePath(fieldNameMap.get(partitionColumnIndex));
        MajorType type = descriptor.getVectorType(column, settings);
        MaterializedField field = MaterializedField
            .create(column.getLastSegment().getNameSegment().getPath(), type);
        ValueVector v = TypeHelper.getNewVector(field, allocator);
        v.allocateNew();
        vectors[partitionColumnIndex] = v;
        container.add(v);
      }

      if (miscTimer != null) {
        // track how long we spend populating partition column vectors
        miscTimer.start();
      }

      // populate partition vectors.
      descriptor.populatePartitionVectors(vectors, partitions, partitionColumnBitSet, fieldNameMap);

      if (miscTimer != null) {
        logger.debug("Elapsed time to populate partitioning column vectors: {} ms within batchIndex: {}",
            miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex);
        miscTimer.reset();
      }

      // materialize the expression; only need to do this once
      if (batchIndex == 0) {
        materializedExpr = materializePruneExpr(pruneCondition, settings, scanRel, container);
        if (materializedExpr == null) {
          // continue without partition pruning; no need to log anything here since
          // materializePruneExpr logs it already
          if (totalPruningTime != null) {
            logger.debug("Total pruning elapsed time: {} ms",
                totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
          }
          setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
          return;
        }
      }

      output.allocateNew(partitions.size());

      if (miscTimer != null) {
        // start the timer to evaluate how long we spend in the interpreter evaluation
        miscTimer.start();
      }

      InterpreterEvaluator.evaluate(partitions.size(), optimizerContext, container, output, materializedExpr);

      if (miscTimer != null) {
        logger.debug("Elapsed time in interpreter evaluation: {} ms within batchIndex: {} with # of partitions : {}",
            miscTimer.elapsed(TimeUnit.MILLISECONDS), batchIndex, partitions.size());
        miscTimer.reset();
      }

      int recordCount = 0;
      int qualifiedCount = 0;

      if (descriptor.supportsMetadataCachePruning() && partitions.get(0)
          .isCompositePartition() /* apply single partition check only for composite partitions */) {
        // Inner loop: within each batch iterate over the PartitionLocations
        for (PartitionLocation part : partitions) {
          assert part.isCompositePartition();
          if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
            newPartitions.add(part);
            // Rather than using the PartitionLocation, get the array of partition values for the directories that are
            // referenced by the filter since we are not interested in directory references in other parts of the query.
            Pair<String[], Integer> p = composePartition(referencedDirsBitSet, partitionMap, vectors, recordCount);
            String[] parts = p.getLeft();
            int tmpIndex = p.getRight();
            maxIndex = Math.max(maxIndex, tmpIndex);

            if (spInfo == null) { // initialization
              spInfo = parts;
              for (int j = 0; j <= tmpIndex; j++) {
                if (parts[j] != null) {
                  matchBitSet.set(j);
                }
              }
            } else {
              // compare the new partition with existing partition
              for (int j = 0; j <= tmpIndex; j++) {
                if (parts[j] == null || spInfo[j] == null) { // nulls don't match
                  matchBitSet.clear(j);
                } else {
                  if (!parts[j].equals(spInfo[j])) {
                    matchBitSet.clear(j);
                  }
                }
              }
            }

            qualifiedCount++;
          }
          recordCount++;
        }
      } else {
        // Inner loop: within each batch iterate over the PartitionLocations
        for (PartitionLocation part : partitions) {
          if (!output.getAccessor().isNull(recordCount) && output.getAccessor().get(recordCount) == 1) {
            newPartitions.add(part);
            qualifiedCount++;
          }
          recordCount++;
        }
      }
      logger.debug("Within batch {}: total records: {}, qualified records: {}", batchIndex, recordCount, qualifiedCount);
      batchIndex++;
    } catch (Exception e) {
      logger.warn("Exception while trying to prune partition.", e);
      if (totalPruningTime != null) {
        logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
      }
      setPruneStatus(metaContext, PruneStatus.NOT_PRUNED);
      return; // continue without partition pruning
    } finally {
      container.clear();
      if (output != null) {
        output.clear();
      }
    }
  }

  try {
    if (newPartitions.size() == numTotal) {
      logger.debug("No partitions were eligible for pruning");
      return;
    }

    // handle the case all partitions are filtered out.
    boolean canDropFilter = true;
    boolean wasAllPartitionsPruned = false;
    Path cacheFileRoot = null;

    if (newPartitions.isEmpty()) {
      assert firstLocation != null;
      // Add the first non-composite partition location, since execution requires schema.
      // In such case, we should not drop filter.
      newPartitions.add(firstLocation.getPartitionLocationRecursive().get(0));
      canDropFilter = false;
      // NOTE: with DRILL-4530, the PruneScanRule may be called with only a list of
      // directories first and the non-composite partition location will still return
      // directories, not files. So, additional processing is done depending on this flag
      wasAllPartitionsPruned = true;
      logger.debug("All {} partitions were pruned; added back a single partition to allow creating a schema",
          numTotal);

      // set the cacheFileRoot appropriately
      if (firstLocation.isCompositePartition()) {
        cacheFileRoot = Path.mergePaths(descriptor.getBaseTableLocation(),
            firstLocation.getCompositePartitionPath());
      }
    }

    logger.debug("Pruned {} partitions down to {}", numTotal, newPartitions.size());

    List<RexNode> conjuncts = RelOptUtil.conjunctions(condition);
    List<RexNode> pruneConjuncts = RelOptUtil.conjunctions(pruneCondition);
    conjuncts.removeAll(pruneConjuncts);
    RexNode newCondition = RexUtil.composeConjunction(filterRel.getCluster().getRexBuilder(), conjuncts, false);

    RewriteCombineBinaryOperators reverseVisitor = new RewriteCombineBinaryOperators(true,
        filterRel.getCluster().getRexBuilder());

    condition = condition.accept(reverseVisitor);
    pruneCondition = pruneCondition.accept(reverseVisitor);

    if (descriptor.supportsMetadataCachePruning() && !wasAllPartitionsPruned) {
      // if metadata cache file could potentially be used, then assign a proper cacheFileRoot
      int index = -1;
      if (!matchBitSet.isEmpty()) {
        StringBuilder path = new StringBuilder();
        index = matchBitSet.length() - 1;

        for (int j = 0; j < matchBitSet.length(); j++) {
          if (!matchBitSet.get(j)) {
            // stop at the first index with no match and use the immediate
            // previous index
            index = j - 1;
            break;
          }
        }
        for (int j = 0; j <= index; j++) {
          path.append("/").append(spInfo[j]);
        }
        cacheFileRoot = Path.mergePaths(descriptor.getBaseTableLocation(),
            DrillFileSystemUtil.createPathSafe(path.toString()));
      }
      if (index != maxIndex) {
        // if multiple partitions are being selected, we should not drop the filter
        // since we are reading the cache file at a parent/ancestor level
        canDropFilter = false;
      }
    }

    RelNode inputRel = descriptor.supportsMetadataCachePruning()
        ? descriptor.createTableScan(newPartitions, cacheFileRoot, wasAllPartitionsPruned, metaContext)
        : descriptor.createTableScan(newPartitions, wasAllPartitionsPruned);

    if (projectRel != null) {
      inputRel = projectRel.copy(projectRel.getTraitSet(), Collections.singletonList(inputRel));
    }

    if (newCondition.isAlwaysTrue() && canDropFilter) {
      call.transformTo(inputRel);
    } else {
      final RelNode newFilter = filterRel.copy(filterRel.getTraitSet(), Collections.singletonList(inputRel));
      call.transformTo(newFilter);
    }

    setPruneStatus(metaContext, PruneStatus.PRUNED);
  } catch (Exception e) {
    logger.warn("Exception while using the pruned partitions.", e);
  } finally {
    if (totalPruningTime != null) {
      logger.debug("Total pruning elapsed time: {} ms", totalPruningTime.elapsed(TimeUnit.MILLISECONDS));
    }
  }
}
From source file:org.apache.oozie.action.hadoop.TestGitMain.java
License:Apache License
public void testGitKeyFileIsCopiedToHDFS() throws Exception {
    final Path credentialFilePath = Path.mergePaths(getFsTestCaseDir(), new Path("/key_dir/my_key.dsa"));
    final String credentialFileData = "Key file data";

    Path.mergePaths(getFsTestCaseDir(), new Path("/destDir"));

    writeHDFSFile(credentialFilePath, credentialFileData);

    final File localFile = gitMain.getKeyFromFS(credentialFilePath);
    final String testOutput = new String(Files.readAllBytes(localFile.toPath()), StandardCharsets.UTF_8);

    assertTrue("credential file length mismatch", credentialFileData.length() > 0);
    assertEquals("credential file data mismatch", credentialFileData, testOutput);

    FileUtils.deleteDirectory(new File(localFile.getParent()));
}
From source file:org.apache.oozie.action.hadoop.TestIntegrationGitActionExecutor.java
License:Apache License
public void testWhenRepoIsClonedThenGitIndexContentIsReadSuccessfully() throws Exception {
    final Path outputPath = getFsTestCaseDir();
    final Path gitRepo = Path.mergePaths(outputPath, new Path("/repoDir"));
    final Path gitIndex = Path.mergePaths(gitRepo, new Path("/.git/config"));

    final GitServer gitServer = new GitServer();

    final String localRepo = String.format("git://127.0.0.1:%s/repo.git", gitServer.getLocalPort());
    final String actionXml = "<git>" + "<resource-manager>" + getJobTrackerUri() + "</resource-manager>"
            + "<name-node>" + getNameNodeUri() + "</name-node>" + "<git-uri>" + localRepo + "</git-uri>"
            + "<destination-uri>" + gitRepo + "</destination-uri>" + "</git>";

    final Context context = createContext(actionXml);
    final String launcherId = submitAction(context);

    try {
        gitServer.start();
        waitUntilYarnAppDoneAndAssertSuccess(launcherId);
    } finally {
        gitServer.stopAndCleanupReposServer();
    }

    final Map<String, String> actionData = LauncherHelper.getActionData(getFileSystem(), context.getActionDir(),
            context.getProtoActionConf());
    assertFalse(LauncherHelper.hasIdSwap(actionData));

    final GitActionExecutor ae = new GitActionExecutor();
    ae.check(context, context.getAction());
    assertEquals("launcherId and action.externalId should be the same", launcherId,
            context.getAction().getExternalId());
    assertEquals("action should have been SUCCEEDED", "SUCCEEDED", context.getAction().getExternalStatus());

    ae.end(context, context.getAction());
    assertEquals("action.status should be OK", WorkflowAction.Status.OK, context.getAction().getStatus());

    assertTrue("could not create test case output path", getFileSystem().exists(outputPath));
    assertTrue("could not save git index", getFileSystem().exists(gitIndex));

    try (final InputStream is = getFileSystem().open(gitIndex)) {
        final String gitIndexContent = IOUtils.toString(is, StandardCharsets.UTF_8);
        assertTrue("could not read git index", gitIndexContent.toLowerCase().contains("core"));
        assertTrue("could not read git index", gitIndexContent.toLowerCase().contains("remote"));
    }
}
From source file:org.apache.orc.tools.FileDump.java
License:Apache License
private static void recoverFile(final Path corruptPath, final FileSystem fs, final Configuration conf,
        final List<Long> footerOffsets, final String backup) throws IOException {

    // first recover the file to .recovered file and then once successful rename it to actual file
    Path recoveredPath = getRecoveryFile(corruptPath);

    // make sure that file does not exist
    if (fs.exists(recoveredPath)) {
        fs.delete(recoveredPath, false);
    }

    // if there are no valid footers, the file should still be readable so create an empty orc file
    if (footerOffsets == null || footerOffsets.isEmpty()) {
        System.err.println("No readable footers found. Creating empty orc file.");
        TypeDescription schema = TypeDescription.createStruct();
        Writer writer = OrcFile.createWriter(recoveredPath, OrcFile.writerOptions(conf).setSchema(schema));
        writer.close();
    } else {
        FSDataInputStream fdis = fs.open(corruptPath);
        FileStatus fileStatus = fs.getFileStatus(corruptPath);
        // read corrupt file and copy it to recovered file until last valid footer
        FSDataOutputStream fdos = fs.create(recoveredPath, true, conf.getInt("io.file.buffer.size", 4096),
                fileStatus.getReplication(), fileStatus.getBlockSize());
        try {
            long fileLen = footerOffsets.get(footerOffsets.size() - 1);
            long remaining = fileLen;

            while (remaining > 0) {
                int toRead = (int) Math.min(DEFAULT_BLOCK_SIZE, remaining);
                byte[] data = new byte[toRead];
                long startPos = fileLen - remaining;
                fdis.readFully(startPos, data, 0, toRead);
                fdos.write(data);
                System.err.println("Copying data to recovery file - startPos: " + startPos + " toRead: " + toRead
                        + " remaining: " + remaining);
                remaining = remaining - toRead;
            }
        } catch (Exception e) {
            fs.delete(recoveredPath, false);
            throw new IOException(e);
        } finally {
            fdis.close();
            fdos.close();
        }
    }

    // validate the recovered file once again and start moving corrupt files to backup folder
    if (isReadable(recoveredPath, conf, Long.MAX_VALUE)) {
        Path backupDataPath;
        String scheme = corruptPath.toUri().getScheme();
        String authority = corruptPath.toUri().getAuthority();
        String filePath = corruptPath.toUri().getPath();

        // use the same filesystem as corrupt file if backup-path is not explicitly specified
        if (backup.equals(DEFAULT_BACKUP_PATH)) {
            backupDataPath = new Path(scheme, authority, DEFAULT_BACKUP_PATH + filePath);
        } else {
            backupDataPath = Path.mergePaths(new Path(backup), corruptPath);
        }

        // Move data file to backup path
        moveFiles(fs, corruptPath, backupDataPath);

        // Move side file to backup path
        Path sideFilePath = OrcAcidUtils.getSideFile(corruptPath);
        Path backupSideFilePath = new Path(backupDataPath.getParent(), sideFilePath.getName());
        moveFiles(fs, sideFilePath, backupSideFilePath);

        // finally move recovered file to actual file
        moveFiles(fs, recoveredPath, corruptPath);

        // we are done recovering, backing up and validating
        System.err.println("Validation of recovered file successful!");
    }
}
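A brief sketch of how the custom backup path above is derived (paths are assumptions): merging the backup root with the corrupt file's full path re-creates the original directory layout under the backup root, since mergePaths appends the corrupt path's path component and keeps the backup root's (empty) scheme and authority.

Path backupRoot = new Path("/recovery/backup");                      // assumed backup argument
Path corrupt = new Path("hdfs://nn/warehouse/sales/part-0007.orc");  // assumed corrupt file
Path backupDataPath = Path.mergePaths(backupRoot, corrupt);
// -> /recovery/backup/warehouse/sales/part-0007.orc, resolved on the backup root's filesystem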