List of usage examples for org.apache.hadoop.fs FileSystem mkdirs
public boolean mkdirs(Path f) throws IOException
From source file:com.uber.hoodie.utilities.HoodieSnapshotCopier.java
License:Apache License
public void snapshot(JavaSparkContext jsc, String baseDir, final String outputDir, final boolean shouldAssumeDatePartitioning) throws IOException { FileSystem fs = FSUtils.getFs(baseDir, jsc.hadoopConfiguration()); final SerializableConfiguration serConf = new SerializableConfiguration(jsc.hadoopConfiguration()); final HoodieTableMetaClient tableMetadata = new HoodieTableMetaClient(fs.getConf(), baseDir); final TableFileSystemView.ReadOptimizedView fsView = new HoodieTableFileSystemView(tableMetadata, tableMetadata.getActiveTimeline().getCommitsTimeline().filterCompletedInstants()); // Get the latest commit Optional<HoodieInstant> latestCommit = tableMetadata.getActiveTimeline().getCommitsTimeline() .filterCompletedInstants().lastInstant(); if (!latestCommit.isPresent()) { logger.warn("No commits present. Nothing to snapshot"); return;//from www.j a v a 2s .c o m } final String latestCommitTimestamp = latestCommit.get().getTimestamp(); logger.info(String.format("Starting to snapshot latest version files which are also no-late-than %s.", latestCommitTimestamp)); List<String> partitions = FSUtils.getAllPartitionPaths(fs, baseDir, shouldAssumeDatePartitioning); if (partitions.size() > 0) { logger.info(String.format("The job needs to copy %d partitions.", partitions.size())); // Make sure the output directory is empty Path outputPath = new Path(outputDir); if (fs.exists(outputPath)) { logger.warn( String.format("The output path %s targetBasePath already exists, deleting", outputPath)); fs.delete(new Path(outputDir), true); } jsc.parallelize(partitions, partitions.size()).flatMap(partition -> { // Only take latest version files <= latestCommit. 
FileSystem fs1 = FSUtils.getFs(baseDir, serConf.get()); List<Tuple2<String, String>> filePaths = new ArrayList<>(); Stream<HoodieDataFile> dataFiles = fsView.getLatestDataFilesBeforeOrOn(partition, latestCommitTimestamp); dataFiles.forEach( hoodieDataFile -> filePaths.add(new Tuple2<>(partition, hoodieDataFile.getPath()))); // also need to copy over partition metadata Path partitionMetaFile = new Path(new Path(baseDir, partition), HoodiePartitionMetadata.HOODIE_PARTITION_METAFILE); if (fs1.exists(partitionMetaFile)) { filePaths.add(new Tuple2<>(partition, partitionMetaFile.toString())); } return filePaths.iterator(); }).foreach(tuple -> { String partition = tuple._1(); Path sourceFilePath = new Path(tuple._2()); Path toPartitionPath = new Path(outputDir, partition); FileSystem ifs = FSUtils.getFs(baseDir, serConf.get()); if (!ifs.exists(toPartitionPath)) { ifs.mkdirs(toPartitionPath); } FileUtil.copy(ifs, sourceFilePath, ifs, new Path(toPartitionPath, sourceFilePath.getName()), false, ifs.getConf()); }); // Also copy the .commit files logger.info(String.format("Copying .commit files which are no-late-than %s.", latestCommitTimestamp)); FileStatus[] commitFilesToCopy = fs.listStatus( new Path(baseDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME), (commitFilePath) -> { if (commitFilePath.getName().equals(HoodieTableConfig.HOODIE_PROPERTIES_FILE)) { return true; } else { String commitTime = FSUtils.getCommitFromCommitFile(commitFilePath.getName()); return HoodieTimeline.compareTimestamps(commitTime, latestCommitTimestamp, HoodieTimeline.LESSER_OR_EQUAL); } }); for (FileStatus commitStatus : commitFilesToCopy) { Path targetFilePath = new Path(outputDir + "/" + HoodieTableMetaClient.METAFOLDER_NAME + "/" + commitStatus.getPath().getName()); if (!fs.exists(targetFilePath.getParent())) { fs.mkdirs(targetFilePath.getParent()); } if (fs.exists(targetFilePath)) { logger.error(String.format("The target output commit file (%s targetBasePath) already exists.", 
targetFilePath)); } FileUtil.copy(fs, commitStatus.getPath(), fs, targetFilePath, false, fs.getConf()); } } else { logger.info("The job has 0 partition to copy."); } // Create the _SUCCESS tag Path successTagPath = new Path(outputDir + "/_SUCCESS"); if (!fs.exists(successTagPath)) { logger.info(String.format("Creating _SUCCESS under targetBasePath: $s", outputDir)); fs.createNewFile(successTagPath); } }
From source file:com.voole.hobbit2.camus.hive.order.mixed.jobcontrol.ControlledJob.java
License:Apache License
/**
 * Submit this job to mapred. The state becomes RUNNING if submission
 * is successful, FAILED otherwise.
 *
 * <p>When the {@code CREATE_DIR} flag is set in the job configuration, every input path that
 * does not exist yet is created before submission. Directory creation is best effort: a
 * failure is logged and submission is still attempted.
 */
protected synchronized void submit() {
    try {
        Configuration conf = job.getConfiguration();
        if (conf.getBoolean(CREATE_DIR, false)) {
            FileSystem fs = FileSystem.get(conf);
            Path[] inputPaths = FileInputFormat.getInputPaths(job);
            for (Path inputPath : inputPaths) {
                if (!fs.exists(inputPath)) {
                    try {
                        fs.mkdirs(inputPath);
                    } catch (IOException e) {
                        // Deliberately non-fatal, but no longer silent: a missing input
                        // directory is the most likely reason for a later job failure.
                        LOG.warn(getJobName() + " could not create input path " + inputPath, e);
                    }
                }
            }
        }
        job.submit();
        this.state = State.RUNNING;
    } catch (Exception ioe) {
        // Any failure (including exists()/FileSystem.get() above) marks the job FAILED
        // and keeps the stack trace as the job message.
        LOG.info(getJobName() + " got an error while submitting ", ioe);
        this.state = State.FAILED;
        this.message = StringUtils.stringifyException(ioe);
    }
}
From source file:com.wipro.ats.bdre.clustermigration.DestTableLoad.java
License:Apache License
public void execute(String[] params) throws IOException { CommandLine commandLine = getCommandLine(params, PARAMS_STRUCTURE); String src = commandLine.getOptionValue("source-path"); String dest = commandLine.getOptionValue("dest-path"); String destFs = commandLine.getOptionValue("dest-fs"); Configuration config = new Configuration(); config.set("fs.defaultFS", destFs); FileSystem hdfs = FileSystem.get(config); Path srcPath = new Path(src); RemoteIterator<LocatedFileStatus> srcFiles = hdfs.listFiles(srcPath, true); while (srcFiles.hasNext()) { String absolutePath = srcFiles.next().getPath().toUri().toString(); if (absolutePath.endsWith("/")) absolutePath = absolutePath.substring(0, absolutePath.length() - 1); LOGGER.info("absolutePath of source business partition= " + absolutePath); String relativePath = absolutePath.replace(src, ""); if (relativePath.endsWith("/")) relativePath = relativePath.substring(0, relativePath.length() - 1); LOGGER.info("relativePath of source business partition= = " + relativePath); if (!dest.endsWith("/")) dest = dest + "/"; String destCheckPathString = dest + relativePath; Path destCheckPath = new Path(destCheckPathString); LOGGER.info("destCheckPath = " + destCheckPath); //find first index that contains a "/" from the end of the string, after first find the second such occurrence, finally trim the '/instanceexecid=number/part_0000' from the whole path, do this for both source and dest paths int destIndex = destCheckPathString.lastIndexOf("/"); int secondLastDestIndex = destCheckPath.toString().lastIndexOf("/", destIndex - 1); int srcIndex = absolutePath.lastIndexOf("/"); int secondLastSrcIndex = absolutePath.substring(0, srcIndex).lastIndexOf("/", srcIndex - 1); String truncatedSrcPath = absolutePath.substring(0, secondLastSrcIndex); LOGGER.info("truncated Src Path = " + truncatedSrcPath); String truncatedDestPath = destCheckPath.toString().substring(0, secondLastDestIndex); LOGGER.info("truncated Dest Path = " + truncatedDestPath); 
Path existsPathCheck = new Path(truncatedDestPath); Path srcPathToMove = new Path(truncatedSrcPath); //check if the business partition to be copied already exists inside the destination table, if it does, it has to be overwritten (in this case delete at dest and move from source to dest LOGGER.info("Does the business partition exist already inside the table? True/False? = " + hdfs.exists(existsPathCheck)); if (hdfs.exists(existsPathCheck)) { LOGGER.info(//from w w w . j a v a 2 s. com "bus partitions to be copied already exist at the destination, hence deleting them at destination"); hdfs.delete(existsPathCheck, true); } String destPartitionPath = truncatedDestPath.substring(0, truncatedDestPath.lastIndexOf("/")); Path partitionWisePath = new Path(destPartitionPath); hdfs.mkdirs(partitionWisePath); LOGGER.info("moving the business partitions to the destination table"); LOGGER.info("moving " + srcPathToMove + " to " + partitionWisePath); hdfs.rename(srcPathToMove, partitionWisePath); } hdfs.delete(srcPath, true); }
From source file:com.wipro.ats.bdre.filemon.QueuedFileUploader.java
License:Apache License
private static void hdfsCopy(FileCopyInfo fileCopying) throws IOException { try {/*from w w w . ja v a 2 s . c om*/ // Copying file from local to HDFS overriding, if file already exists config.set("fs.defaultFS", FileMonRunnableMain.getDefaultFSName()); FileSystem fs = FileSystem.get(config); String destDir = fileCopying.getDstLocation(); Path destPath = new Path(ResolvePath.replaceVars(destDir)); if (!fs.exists(destPath)) { LOGGER.info("Creating HDFS dest dir " + destPath + " Success=" + fs.mkdirs(destPath)); } if (FileMonRunnableMain.isDeleteCopiedSrc()) { fs.copyFromLocalFile(true, true, new Path(fileCopying.getSrcLocation()), destPath); } else { fs.copyFromLocalFile(false, true, new Path(fileCopying.getSrcLocation()), destPath); File sourceFile = new File(fileCopying.getSrcLocation()); String arcDir = destDir.replace(FileMonRunnableMain.getHdfsUploadDir(), FileMonRunnableMain.getMonitoredDirName() + "/" + FileMonRunnableMain.ARCHIVE); File arcDirFile = new File(arcDir); FileUtils.moveFileToDirectory(sourceFile, arcDirFile, true); } } catch (Exception e) { FileMonitor.addToQueue(fileCopying.getFileName(), fileCopying); LOGGER.error("Error in executeCopyProcess method. Requeuing file " + fileCopying.getFileName(), e); throw new IOException(e); } }
From source file:com.xiaomi.linden.hadoop.indexing.reduce.ShardWriter.java
License:Apache License
/** * Constructor/*w w w . j a v a 2 s .com*/ * @param fs * @param shard * @param tempDir * @param conf * @throws IOException */ public ShardWriter(FileSystem fs, Shard shard, String tempDir, Configuration conf) throws IOException { logger.info("Construct a shard writer"); this.conf = conf; this.fs = fs; localFs = FileSystem.getLocal(conf); perm = new Path(shard.getDirectory()); taxoPerm = new Path(shard.getDirectory() + ".taxonomy"); String indexDir = tempDir + "/" + "index"; String taxoDir = tempDir + "/" + "taxo"; temp = new Path(indexDir); taxoTemp = new Path(taxoDir); if (localFs.exists(temp)) { File tempFile = new File(temp.getName()); if (tempFile.exists()) { LindenReducer.deleteDir(tempFile); } } if (!fs.exists(perm)) { fs.mkdirs(perm); } else { moveToTrash(conf, perm); fs.mkdirs(perm); } if (!fs.exists(taxoPerm)) { fs.mkdirs(taxoPerm); } else { moveToTrash(conf, taxoPerm); fs.mkdirs(taxoPerm); } IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, null); config.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); writer = new IndexWriter(FSDirectory.open(new File(indexDir)), config); taxoWriter = new DirectoryTaxonomyWriter(FSDirectory.open(new File(taxoDir))); }
From source file:com.yahoo.glimmer.indexing.preprocessor.ResourceRecordWriter.java
License:Open Source License
public ResourceRecordWriter(FileSystem fs, Path taskWorkPath, CompressionCodec codecIfAny) throws IOException { if (fs.exists(taskWorkPath)) { throw new IOException("Task work path already exists:" + taskWorkPath.toString()); }//from ww w . j a va 2s .co m fs.mkdirs(taskWorkPath); for (OUTPUT output : OUTPUT.values()) { OutputStream out; if (codecIfAny != null) { Path file = new Path(taskWorkPath, output.filename + codecIfAny.getDefaultExtension()); out = fs.create(file, false); out = codecIfAny.createOutputStream(out); } else { Path file = new Path(taskWorkPath, output.filename); out = fs.create(file, false); } writersMap.put(output, new OutputStreamWriter(out, Charset.forName("UTF-8"))); } Path file = new Path(taskWorkPath, "bySubject.bz2"); OutputStream compressedOutputStream = fs.create(file, false); file = new Path(taskWorkPath, "bySubject.blockOffsets"); bySubjectOffsetsOutputStream = fs.create(file, false); blockOffsetsBuilder = new BlockOffsets.Builder(); // Create a Writer on a BZip2 compressed OutputStream with a small block // size( * 100K). uncompressedOutputStream = new BZip2OutputStream(compressedOutputStream, 1, new BZip2OutputStream.Callback() { @Override public void blockStart(long blockStartBitOffset) { if (lastBlockStartBitOffset != 0) { blockOffsetsBuilder.setBlockStart(lastBlockStartBitOffset, lastFirstDocId); } lastBlockStartBitOffset = blockStartBitOffset; } @Override public void finish(long totalBitsWritten) { blockOffsetsBuilder.close(totalBitsWritten); } }); bySubjectWriter = new OutputStreamWriter(uncompressedOutputStream); }
From source file:com.yahoo.labs.samoa.streams.fs.HDFSFileStreamSourceTest.java
License:Apache License
private void writeSimpleFiles(String path, String ext, int numOfFiles) { // get filesystem FileSystem dfs; try {// w w w . j av a 2s.c o m dfs = hdfsCluster.getFileSystem(); } catch (IOException ioe) { fail("Could not access MiniDFSCluster" + ioe.getMessage()); return; } // create basedir Path basedir = new Path(path); try { dfs.mkdirs(basedir); } catch (IOException ioe) { fail("Could not create DIR:" + path + "\n" + ioe.getMessage()); return; } // write files for (int i = 1; i <= numOfFiles; i++) { String fn = null; if (ext != null) { fn = Integer.toString(i) + "." + ext; } else { fn = Integer.toString(i); } try { OutputStream fin = dfs.create(new Path(path, fn)); BufferedWriter wr = new BufferedWriter(new OutputStreamWriter(fin)); wr.write(Integer.toString(i)); wr.close(); fin.close(); } catch (IOException ioe) { fail("Fail writing to input file: " + fn + " in directory: " + path + ioe.getMessage()); } } }
From source file:com.yahoo.spaclu.data.index.IndexFeatureValueSpark.java
License:Apache License
public static void main(String[] args) throws IOException { IndexFeatureValueOptions optionsFormatRawToDatabase = new IndexFeatureValueOptions(args); String inputPathString = optionsFormatRawToDatabase.getInputPath(); String outputPathString = optionsFormatRawToDatabase.getOutputPath(); String indexPathString = optionsFormatRawToDatabase.getIndexPath(); int numberOfPartitions = optionsFormatRawToDatabase.getNumberOfPartitions(); int maxCutoffThreshold = optionsFormatRawToDatabase.getMaximumCutoffThreshold(); int minCutoffThreshold = optionsFormatRawToDatabase.getMinimumCutoffThreshold(); /*/*from w w w . j a v a 2 s . com*/ * Set<String> excludingFeatureNames = new HashSet<String>(); * excludingFeatureNames.add("login"); * excludingFeatureNames.add("time"); excludingFeatureNames.add("day"); * excludingFeatureNames.add("hms"); excludingFeatureNames.add("fail"); */ sLogger.info("Tool: " + IndexFeatureValueSpark.class.getSimpleName()); sLogger.info(" - input path: " + inputPathString); sLogger.info(" - output path: " + outputPathString); sLogger.info(" - index path: " + indexPathString); sLogger.info(" - number of partitions: " + numberOfPartitions); sLogger.info(" - maximum cutoff: " + maxCutoffThreshold); sLogger.info(" - minimum cutoff: " + minCutoffThreshold); // Create a default hadoop configuration Configuration conf = new Configuration(); // Parse created config to the HDFS FileSystem fs = FileSystem.get(conf); Path outputPath = new Path(outputPathString); if (fs.exists(outputPath)) { fs.delete(outputPath, true); } SparkConf sparkConf = new SparkConf().setAppName(optionsFormatRawToDatabase.toString()); JavaSparkContext sc = new JavaSparkContext(sparkConf); Map<Integer, String> featureIndices = getFeatureIndices(sc.textFile(indexPathString)); List<Integer> listOfAllFeatureIndices = new LinkedList<Integer>(); List<String> listOfAllFeatureInfo = new LinkedList<String>(); Iterator<Integer> indexIter = featureIndices.keySet().iterator(); while (indexIter.hasNext()) 
{ Integer tempKey = indexIter.next(); listOfAllFeatureIndices.add(tempKey); listOfAllFeatureInfo.add(featureIndices.get(tempKey)); } /* * * * * * * * */ JavaRDD<String> rawLines = sc.textFile(inputPathString).repartition(numberOfPartitions); JavaRDD<String[]> tokenizedLines = rawLines.map(new LineFilter(listOfAllFeatureIndices)); JavaPairRDD<Entry<Integer, String>, Long> featureValuesCounts = tokenizedLines .flatMapToPair(new FeatureValueMapper()).reduceByKey(new FeatureValueReducer()); Map<Integer, Builder<String, Long>> featureValueMapping = new Hashtable<Integer, Builder<String, Long>>(); Iterator<Tuple2<Entry<Integer, String>, Long>> iter = featureValuesCounts.collect().iterator(); while (iter.hasNext()) { Tuple2<Entry<Integer, String>, Long> temp = iter.next(); Entry<Integer, String> featureValueEntry = temp._1; int featureIndex = featureValueEntry.getKey(); String featureValue = featureValueEntry.getValue(); long featureValueCount = temp._2; if (!featureValueMapping.containsKey(featureIndex)) { Builder<String, Long> mapBuilder = new Builder<String, Long>(Ordering.natural()); featureValueMapping.put(featureIndex, mapBuilder); } featureValueMapping.get(featureIndex).put(featureValue, featureValueCount); } Preconditions.checkArgument(featureValueMapping.size() == listOfAllFeatureIndices.size()); String outputFeaturePathString = outputPathString + "feature" + Settings.SEPERATOR; fs.mkdirs(new Path(outputFeaturePathString)); String outputFeatureNamePathString = outputPathString + "feature.dat"; Path outputFeatureNamePath = new Path(outputFeatureNamePathString); PrintWriter featureNamePrinterWriter = new PrintWriter(fs.create(outputFeatureNamePath), true); List<Integer> listOfFeatureIndicesToKeep = new LinkedList<Integer>(); Map<Integer, Map<String, Integer>> featureValueIndex = new Hashtable<Integer, Map<String, Integer>>(); for (int d = 0; d < featureValueMapping.size(); d++) { Map<String, Integer> valueToIndex = new Hashtable<String, Integer>(); Map<Integer, 
String> indexToValue = new Hashtable<Integer, String>(); ImmutableSortedMap<String, Long> immutableSortedMap = featureValueMapping.get(d).build(); for (String keyString : immutableSortedMap.keySet()) { valueToIndex.put(keyString, valueToIndex.size()); indexToValue.put(indexToValue.size(), keyString); } if (valueToIndex.size() <= minCutoffThreshold || valueToIndex.size() > maxCutoffThreshold) { sLogger.info("Feature (" + listOfAllFeatureInfo.get(d) + ") contains " + valueToIndex.size() + " values, skip..."); continue; } else { sLogger.info("Feature (" + listOfAllFeatureInfo.get(d) + ") contains " + valueToIndex.size() + " values."); listOfFeatureIndicesToKeep.add(listOfAllFeatureIndices.get(d)); featureNamePrinterWriter.println(listOfAllFeatureInfo.get(d)); } String outputFeatureIndexPathString = outputFeaturePathString + "index" + Settings.UNDER_SCORE + featureValueIndex.size() + ".dat"; Path outputIndexPath = new Path(outputFeatureIndexPathString); featureValueIndex.put(featureValueIndex.size(), valueToIndex); PrintWriter featureValueIndexPrinterWriter = new PrintWriter(fs.create(outputIndexPath), true); for (int i = 0; i < indexToValue.size(); i++) { featureValueIndexPrinterWriter.println("" + i + Settings.TAB + indexToValue.get(i) + Settings.TAB + immutableSortedMap.get(indexToValue.get(i))); } featureValueIndexPrinterWriter.close(); } featureNamePrinterWriter.close(); JavaRDD<String[]> filteredLines = rawLines.map(new LineFilter(listOfFeatureIndicesToKeep)); JavaRDD<FeatureIntegerVector> indexedData = filteredLines.map(new FeatureValueIndexer(featureValueIndex)); String outputDataPathString = outputPathString + "data"; Path outputDataPath = new Path(outputDataPathString); if (fs.exists(outputDataPath)) { fs.delete(outputDataPath, true); } indexedData.saveAsTextFile(outputDataPathString); sc.stop(); }
From source file:com.yahoo.storm.yarn.Util.java
License:Open Source License
@SuppressWarnings("rawtypes") static Path createConfigurationFileInFs(FileSystem fs, String appHome, Map stormConf, YarnConfiguration yarnConf) throws IOException { // dump stringwriter's content into FS conf/storm.yaml Path confDst = new Path(fs.getHomeDirectory(), appHome + Path.SEPARATOR + STORM_CONF_PATH_STRING); Path dirDst = confDst.getParent(); fs.mkdirs(dirDst); //storm.yaml/*from ww w. j a v a 2 s. c o m*/ FSDataOutputStream out = fs.create(confDst); Yaml yaml = new Yaml(); OutputStreamWriter writer = new OutputStreamWriter(out); rmNulls(stormConf); yaml.dump(stormConf, writer); writer.close(); out.close(); //yarn-site.xml Path yarn_site_xml = new Path(dirDst, "yarn-site.xml"); out = fs.create(yarn_site_xml); writer = new OutputStreamWriter(out); yarnConf.writeXml(writer); writer.close(); out.close(); //logback.xml Path logback_xml = new Path(dirDst, "logback.xml"); out = fs.create(logback_xml); CreateLogbackXML(out); out.close(); return dirDst; }
From source file:com.yolodata.tbana.testutils.FileSystemTestUtils.java
License:Open Source License
/**
 * Creates a directory at a random test path and asserts that {@code mkdirs} reported success.
 *
 * @param fileSystem file system to create the directory on
 * @return the directory's path as reported by the file system's own status lookup
 * @throws IOException if creating or stat-ing the directory fails
 */
public static Path createEmptyDir(FileSystem fileSystem) throws IOException {
    String randomLocation = FileTestUtils.getRandomTestFilepath();
    Path dir = new Path(randomLocation);

    boolean created = fileSystem.mkdirs(dir);
    assertTrue(created);

    // Return the path from the file status rather than the one passed to mkdirs.
    return fileSystem.getFileStatus(dir).getPath();
}