List of usage examples for org.apache.hadoop.fs Path toUri
public URI toUri()
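Before the collected examples, a minimal sketch of the call itself: Path.toUri() converts the Path into a java.net.URI whose scheme, authority, and path components can then be read separately. The HDFS location below is a made-up value used only for illustration.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class PathToUriDemo {
    public static void main(String[] args) {
        // Build a Path and convert it to a java.net.URI (the location is hypothetical).
        Path p = new Path("hdfs://namenode:8020/user/demo/data.txt");
        URI uri = p.toUri();
        System.out.println(uri.getScheme());    // hdfs
        System.out.println(uri.getAuthority()); // namenode:8020
        System.out.println(uri.getPath());      // /user/demo/data.txt
    }
}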
From source file:com.google.cloud.hadoop.fs.gcs.GoogleHadoopGlobalRootedFileSystem.java
License:Open Source License
@Override
public URI getGcsPath(Path hadoopPath) {
    LOG.debug("GHFS.getGcsPath: {}", hadoopPath);

    // Convert to fully qualified absolute path; the Path object will callback to get our current
    // workingDirectory as part of fully resolving the path.
    Path resolvedPath = hadoopPath.makeQualified(this);

    // Handle root.
    if (resolvedPath.equals(getFileSystemRoot())) {
        return GoogleCloudStorageFileSystem.GCS_ROOT;
    }

    // Need to convert scheme to GCS scheme and possibly move bucket into authority
    String authorityString = null;
    if (!Strings.isNullOrEmpty(resolvedPath.toUri().getAuthority())) {
        authorityString = "/" + resolvedPath.toUri().getAuthority();
    } else {
        authorityString = "";
    }
    // Construct GCS path uri.
    String path = GoogleCloudStorageFileSystem.SCHEME + ":/" + authorityString + resolvedPath.toUri().getPath();

    URI gcsPath = null;
    try {
        gcsPath = new URI(path);
    } catch (URISyntaxException e) {
        String msg = String.format("Invalid path: %s", hadoopPath);
        throw new IllegalArgumentException(msg, e);
    }

    LOG.debug("GHFS.getGcsPath: {} -> {}", hadoopPath, gcsPath);
    return gcsPath;
}
From source file:com.google.mr4c.sources.MapFileSource.java
License:Open Source License
private URI toURI(String fileName) throws IOException {
    Path path = new Path(m_metaPath, fileName);
    return path.toUri();
}
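A standalone sketch of the same parent/child resolution followed by toUri(); the base directory and file name below are hypothetical values, not taken from MapFileSource.

import java.net.URI;
import org.apache.hadoop.fs.Path;

public class ChildPathToUriDemo {
    public static void main(String[] args) {
        // Resolve a child name against a parent directory, then take the URI of the result.
        // Both locations are made up for illustration.
        Path metaDir = new Path("hdfs://namenode:8020/data/mapfile/meta");
        Path indexFile = new Path(metaDir, "index");
        URI uri = indexFile.toUri();
        System.out.println(uri); // hdfs://namenode:8020/data/mapfile/meta/index
    }
}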
From source file:com.google.mr4c.sources.MBTilesDatasetSourceTest.java
License:Open Source License
@Test
public void testStaging() throws Exception {
    // save a file on disk
    File file1 = new File("output/mbtilessource/test_stage1.db");
    saveInputFile(file1);

    // specify location in HDFS
    FileSystem fs = HadoopTestUtils.getTestDFS();
    Path root = new Path(fs.getUri());
    Path file2 = new Path(root, "/test/sources/MBTilesDatasetSourceTest/test_stage2.db");

    // standard source test
    MBTilesDatasetSource src1 = new MBTilesDatasetSource(file1.toURI());
    MBTilesDatasetSource src2 = new MBTilesDatasetSource(file2.toUri());
    SourceTestUtils.testSource(src1, src2);
}
From source file:com.gruter.hadoop.customShell.CustomShell.java
License:Apache License
private int ls(FileStatus src, FileSystem srcFs, boolean recursive, boolean printHeader) throws IOException {
    final String cmd = recursive ? "lsr" : "ls";
    final FileStatus[] items = shellListStatus(cmd, srcFs, src);
    if (items == null) {
        return 1;
    } else {
        int numOfErrors = 0;
        if (!recursive && printHeader) {
            if (items.length != 0) {
                System.out.println("Found " + items.length + " items");
            }
        }
        int maxReplication = 3, maxLen = 10, maxOwner = 0, maxGroup = 0;
        for (int i = 0; i < items.length; i++) {
            FileStatus stat = items[i];
            int replication = String.valueOf(stat.getReplication()).length();
            int len = String.valueOf(stat.getLen()).length();
            int owner = String.valueOf(stat.getOwner()).length();
            int group = String.valueOf(stat.getGroup()).length();
            if (replication > maxReplication)
                maxReplication = replication;
            if (len > maxLen)
                maxLen = len;
            if (owner > maxOwner)
                maxOwner = owner;
            if (group > maxGroup)
                maxGroup = group;
        }
        for (int i = 0; i < items.length; i++) {
            FileStatus stat = items[i];
            Path cur = stat.getPath();
            String mdate = dateForm.format(new Date(stat.getModificationTime()));
            System.out.print((stat.isDir() ? "d" : "-") + stat.getPermission() + " ");
            System.out.printf("%" + maxReplication + "s ", (!stat.isDir() ? stat.getReplication() : "-"));
            if (maxOwner > 0)
                System.out.printf("%-" + maxOwner + "s ", stat.getOwner());
            if (maxGroup > 0)
                System.out.printf("%-" + maxGroup + "s ", stat.getGroup());
            System.out.printf("%" + maxLen + "d ", stat.getLen());
            System.out.print(mdate + " ");
            System.out.println(cur.toUri().getPath());
            if (recursive && stat.isDir()) {
                numOfErrors += ls(stat, srcFs, recursive, printHeader);
            }
        }
        return numOfErrors;
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
void writeDirs() throws IOException {
    print(Verbosity.INFO, "\n\nUsing temporary directory " + tmpDir.toUri().getPath());

    FileStatus status = fs.getFileStatus(srcDir);

    Path tmpIn = new Path(tmpDir, "in");
    bucketFiles = new Path(tmpIn, "dirs");
    partitionMap = new Path(tmpIn, "partition-map");
    counters = new Path(tmpIn, "counters");

    skippedFiles = new HashSet<String>();

    /*
     * Prefer the path returned by the status because it is always fully qualified.
     */
    List<Path> dirs = asList(status.getPath());

    Text key = new Text();
    Text value = new Text();

    Writer writer = SequenceFile.createWriter(fs, job, bucketFiles, Text.class, Text.class, CompressionType.BLOCK);

    int numPartitions = Integer.parseInt(job.get("mapred.reduce.tasks"));

    Bucketer partitionBucketer = new Bucketer(numPartitions, 0, false);
    partitionBucketer.reset("partition-map");

    jobCounters = new Counters();

    try {
        while (!dirs.isEmpty()) {
            List<Path> nextLevel = new LinkedList<Path>();

            for (Path dir : dirs) {
                jobCounters.incrCounter(MapperCounter.DIRS_FOUND, 1);

                print(Verbosity.INFO, "\n\n" + dir.toUri().getPath());

                FileStatus[] contents = fs.listStatus(dir, new PathFilter() {
                    @Override
                    public boolean accept(Path testPath) {
                        if (ignoredFiles == null)
                            return true;
                        ignoredFiles.reset(testPath.toUri().getPath());
                        return !ignoredFiles.matches();
                    }
                });

                if (contents == null || contents.length == 0) {
                    print(Verbosity.INFO, " is empty");
                    jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                } else {
                    List<FileStatus> crushables = new ArrayList<FileStatus>(contents.length);
                    Set<String> uncrushedFiles = new HashSet<String>(contents.length);

                    long crushableBytes = 0;

                    /*
                     * Queue sub directories for subsequent inspection and examine the files in this directory.
                     */
                    for (FileStatus content : contents) {
                        Path path = content.getPath();

                        if (content.isDir()) {
                            nextLevel.add(path);
                        } else {
                            boolean changed = uncrushedFiles.add(path.toUri().getPath());
                            assert changed : path.toUri().getPath();

                            long fileLength = content.getLen();
                            if (fileLength <= maxEligibleSize) {
                                crushables.add(content);
                                crushableBytes += fileLength;
                            }
                        }
                    }

                    /*
                     * We found a directory with data in it. Make sure we know how to name the crush output file and then increment the
                     * number of files we found.
                     */
                    if (!uncrushedFiles.isEmpty()) {
                        if (-1 == findMatcher(dir)) {
                            throw new IllegalArgumentException("Could not find matching regex for directory: " + dir);
                        }
                        jobCounters.incrCounter(MapperCounter.FILES_FOUND, uncrushedFiles.size());
                    }

                    if (0 == crushableBytes) {
                        print(Verbosity.INFO, " has no crushable files");
                        jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                    } else {
                        /*
                         * We found files to consider for crushing.
                         */
                        long nBlocks = crushableBytes / dfsBlockSize;
                        if (nBlocks * dfsBlockSize != crushableBytes) {
                            nBlocks++;
                        }

                        /*
                         * maxFileBlocks will be huge in v1 mode, which will lead to one bucket per directory.
                         */
                        long dirBuckets = nBlocks / maxFileBlocks;
                        if (dirBuckets * maxFileBlocks != nBlocks) {
                            dirBuckets++;
                        }

                        if (dirBuckets > Integer.MAX_VALUE) {
                            throw new AssertionError("Too many buckets: " + dirBuckets);
                        }

                        Bucketer directoryBucketer = new Bucketer((int) dirBuckets, excludeSingleFileDirs);
                        directoryBucketer.reset(getPathPart(dir));

                        for (FileStatus file : crushables) {
                            directoryBucketer.add(new FileStatusHasSize(file));
                        }

                        List<Bucket> crushFiles = directoryBucketer.createBuckets();

                        if (crushFiles.isEmpty()) {
                            jobCounters.incrCounter(MapperCounter.DIRS_SKIPPED, 1);
                        } else {
                            nBuckets += crushFiles.size();
                            jobCounters.incrCounter(MapperCounter.DIRS_ELIGIBLE, 1);
                            print(Verbosity.INFO, " => " + crushFiles.size() + " output files");

                            /*
                             * Write out the mapping between a bucket and a file.
                             */
                            for (Bucket crushFile : crushFiles) {
                                String bucketId = crushFile.name();
                                List<String> bucketFiles = crushFile.contents();

                                print(Verbosity.INFO,
                                        format("\n Output %s will include %,d input bytes from %,d files",
                                                bucketId, crushFile.size(), bucketFiles.size()));

                                key.set(bucketId);

                                for (String f : bucketFiles) {
                                    boolean changed = uncrushedFiles.remove(f);
                                    assert changed : f;

                                    pathMatcher.reset(f);
                                    pathMatcher.matches();

                                    value.set(pathMatcher.group(5));

                                    writer.append(key, value);

                                    /*
                                     * Print the input file with four leading spaces.
                                     */
                                    print(Verbosity.VERBOSE, "\n    " + f);
                                }

                                jobCounters.incrCounter(MapperCounter.FILES_ELIGIBLE, bucketFiles.size());

                                partitionBucketer.add(crushFile);
                            }
                        }
                    }

                    if (!uncrushedFiles.isEmpty()) {
                        print(Verbosity.INFO, "\n\n Skipped " + uncrushedFiles.size() + " files");

                        for (String uncrushed : uncrushedFiles) {
                            print(Verbosity.VERBOSE, "\n " + uncrushed);
                        }

                        jobCounters.incrCounter(MapperCounter.FILES_SKIPPED, uncrushedFiles.size());
                    }

                    skippedFiles.addAll(uncrushedFiles);
                }
            }

            dirs = nextLevel;
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + bucketFiles, e);
        }
    }

    /*
     * Now that we have processed all the directories, write the partition map.
     */
    List<Bucket> partitions = partitionBucketer.createBuckets();
    assert partitions.size() <= numPartitions;

    writer = SequenceFile.createWriter(fs, job, partitionMap, Text.class, IntWritable.class);
    IntWritable partNum = new IntWritable();

    try {
        for (Bucket partition : partitions) {
            String partitionName = partition.name();
            partNum.set(Integer.parseInt(partitionName.substring(partitionName.lastIndexOf('-') + 1)));

            for (String bucketId : partition.contents()) {
                key.set(bucketId);
                writer.append(key, partNum);
            }
        }
    } finally {
        try {
            writer.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }

    DataOutputStream countersStream = fs.create(this.counters);

    try {
        jobCounters.write(countersStream);
    } finally {
        try {
            countersStream.close();
        } catch (Exception e) {
            LOG.error("Trapped exception during close: " + partitionMap, e);
        }
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private int findMatcher(Path path) {
    for (int i = 0; i < matchers.size(); i++) {
        Matcher matcher = matchers.get(i);
        matcher.reset(path.toUri().getPath());
        if (matcher.matches()) {
            return i;
        }
    }
    return -1;
}
From source file:com.hdfs.concat.crush.integration.CrushMapReduceTest.java
License:Apache License
@Before
@Override
public void setUp() throws Exception {
    super.setUp();

    job = createJobConf();
    job.setBoolean("mapred.output.compress", true);
    job.set("mapred.output.compression.type", CompressionType.BLOCK.name());
    job.set("mapred.output.compression.codec", CustomCompressionCodec.class.getName());

    FileSystem fs = getFileSystem();

    Path homeDirPath = fs.makeQualified(new Path("."));
    homeDir = homeDirPath.toUri().getPath();
    fs.delete(homeDirPath, true);

    defaultCodec = new DefaultCodec();
    defaultCodec.setConf(job);

    customCodec = new CustomCompressionCodec();
    customCodec.setConf(job);
}
From source file:com.hdfstoftp.main.HdfsToFtp.java
/**
 * Copies files from HDFS to an FTP server according to the given configuration.
 *
 * @param config transfer configuration (source dir, destination dir, filters, thread count, retry/rename settings)
 * @return true when the transfer completes
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // source HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();
    // destination directory on the FTP server
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;
    if (fileStatus.isDirectory()) { // the source is a directory: upload its files in parallel
        if (isRename) { // create the subdirectory to which uploaded files are renamed
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        // thread pool for parallel uploads
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);
        // pool of FTP client connections
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        // make sure the destination directory exists
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);
        // list the files to transfer
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFileter = 0;
        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) { // filter by time range if -d/-h/-t was given
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFileter = System.currentTimeMillis();
        }
        // filter by file-name regex if -r was given
        if (config.getCommandLine().hasOption("r")) {
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFileter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);
        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) { // when not overwriting, collect the names already present on the FTP server
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }
        int skiped = 0;
        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                // already present on the FTP server: skip it
                skiped++;
                Log.info("skiped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // submit one upload task per file
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir,
                    retryTimes));
            futureList.add(future);
        }
        int transfered = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transfered++;
                if (transfered % 50 == 0 || transfered == contents.length) {
                    logger.info("have transfered:" + transfered + " files");
                }
            } else {
                failed++;
                logger.error("failed transter:" + failed + " files");
            }
        }
        // shut down the upload thread pool
        threadPool.shutdown();
        // close the FTP client pool
        ftpPool.close();
        // ****************
        logger.info("filter time:" + (endFileter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped);
    } else { // the source is a list file: upload each HDFS file referenced on its lines
        BufferedReader reader = null;
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {
            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;
            ftpClient = facotry.makeObject();
            while ((str = reader.readLine()) != null) {
                String[] feilds = str.split("&");
                Path filePath = null;
                if (feilds.length == 2 && feilds[1] != "") {
                    filePath = new Path(feilds[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } else {
                    continue;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            in.close();
            reader.close();
            facotry.destroyObject(ftpClient);
        }
    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}
From source file:com.hortonworks.minicluster.InJvmContainerExecutor.java
License:Apache License
/**
 * YARN provides ability to pass resources (e.g., classpath) through
 * {@link LocalResource}s which allows user to provision all the resources
 * required to run the app. This method will extract those resources as a
 * {@link Set} of {@link URL}s so they are used when {@link ClassLoader} for a
 * container is created.
 *
 * This is done primarily as a convenience for applications that rely on
 * automatic classpath propagation (e.g., pull everything from my dev
 * classpath) instead of manual.
 *
 * @param container
 * @return
 */
private Set<URL> filterAndBuildUserClasspath(Container container) {
    if (logger.isDebugEnabled()) {
        logger.debug("Building additional classpath for the container: " + container);
    }
    Set<URL> additionalClassPathUrls = new HashSet<URL>();
    Set<Path> userClassPath = this.extractUserProvidedClassPathEntries(container);

    for (Path resourcePath : userClassPath) {
        String resourceName = resourcePath.getName();
        if (logger.isDebugEnabled()) {
            logger.debug("\t adding " + resourceName + " to the classpath");
        }
        try {
            additionalClassPathUrls.add(resourcePath.toUri().toURL());
        } catch (Exception e) {
            throw new IllegalArgumentException(e);
        }
    }
    return additionalClassPathUrls;
}
From source file:com.ibm.bi.dml.runtime.matrix.CSVReblockMR.java
License:Open Source License
private static JobReturn runCSVReblockJob(MRJobInstruction inst, String[] inputs, InputInfo[] inputInfos,
        long[] rlens, long[] clens, int[] brlens, int[] bclens, String reblockInstructions,
        String otherInstructionsInReducer, int numReducers, int replication, byte[] resultIndexes,
        String[] outputs, OutputInfo[] outputInfos, Path counterFile, String[] smallestFiles) throws Exception {
    JobConf job;
    job = new JobConf(ReblockMR.class);
    job.setJobName("CSV-Reblock-MR");

    byte[] realIndexes = new byte[inputs.length];
    for (byte b = 0; b < realIndexes.length; b++)
        realIndexes[b] = b;

    //set up the input files and their format information
    MRJobConfiguration.setUpMultipleInputs(job, realIndexes, inputs, inputInfos, brlens, bclens, false,
            ConvertTarget.CELL);
    job.setStrings(SMALLEST_FILE_NAME_PER_INPUT, smallestFiles);

    //set up the dimensions of input matrices
    MRJobConfiguration.setMatricesDimensions(job, realIndexes, rlens, clens);

    //set up the block size
    MRJobConfiguration.setBlocksSizes(job, realIndexes, brlens, bclens);

    //set up the aggregate instructions that will happen in the combiner and reducer
    MRJobConfiguration.setCSVReblockInstructions(job, reblockInstructions);

    //set up the instructions that will happen in the reducer, after the aggregation instructions
    MRJobConfiguration.setInstructionsInReducer(job, otherInstructionsInReducer);

    //set up the replication factor for the results
    job.setInt("dfs.replication", replication);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    //set up what matrices are needed to pass from the mapper to reducer
    HashSet<Byte> mapoutputIndexes = MRJobConfiguration.setUpOutputIndexesForMapper(job, realIndexes, null,
            reblockInstructions, null, otherInstructionsInReducer, resultIndexes);

    MatrixChar_N_ReducerGroups ret = MRJobConfiguration.computeMatrixCharacteristics(job, realIndexes, null,
            reblockInstructions, null, null, otherInstructionsInReducer, resultIndexes, mapoutputIndexes, false);
    MatrixCharacteristics[] stats = ret.stats;

    //set up the number of reducers
    int numRed = WriteCSVMR.determineNumReducers(rlens, clens,
            ConfigurationManager.getConfig().getIntValue(DMLConfig.NUM_REDUCERS), ret.numReducerGroups);
    job.setNumReduceTasks(numRed);

    // Print the complete instruction
    //if (LOG.isTraceEnabled())
    //    inst.printCompelteMRJobInstruction(stats);

    // Update resultDimsUnknown based on computed "stats"
    byte[] resultDimsUnknown = new byte[resultIndexes.length];
    for (int i = 0; i < resultIndexes.length; i++) {
        if (stats[i].getRows() == -1 || stats[i].getCols() == -1) {
            resultDimsUnknown[i] = (byte) 1;
        } else {
            resultDimsUnknown[i] = (byte) 0;
        }
    }

    //set up the multiple output files, and their format information
    MRJobConfiguration.setUpMultipleOutputs(job, resultIndexes, resultDimsUnknown, outputs, outputInfos, true,
            true);

    // configure mapper and the mapper output key value pairs
    job.setMapperClass(CSVReblockMapper.class);
    job.setMapOutputKeyClass(TaggedFirstSecondIndexes.class);
    job.setMapOutputValueClass(BlockRow.class);

    //configure reducer
    job.setReducerClass(CSVReblockReducer.class);

    //turn off adaptivemr
    job.setBoolean("adaptivemr.map.enable", false);

    //set unique working dir
    MRJobConfiguration.setUniqueWorkingDir(job);

    Path cachefile = new Path(counterFile, "part-00000");
    DistributedCache.addCacheFile(cachefile.toUri(), job);
    DistributedCache.createSymlink(job);
    job.set(ROWID_FILE_NAME, cachefile.toString());

    RunningJob runjob = JobClient.runJob(job);

    MapReduceTool.deleteFileIfExistOnHDFS(counterFile, job);

    /* Process different counters */
    Group group = runjob.getCounters().getGroup(MRJobConfiguration.NUM_NONZERO_CELLS);
    for (int i = 0; i < resultIndexes.length; i++) {
        // number of non-zeros
        stats[i].setNonZeros(group.getCounter(Integer.toString(i)));
        // System.out.println("result #"+resultIndexes[i]+" ===>\n"+stats[i]);
    }

    return new JobReturn(stats, outputInfos, runjob.isSuccessful());
}