List of usage examples for org.apache.hadoop.fs Path toString
@Override
public String toString()
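Path.toString() returns the string form of the path, keeping the scheme and authority when the path was created with them; the examples below mostly use it to build sibling file names (index or temporary files) or to pass a path to an API that expects a String. A minimal sketch of that behaviour (not taken from the projects below; the host name and file names are made up for illustration):

import org.apache.hadoop.fs.Path;

public class PathToStringExample {
    public static void main(String[] args) {
        // A fully qualified path keeps its scheme and authority in toString().
        Path qualified = new Path("hdfs://namenode:8020/user/data/input.lzo");
        System.out.println(qualified.toString()); // hdfs://namenode:8020/user/data/input.lzo

        // A relative, scheme-less path prints exactly what was passed in.
        Path relative = new Path("data/input.lzo");
        System.out.println(relative.toString());  // data/input.lzo

        // toString() is commonly used to derive a sibling file name, e.g. an index file.
        Path index = new Path(qualified.toString() + ".index");
        System.out.println(index);                // hdfs://namenode:8020/user/data/input.lzo.index
    }
}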
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
@Override
protected List<FileStatus> listStatus(JobContext job) throws IOException {
    List<FileStatus> files = super.listStatus(job);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    String fileExtension = new LzopCodec().getDefaultExtension();

    for (Iterator<FileStatus> iterator = files.iterator(); iterator.hasNext();) {
        FileStatus fileStatus = iterator.next();
        Path file = fileStatus.getPath();

        if (!file.toString().endsWith(fileExtension)) {
            // get rid of non-lzo files
            iterator.remove();
        } else {
            // read the index file
            LzoIndex index = readIndex(file, fs);
            indexes.put(file, index);
        }
    }

    return files;
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
/**
 * Read the index of the lzo file.
 *
 * @param file
 *          read the index of this file
 * @param fs
 *          the file system that holds the index file
 * @throws IOException
 */
private LzoIndex readIndex(Path file, FileSystem fs) throws IOException {
    FSDataInputStream indexIn = null;
    try {
        Path indexFile = new Path(file.toString() + LZO_INDEX_SUFFIX);
        if (!fs.exists(indexFile)) {
            // return an empty index and fall back to the unsplittable mode
            return new LzoIndex();
        }

        long indexLen = fs.getFileStatus(indexFile).getLen();
        int blocks = (int) (indexLen / 8);
        LzoIndex index = new LzoIndex(blocks);
        indexIn = fs.open(indexFile);
        for (int i = 0; i < blocks; i++) {
            index.set(i, indexIn.readLong());
        }
        return index;
    } finally {
        if (indexIn != null) {
            indexIn.close();
        }
    }
}
From source file:com.hadoop.mapreduce.LzoTextInputFormat.java
License:Open Source License
/**
 * Index an lzo file so that the input format can split it into separate map
 * jobs.
 *
 * @param fs
 *          file system that contains the file
 * @param lzoFile
 *          the lzo file to index
 * @throws IOException
 */
public static void createIndex(FileSystem fs, Path lzoFile) throws IOException {
    Configuration conf = fs.getConf();
    CompressionCodecFactory factory = new CompressionCodecFactory(fs.getConf());
    CompressionCodec codec = factory.getCodec(lzoFile);
    ((Configurable) codec).setConf(conf);

    InputStream lzoIs = null;
    FSDataOutputStream os = null;
    Path outputFile = new Path(lzoFile.toString() + LzoTextInputFormat.LZO_INDEX_SUFFIX);
    Path tmpOutputFile = outputFile.suffix(".tmp");

    try {
        FSDataInputStream is = fs.open(lzoFile);
        os = fs.create(tmpOutputFile);
        LzopDecompressor decompressor = (LzopDecompressor) codec.createDecompressor();
        // for reading the header
        lzoIs = codec.createInputStream(is, decompressor);

        int numChecksums = decompressor.getChecksumsCount();
        while (true) {
            // read and ignore, we just want to get to the next int
            int uncompressedBlockSize = is.readInt();
            if (uncompressedBlockSize == 0) {
                break;
            } else if (uncompressedBlockSize < 0) {
                throw new EOFException();
            }

            int compressedBlockSize = is.readInt();
            if (compressedBlockSize <= 0) {
                throw new IOException("Could not read compressed block size");
            }

            long pos = is.getPos();
            // write the pos of the block start
            os.writeLong(pos - 8);
            // seek to the start of the next block, skip any checksums
            is.seek(pos + compressedBlockSize + (4 * numChecksums));
        }
    } finally {
        if (lzoIs != null) {
            lzoIs.close();
        }
        if (os != null) {
            os.close();
        }
    }

    fs.rename(tmpOutputFile, outputFile);
}
From source file:com.hazelcast.jet.connector.hadoop.ReadHdfsPTest.java
License:Open Source License
@Test
public void testReadFile() throws Exception {
    Path path = writeToFile("key-1 value-1\n", "key-2 value-2\n", "key-3 value-3\n", "key-4 value-4\n");

    JetInstance instance = createJetMember();
    createJetMember();

    DAG dag = new DAG();
    Vertex source = dag.newVertex("source", readHdfs(path.toString())).localParallelism(4);
    Vertex sink = dag.newVertex("sink", writeList("sink")).localParallelism(1);
    dag.edge(between(source, sink));

    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    IList<Map.Entry> list = instance.getList("sink");
    assertEquals(4, list.size());
    assertTrue(list.get(0).getValue().toString().contains("value"));
}
From source file:com.hazelcast.jet.connector.hadoop.ReadHdfsPTest.java
License:Open Source License
@Test
public void testReadFile_withMapping() throws Exception {
    Path path = writeToFile("key-1 value-1\n", "key-2 value-2\n", "key-3 value-3\n", "key-4 value-4\n");

    JetInstance instance = createJetMember();
    createJetMember();

    DAG dag = new DAG();
    Vertex source = dag.newVertex("source", readHdfs(path.toString(), (k, v) -> v.toString()))
            .localParallelism(4);
    Vertex sink = dag.newVertex("sink", writeList("sink")).localParallelism(1);
    dag.edge(between(source, sink));

    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    IList<String> list = instance.getList("sink");
    assertEquals(4, list.size());
    assertTrue(list.get(0).contains("key"));
}
From source file:com.hazelcast.jet.connector.hadoop.WriteHdfsPTest.java
License:Open Source License
@Test
public void testWriteFile() throws Exception {
    int messageCount = 20;
    String mapName = randomMapName();

    JetInstance instance = createJetMember();
    createJetMember();

    Map<Integer, Integer> map = IntStream.range(0, messageCount).boxed()
            .collect(Collectors.toMap(m -> m, m -> m));
    instance.getMap(mapName).putAll(map);

    DAG dag = new DAG();
    Vertex producer = dag.newVertex("producer", readMap(mapName)).localParallelism(1);
    Path path = getPath();
    Vertex consumer = dag.newVertex("consumer", writeHdfs(path.toString())).localParallelism(4);
    dag.edge(between(producer, consumer));

    Future<Void> future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    dag = new DAG();
    producer = dag.newVertex("producer", readHdfs(path.toString())).localParallelism(8);
    consumer = dag.newVertex("consumer", writeList("results")).localParallelism(1);
    dag.edge(between(producer, consumer));

    future = instance.newJob(dag).execute();
    assertCompletesEventually(future);

    IList<Object> results = instance.getList("results");
    assertEquals(messageCount, results.size());
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
private void standAlone() throws IOException {
    String absSrcDir = fs.makeQualified(srcDir).toUri().getPath();
    String absOutDir = fs.makeQualified(outDir).toUri().getPath();

    Text bucket = new Text(absSrcDir + "-0");

    List<Text> files = new ArrayList<Text>();

    FileStatus[] contents = fs.listStatus(new Path(absSrcDir));

    for (FileStatus content : contents) {
        if (!content.isDir()) {
            if (ignoredFiles != null) {
                // Check for files to skip
                ignoredFiles.reset(content.getPath().toUri().getPath());
                if (ignoredFiles.matches()) {
                    LOG.trace("Ignoring " + content.getPath().toString());
                    continue;
                }
            }
            files.add(new Text(content.getPath().toUri().getPath()));
        }
    }

    /*
     * Is the directory empty?
     */
    if (files.isEmpty()) {
        return;
    }

    /*
     * We trick the reducer into doing some work for us by setting these configuration properties.
     */
    job.set("mapred.tip.id", "task_000000000000_00000_r_000000");
    job.set("mapred.task.id", "attempt_000000000000_0000_r_000000_0");
    job.set("mapred.output.dir", absOutDir);

    /*
     * File output committer needs this.
     */
    fs.mkdirs(new Path(absOutDir, "_temporary"));

    CrushReducer reducer = new CrushReducer();

    reducer.configure(job);
    reducer.reduce(bucket, files.iterator(), new NullOutputCollector<Text, Text>(), Reporter.NULL);
    reducer.close();

    /*
     * Use a glob here because the temporary and task attempt work dirs have funny names.
     * Include a * at the end to cover wildcards for compressed files.
     */
    Path crushOutput = new Path(absOutDir + "/*/*/crush" + absSrcDir + "/" + dest.getName() + "*");

    FileStatus[] statuses = fs.globStatus(crushOutput);

    if (statuses == null || 1 != statuses.length) {
        throw new AssertionError("Did not find the expected output in " + crushOutput.toString());
    }

    rename(statuses[0].getPath(), dest.getParent(), dest.getName());
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
/**
 * Moves all crush input files to {@link #dest} and then moves the crush output file to {@link #srcDir}.
 */
private void swap(List<Path> crushInput, String crushFileName) throws IOException {
    if (crushInput.isEmpty()) {
        return;
    }

    print(Verbosity.INFO, format("\n\nSwapping %s", crushFileName));

    List<Path> movedSrc = new ArrayList<Path>(crushInput.size());
    List<Path> movedDest = new ArrayList<Path>(crushInput.size());

    Path crushedDir = crushInput.get(0).getParent();

    boolean crushFileNotInstalled = true;

    try {
        /*
         * Move each source file into the clone directory, replacing the root with the path of the clone dir.
         */
        for (Iterator<Path> iter = crushInput.iterator(); iter.hasNext();) {
            Path source = iter.next();

            /*
             * Remove the leading slash from the input file to create a path relative to the clone dir.
             */
            Path destPath = new Path(dest, source.toString().substring(1));

            rename(source, destPath.getParent(), null);

            iter.remove();

            movedSrc.add(source);
            movedDest.add(destPath);
        }

        /*
         * Install the crush output file now that all the source files have been moved to the clone dir.
         * Sometimes the compression codec messes with the names so watch out.
         */
        Path crushFile = new Path(crushFileName);

        rename(crushFile, crushedDir, null);

        crushFileNotInstalled = false;
    } finally {
        if (!crushInput.isEmpty()) {
            /*
             * We failed while moving the source files to the clone directory.
             */
            LOG.error(format(
                    "Failed while moving files into the clone directory and before installing the crush output file (%d moved and %d remaining)",
                    movedSrc.size(), crushInput.size()));

            StringBuilder sb = new StringBuilder("hadoop fs -mv ");

            for (int i = 0; i < movedSrc.size(); i++) {
                sb.append(" ");
                sb.append(movedDest.get(i));
            }

            sb.append(" ");
            sb.append(crushedDir);

            LOG.error("Execute the following to restore the file system to a good state: " + sb.toString());
        } else if (crushFileNotInstalled) {
            /*
             * We failed moving the crush output file to the source directory.
             */
            LOG.error(format("Failed while moving crush output file (%s) to the source directory (%s)",
                    crushFileName, crushedDir));
        }
    }
}
From source file:com.hdfs.concat.crush.Crush.java
License:Apache License
/**
 * Strips out the scheme and authority.
 */
private String getPathPart(Path path) {
    pathMatcher.reset(path.toString());
    pathMatcher.matches();
    return pathMatcher.group(5);
}
From source file:com.hdfstoftp.main.HdfsToFtp.java
/**
 * Copies files from HDFS to an FTP server according to the given configuration.
 *
 * @param config
 *            holds the source and destination directories, filter options,
 *            overwrite/rename flags, retry count, thread count and the FTP
 *            connection settings
 * @return true when the transfer has finished
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // get the HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();

    // source and destination paths
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;

    if (fileStatus.isDirectory()) {
        // the source is a directory
        if (isRename) {
            // uploaded files are moved into a "rename" sub directory
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        // thread pool for the upload tasks
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);

        // pool of FTP connections
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        // create the destination directory on the FTP server
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);

        // list the files in the source directory
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFileter = 0;
        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) {
            // filter by the requested time range
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFileter = System.currentTimeMillis();
        }
        // filter by regular expression
        if (config.getCommandLine().hasOption("r")) {
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFileter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);

        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) {
            // collect the file names already present on the FTP server so they can be skipped
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }

        int skiped = 0;
        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                // already uploaded, skip it
                skiped++;
                Log.info("skiped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // submit an upload task for this file
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir,
                    retryTimes));
            futureList.add(future);
        }

        int transfered = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transfered++;
                if (transfered % 50 == 0 || transfered == contents.length) {
                    logger.info("have transfered:" + transfered + " files");
                }
            } else {
                failed++;
                logger.error("failed transter:" + failed + " files");
            }
        }

        // shut down the thread pool
        threadPool.shutdown();
        // close the FTP client pool
        ftpPool.close();

        // summary
        logger.info("filter time:" + (endFileter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transtered: " + transfered + ",total failed:" + failed + ",total skiped:" + skiped);
    } else {
        // the source is a single list file: each line is split on "&" and the second field
        // is taken as the HDFS path of a file to upload
        BufferedReader reader = null;
        FtpClientFactory facotry = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {
            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;
            ftpClient = facotry.makeObject();
            while ((str = reader.readLine()) != null) {
                String[] feilds = str.split("&");
                Path filePath = null;
                if (feilds.length == 2 && feilds[1] != "") {
                    filePath = new Path(feilds[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                } else {
                    continue;
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // guard against streams that were never opened
            if (in != null) {
                in.close();
            }
            if (reader != null) {
                reader.close();
            }
            facotry.destroyObject(ftpClient);
        }
    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}