List of usage examples for org.apache.hadoop.fs.FileSystem.mkdirs
public boolean mkdirs(Path f) throws IOException
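For context before the examples: mkdirs creates the given directory and any non-existent parents (analogous to mkdir -p) and returns true on success. A minimal standalone sketch follows; the class name and path are illustrative, not taken from the examples below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws Exception {
        // picks up core-site.xml / hdfs-site.xml from the classpath
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        Path dir = new Path("/tmp/example/output");
        // creates /tmp, /tmp/example and /tmp/example/output as needed
        if (!fs.mkdirs(dir)) {
            System.err.println("Mkdirs failed to create " + dir);
        }
        fs.close();
    }
}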
From source file:com.github.sakserv.sequencefile.SequenceFileWriter.java
License:Apache License
public static void main(String[] args) {
    String outputFile = args[0];
    Configuration conf = new Configuration();
    try {
        FileSystem fs = FileSystem.get(conf);
        Path seqFilePath = new Path(outputFile);

        // ensure the parent directory exists before creating the writer
        fs.mkdirs(seqFilePath.getParent());

        SequenceFile.Writer writer = SequenceFile.createWriter(conf,
                SequenceFile.Writer.file(seqFilePath),
                SequenceFile.Writer.keyClass(Text.class),
                SequenceFile.Writer.valueClass(IntWritable.class));
        writer.append(new Text("key1"), new IntWritable(1));
        writer.append(new Text("key2"), new IntWritable(2));
        writer.close();

        LOG.info("SUCCESS: Successfully wrote " + seqFilePath + " to HDFS.");
    } catch (IOException e) {
        LOG.error("ERROR: Could not write " + outputFile + " to HDFS.");
        e.printStackTrace();
    }
}
From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java
License:Open Source License
/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {
    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    // reducing block size to force a split of the tiny file
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    // verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);
        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();
            md5.update(value.getBytes(), 0, value.getLength());
        }
        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}
From source file:com.hdfs.concat.clean.TestClean.java
License:Apache License
@Test
public void testAge() throws Exception {
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.mkdirs(new Path(ROOT_DIR, "a"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "a"), "1"));
    fs.mkdirs(new Path(ROOT_DIR, "b"));
    fs.mkdirs(new Path(ROOT_DIR, "c"));
    fs.mkdirs(new Path(new Path(ROOT_DIR, "c"), "2"));

    Path oldFile = new Path(new Path(new Path(ROOT_DIR, "a"), "1"), "oldfile");
    FSDataOutputStream out = fs.create(oldFile);
    out.write("bla".getBytes());
    out.close();

    Path cFile = new Path(new Path(new Path(ROOT_DIR, "c"), "1"), "cfile");
    FSDataOutputStream out2 = fs.create(cFile);
    out2.write("wah".getBytes());
    out2.close();

    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // warn mode: matching files are reported but not deleted
    Clean cleanWarn = new Clean();
    Configuration warnConf = createJobConf();
    warnConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    warnConf.set(Clean.TARGET_EXPR, "cfile");
    warnConf.set(Clean.WARN_MODE, "true");
    ToolRunner.run(warnConf, cleanWarn, new String[] {});
    assertEquals(true, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // regular mode: matching files are deleted
    Clean cleanReg = new Clean();
    Configuration regConf = createJobConf();
    regConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    regConf.set(Clean.TARGET_EXPR, "cfile");
    ToolRunner.run(regConf, cleanReg, new String[] {});
    assertEquals(false, fs.exists(cFile));
    assertEquals(true, fs.exists(oldFile));

    // a generous age cutoff leaves the file in place
    Clean clean = new Clean();
    Configuration cleanConf = createJobConf();
    cleanConf.setLong(Clean.CUTTOFF_MILLIS, 20000);
    cleanConf.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf, clean, new String[] {});
    assertEquals(true, fs.exists(oldFile));

    // once the file is older than a 1 ms cutoff it is removed
    Thread.sleep(3);
    Clean clean2 = new Clean();
    Configuration cleanConf2 = createJobConf();
    cleanConf2.setLong(Clean.CUTTOFF_MILLIS, 1);
    cleanConf2.set(Clean.TARGET_DIR, ROOT_DIR.toString());
    ToolRunner.run(cleanConf2, clean2, new String[] {});
    assertEquals(false, fs.exists(oldFile));
}
From source file:com.hdfstoftp.main.HdfsToFtp.java
/**
 * Copies files from HDFS to an FTP server. The original (non-English) parameter
 * documentation was unrecoverable; this description is reconstructed from the
 * method body and its actual signature.
 *
 * @param config transfer configuration: source/destination directories, filters,
 *               overwrite and rename-after-upload flags, thread count, retry times
 * @return true when the transfer finishes
 * @throws Exception
 */
private static boolean copyFromHDFSToFTP(Config config) throws Exception {
    // get the source HDFS file system
    Configuration conf = new Configuration();
    FileSystem srcFS = FileSystem.get(conf);
    long start = System.currentTimeMillis();
    boolean isRename = config.isRenameUploaded();
    int retryTimes = config.getRetryTimes();

    // source and destination paths
    String dstPath = config.getDestDir();
    Path src = new Path(config.getSouceDir());
    FileStatus fileStatus = srcFS.getFileStatus(src);
    String subDir = null;

    if (fileStatus.isDirectory()) { // copy a whole directory
        if (isRename) { // uploaded files get renamed into a subdirectory
            subDir = Config.RENAME_DIR;
            srcFS.mkdirs(new Path(fileStatus.getPath(), subDir));
        }
        int threadNum = config.getThreadNum();
        ExecutorService threadPool = Executors.newFixedThreadPool(threadNum);

        // create the FTP client pool and the destination directory
        FTPClientPool ftpPool = new FTPClientPool(threadNum, new FtpClientFactory(config.getFTPClientConfig()));
        FTPClient ftpClient = ftpPool.borrowObject();
        ftpClient.makeDirectory(dstPath);
        ftpPool.returnObject(ftpClient);

        // list the source files
        FileStatus contents[] = srcFS.listStatus(src);
        long beginFilter = 0;
        long endFilter = 0;

        // optional filter by time range (-d/-h/-t options)
        if (config.getCommandLine().hasOption("d") || config.getCommandLine().hasOption("h")
                || config.getCommandLine().hasOption("t")) {
            beginFilter = System.currentTimeMillis();
            Long[] timeRange = parseTimeRange(config.getCommandLine());
            contents = getNewContents(timeRange, contents);
            endFilter = System.currentTimeMillis();
        }
        // optional filter by regular expression (-r option)
        if (config.getCommandLine().hasOption("r")) {
            beginFilter = System.currentTimeMillis();
            contents = getFilterContents(config.getCommandLine().getOptionValue("r").trim(), contents);
            endFilter = System.currentTimeMillis();
        }
        logger.info("total file count:" + contents.length);

        Map<String, String> fileNameMap = null;
        long beginSkip = 0;
        long endSkip = 0;
        boolean overwrite = true;
        if (config.getCommandLine().hasOption("o")) {
            overwrite = "true".equals(config.getCommandLine().getOptionValue("o").trim());
        }
        if (!overwrite) { // collect names already on the server so they can be skipped
            beginSkip = System.currentTimeMillis();
            fileNameMap = getFileNameMap(dstPath, ftpPool);
            endSkip = System.currentTimeMillis();
        }

        int skipped = 0;
        List<Future<?>> futureList = new ArrayList<Future<?>>();
        for (int i = 0; i < contents.length; i++) {
            if (!overwrite && fileNameMap.containsKey(contents[i].getPath().getName())) {
                skipped++;
                Log.info("skipped filename:" + contents[i].getPath().getName());
                continue;
            }
            if (contents[i].isDirectory()) {
                continue;
            }
            // submit one upload task per file
            Future<?> future = threadPool.submit(new UploadFileTask(srcFS, contents[i].getPath(),
                    new Path(dstPath, contents[i].getPath().getName()), ftpPool, false, isRename, subDir,
                    retryTimes));
            futureList.add(future);
        }

        int transferred = 0;
        int failed = 0;
        for (Future<?> future : futureList) {
            Boolean computeResult = (Boolean) future.get();
            if (computeResult) {
                transferred++;
                if (transferred % 50 == 0 || transferred == contents.length) {
                    logger.info("have transferred:" + transferred + " files");
                }
            } else {
                failed++;
                logger.error("failed transfer:" + failed + " files");
            }
        }

        // shut down the thread pool and the FTP client pool
        threadPool.shutdown();
        ftpPool.close();

        // report timings and totals
        logger.info("filter time:" + (endFilter - beginFilter) + " ms");
        if (!overwrite) {
            logger.info("skip time:" + (endSkip - beginSkip) + " ms");
        }
        logger.info("total file count:" + contents.length);
        logger.info("total transferred: " + transferred + ",total failed:" + failed + ",total skipped:" + skipped);
    } else { // the source is a manifest file listing paths to upload
        BufferedReader reader = null;
        FtpClientFactory factory = new FtpClientFactory(config.getFTPClientConfig());
        FTPClient ftpClient = null;
        InputStream in = null;
        try {
            Path path = fileStatus.getPath();
            if (!path.getName().contains("log")) {
                // empty check retained from the original source
            }
            reader = new BufferedReader(new FileReader(new File(path.toUri().getPath())));
            String str = null;
            ftpClient = factory.makeObject();
            while ((str = reader.readLine()) != null) {
                String[] fields = str.split("&");
                if (fields.length == 2 && !fields[1].isEmpty()) {
                    Path filePath = new Path(fields[1]);
                    in = srcFS.open(filePath);
                    boolean result = ftpClient.storeFile(dstPath, in);
                    System.out.println(ftpClient.getReplyCode());
                    if (result) {
                        logger.info(filePath.toString());
                    } else {
                        logger_failed.info(filePath.toString());
                    }
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (in != null) {
                in.close();
            }
            if (reader != null) {
                reader.close();
            }
            factory.destroyObject(ftpClient);
        }
    }
    long end = System.currentTimeMillis();
    logger.info("finished transfer,total time:" + (end - start) / 1000 + "s");
    return true;
}
From source file:com.hive_unit.HiveTestBase.java
License:Apache License
public void setUp() throws Exception {
    super.setUp();
    String jarFile = org.apache.hadoop.hive.ql.exec.CopyTask.class.getProtectionDomain().getCodeSource()
            .getLocation().getFile();
    System.setProperty(HiveConf.ConfVars.HIVEJAR.toString(), jarFile);

    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);

    Path metastorePath = new Path("/tmp/metastore_db");
    fs.delete(metastorePath, true);

    Path warehouse = new Path("/tmp/warehouse");
    fs.delete(warehouse, true);
    fs.mkdirs(warehouse);
}
From source file:com.hive_unit.HiveTestService.java
License:Apache License
public void setUp() throws Exception {
    super.setUp();
    Path rootDir = getDir(ROOT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(rootDir, true);

    Path metastorePath = new Path("/tmp/metastore_db");
    fs.delete(metastorePath, true);

    Path warehouse = new Path("/tmp/warehouse");
    fs.delete(warehouse, true);
    fs.mkdirs(warehouse);

    if (standAloneServer) {
        try {
            transport = new TSocket(host, port);
            TProtocol protocol = new TBinaryProtocol(transport);
            client = new HiveClient(protocol);
            transport.open();
        } catch (Throwable e) {
            e.printStackTrace();
        }
    } else {
        client = new HiveServer.HiveServerHandler();
    }
}
From source file:com.hp.hpl.jena.sparql.algebra.MyOpVisitor.java
License:Open Source License
public void execute() {
    Configuration conf = new Configuration();
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        // recreate the output directory (delete any stale contents first)
        Path out = new Path("output");
        if (fs.exists(out)) {
            fs.delete(out, true);
        }
        fs.mkdirs(out);
    } catch (IOException e) {
        e.printStackTrace();
    }

    Triple[] Q = new Triple[0];
    Q = opBGP.getPattern().getList().toArray(Q);
    Set<Var> vars = PatternVars.vars(query.getQueryPattern());
    JoinPlaner.setid(id);
    JoinPlaner.newVaRS(vars);
    try {
        JoinPlaner.form(Q);
        JoinPlaner.removeNonJoiningVaribles(Q);
        int i = 0;
        while (!JoinPlaner.isEmpty()) {
            String v = JoinPlaner.getNextJoin();
            System.out.println(v);
            i++;
        }
        if (i == 0) {
            Path outFile = new Path("output/Join_" + id + "_" + 0);
            OutputBuffer out = new OutputBuffer(outFile, fs);
            QueryProcessor.executeSelect(Q[0], out, "P0");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:com.ibm.bi.dml.runtime.controlprogram.parfor.ResultMergeLocalFile.java
License:Open Source License
/**
 * Creates the output directory and merges all input matrix objects into it
 * by renaming their files (or entire directories).
 *
 * @param fnameNew name of the output directory
 * @param inMO input matrix objects to merge
 * @throws CacheException
 * @throws IOException
 */
private void copyAllFiles(String fnameNew, ArrayList<MatrixObject> inMO) throws CacheException, IOException {
    JobConf job = new JobConf(ConfigurationManager.getCachedJobConf());
    FileSystem fs = FileSystem.get(job);
    Path path = new Path(fnameNew);

    // create output dir
    fs.mkdirs(path);

    // merge in all input matrix objects
    IDSequence seq = new IDSequence();
    for (MatrixObject in : inMO) {
        LOG.trace("ResultMerge (local, file): Merge input " + in.getVarName() + " (fname="
                + in.getFileName() + ") via file rename.");

        // copy over files (just rename file or entire dir)
        Path tmpPath = new Path(in.getFileName());
        String lname = tmpPath.getName();
        fs.rename(tmpPath, new Path(fnameNew + "/" + lname + seq.getNextID()));
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.data.MultipleOutputCommitter.java
License:Open Source License
@Override
public void setupJob(JobContext context) throws IOException {
    super.setupJob(context);
    // get output file directories and create directories
    JobConf conf = context.getJobConf();
    String[] loutputs = MRJobConfiguration.getOutputs(conf);
    for (String dir : loutputs) {
        Path path = new Path(dir);
        FileSystem fs = path.getFileSystem(conf);
        if (!fs.mkdirs(path))
            LOG.error("Mkdirs failed to create " + path.toString());
    }
}
From source file:com.ibm.bi.dml.runtime.transform.DataTransform.java
License:Open Source License
/**
 * Helper function to move transformation metadata files from a temporary
 * location to their permanent location. These files (e.g., the header before
 * and after transformation) are generated by a single mapper while applying
 * data transformations. Note that these files must ultimately be placed
 * under the existing metadata directory (txMtdPath), which is simultaneously
 * read by other mappers. If they were not first created at a temporary
 * location, MR tasks would fail due to changing timestamps on txMtdPath.
 *
 * @param fs
 * @param tmpPath
 * @param txMtdPath
 * @throws IllegalArgumentException
 * @throws IOException
 */
private static void moveFilesFromTmp(FileSystem fs, String tmpPath, String txMtdPath)
        throws IllegalArgumentException, IOException {
    // move files from temporary location to txMtdPath
    MapReduceTool.renameFileOnHDFS(tmpPath + "/" + TransformationAgent.OUT_HEADER,
            txMtdPath + "/" + TransformationAgent.OUT_HEADER);
    MapReduceTool.renameFileOnHDFS(tmpPath + "/" + TransformationAgent.OUT_DCD_HEADER,
            txMtdPath + "/" + TransformationAgent.OUT_DCD_HEADER);
    MapReduceTool.renameFileOnHDFS(tmpPath + "/" + TransformationAgent.COLTYPES_FILE_NAME,
            txMtdPath + "/" + TransformationAgent.COLTYPES_FILE_NAME);

    if (fs.exists(new Path(tmpPath + "/Dummycode/" + TransformationAgent.DCD_FILE_NAME))) {
        if (!fs.exists(new Path(txMtdPath + "/Dummycode/")))
            fs.mkdirs(new Path(txMtdPath + "/Dummycode/"));
        MapReduceTool.renameFileOnHDFS(tmpPath + "/Dummycode/" + TransformationAgent.DCD_FILE_NAME,
                txMtdPath + "/Dummycode/" + TransformationAgent.DCD_FILE_NAME);
    }
}