List of usage examples for org.apache.hadoop.fs.FileSystem mkdirs
public boolean mkdirs(Path f) throws IOException
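mkdirs creates the directory f together with any missing parent directories and returns true on success; on HDFS it behaves like mkdir -p, so an already existing directory is not an error. The following is a minimal sketch, not taken from the sources below: the class name MkdirsExample, the default Configuration, and the path "/tmp/example/output" are illustrative assumptions.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MkdirsExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();       // default/classpath Hadoop configuration (assumption)
        FileSystem fs = FileSystem.get(conf);
        Path dir = new Path("/tmp/example/output");     // illustrative path, not from the examples below
        if (!fs.exists(dir)) {
            if (!fs.mkdirs(dir)) {                      // also creates missing parent directories
                throw new IOException("Could not create directory " + dir);
            }
        }
    }
}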
From source file:io.covert.binary.analysis.BuildSequenceFileFromTarball.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    File inDirOrFile = new File(args[0]);
    Path outputDir = new Path(args[1]);

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(outputDir)) {
        fs.mkdirs(outputDir);
    }

    if (inDirOrFile.isFile()) {
        load(fs, conf, inDirOrFile, outputDir);
    } else {
        for (File file : inDirOrFile.listFiles()) {
            if (!file.isFile()) {
                System.out.println("Skipping " + file + " (not a file) ...");
                continue;
            }
            load(fs, conf, file, outputDir);
        }
    }
    return 0;
}
From source file:io.covert.binary.analysis.BuildTarBzSequenceFile.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(name)) {
        fs.mkdirs(name);
    }

    for (File file : inDir.listFiles()) {
        // Skip non-regular files before opening a writer for them.
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        Path sequenceName = new Path(name, file.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
                BytesWritable.class, CompressionType.RECORD);

        final InputStream is = new FileInputStream(file);
        final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                .createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {
                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                // Key each entry by the hex MD5 digest of its contents.
                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                String hexdigest = "";
                for (int i = 0; i < digest.length; i++) {
                    hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1);
                }
                key.set(hexdigest);
                writer.append(key, val);
            }
        }
        debInputStream.close();
        writer.close();
    }
    return 0;
}
From source file:io.druid.indexer.JobHelper.java
License:Apache License
static void addJarToClassPath(File jarFile, Path distributedClassPath, Path intermediateClassPath, FileSystem fs,
        Job job) throws IOException {
    // Create the distributed directory if it does not exist;
    // rename will always fail if the destination does not exist.
    fs.mkdirs(distributedClassPath);

    // Non-snapshot jar files are uploaded to the shared classpath.
    final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());
    if (!fs.exists(hdfsPath)) {
        // Multiple jobs can try to upload the jar here. To keep them from overwriting
        // each other's files, upload to intermediateClassPath first and then rename
        // to the distributedClassPath.
        final Path intermediateHdfsPath = new Path(intermediateClassPath, jarFile.getName());
        uploadJar(jarFile, intermediateHdfsPath, fs);
        IOException exception = null;
        try {
            log.info("Renaming jar to path[%s]", hdfsPath);
            fs.rename(intermediateHdfsPath, hdfsPath);
            if (!fs.exists(hdfsPath)) {
                throw new IOException(String.format("File does not exist even after moving from[%s] to [%s]",
                        intermediateHdfsPath, hdfsPath));
            }
        } catch (IOException e) {
            // The rename failed, possibly due to a race condition; check whether some
            // other job has already uploaded the jar file.
            try {
                if (!fs.exists(hdfsPath)) {
                    log.error(e, "IOException while renaming jar file");
                    exception = e;
                }
            } catch (IOException e1) {
                e.addSuppressed(e1);
                exception = e;
            }
        } finally {
            try {
                if (fs.exists(intermediateHdfsPath)) {
                    fs.delete(intermediateHdfsPath, false);
                }
            } catch (IOException e) {
                if (exception == null) {
                    exception = e;
                } else {
                    exception.addSuppressed(e);
                }
            }
            if (exception != null) {
                throw exception;
            }
        }
    }
    job.addFileToClassPath(hdfsPath);
}
From source file:io.druid.segment.loading.HdfsDataSegmentPusher.java
License:Open Source License
@Override
public DataSegment push(File inDir, DataSegment segment) throws IOException {
    final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);
    Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
    FileSystem fs = outFile.getFileSystem(hadoopConfig);

    fs.mkdirs(outFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, outFile);

    FSDataOutputStream out = null;
    long size;
    try {
        out = fs.create(outFile);
        size = CompressionUtils.zip(inDir, out);
        out.close();
    } finally {
        Closeables.closeQuietly(out);
    }

    return createDescriptorFile(segment.withLoadSpec(makeLoadSpec(outFile)).withSize(size)
            .withBinaryVersion(IndexIO.CURRENT_VERSION_ID), outFile.getParent(), fs);
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentKillerTest.java
License:Apache License
private void makePartitionDirWithIndex(FileSystem fs, Path path) throws IOException {
    Assert.assertTrue(fs.mkdirs(path));
    // Create an empty index.zip inside the newly created partition directory.
    try (FSDataOutputStream os = fs.create(new Path(path, "index.zip"))) {
    }
}
From source file:io.druid.storage.hdfs.HdfsDataSegmentPusher.java
License:Apache License
@Override
public DataSegment push(File inDir, DataSegment segment) throws IOException {
    final String storageDir = DataSegmentPusherUtil.getHdfsStorageDir(segment);

    log.info("Copying segment[%s] to HDFS at location[%s/%s]", segment.getIdentifier(),
            config.getStorageDirectory(), storageDir);

    Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
    FileSystem fs = outFile.getFileSystem(hadoopConfig);

    fs.mkdirs(outFile.getParent());
    log.info("Compressing files from[%s] to [%s]", inDir, outFile);

    final long size;
    try (FSDataOutputStream out = fs.create(outFile)) {
        size = CompressionUtils.zip(inDir, out);
    }

    return createDescriptorFile(segment.withLoadSpec(makeLoadSpec(outFile)).withSize(size)
            .withBinaryVersion(SegmentUtils.getVersionFromDir(inDir)), outFile.getParent(), fs);
}
From source file:io.fluo.stress.trie.Init.java
License:Apache License
private int buildTree(int nodeSize, FluoConfiguration props, Path tmp, int stopLevel) throws Exception {
    Job job = Job.getInstance(getConf());

    job.setJarByClass(Init.class);
    job.setJobName(Init.class.getName() + "_load");

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);

    job.getConfiguration().setInt(TRIE_NODE_SIZE_PROP, nodeSize);
    job.getConfiguration().setInt(TRIE_STOP_LEVEL_PROP, stopLevel);

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.addInputPath(job, new Path(tmp, "nums"));

    job.setMapperClass(InitMapper.class);
    job.setCombinerClass(InitCombiner.class);
    job.setReducerClass(InitReducer.class);

    job.setOutputFormatClass(AccumuloFileOutputFormat.class);
    job.setPartitionerClass(RangePartitioner.class);

    FileSystem fs = FileSystem.get(job.getConfiguration());
    Connector conn = AccumuloUtil.getConnector(props);

    Path splitsPath = new Path(tmp, "splits.txt");
    Collection<Text> splits1 = writeSplits(props, fs, conn, splitsPath);
    RangePartitioner.setSplitFile(job, splitsPath.toString());
    job.setNumReduceTasks(splits1.size() + 1);

    Path outPath = new Path(tmp, "out");
    AccumuloFileOutputFormat.setOutputPath(job, outPath);

    boolean success = job.waitForCompletion(true);

    if (success) {
        Path failPath = new Path(tmp, "failures");
        fs.mkdirs(failPath);
        conn.tableOperations().importDirectory(props.getAccumuloTable(), outPath.toString(),
                failPath.toString(), false);
    }
    return success ? 0 : 1;
}
From source file:io.fluo.stress.trie.NumberIngest.java
License:Apache License
private static void setupHdfs(String hadoopPrefix, String testDir, int numMappers, int numPerMapper)
        throws IllegalArgumentException, IOException {
    Configuration config = new Configuration();
    config.addResource(new Path(hadoopPrefix + "/conf/core-site.xml"));
    config.addResource(new Path(hadoopPrefix + "/conf/hdfs-site.xml"));
    @SuppressWarnings("resource")
    FileSystem hdfs = FileSystem.get(config);

    String inputDir = testDir + "/input";
    hdfs.mkdirs(new Path(inputDir));
    try (FSDataOutputStream fos = hdfs.create(new Path(inputDir + "/data"))) {
        for (int i = 0; i < numMappers; i++) {
            fos.writeUTF(Integer.toString(numPerMapper) + "\n");
        }
    }
}
From source file:io.fluo.webindex.data.Copy.java
License:Apache License
public static void main(String[] args) throws Exception {

    if (args.length != 3) {
        log.error("Usage: Copy <pathsFile> <range> <dest>");
        System.exit(1);
    }
    final String hadoopConfDir = IndexEnv.getHadoopConfDir();
    final List<String> copyList = IndexEnv.getPathsRange(args[0], args[1]);
    if (copyList.isEmpty()) {
        log.error("No files to copy given {} {}", args[0], args[1]);
        System.exit(1);
    }
    DataConfig dataConfig = DataConfig.load();

    SparkConf sparkConf = new SparkConf().setAppName("webindex-copy");
    try (JavaSparkContext ctx = new JavaSparkContext(sparkConf)) {

        FileSystem hdfs = FileSystem.get(ctx.hadoopConfiguration());
        Path destPath = new Path(args[2]);
        if (!hdfs.exists(destPath)) {
            hdfs.mkdirs(destPath);
        }

        log.info("Copying {} files (Range {} of paths file {}) from AWS to HDFS {}", copyList.size(), args[1],
                args[0], destPath.toString());

        JavaRDD<String> copyRDD = ctx.parallelize(copyList, dataConfig.getNumExecutorInstances());

        final String prefix = DataConfig.CC_URL_PREFIX;
        final String destDir = destPath.toString();

        copyRDD.foreachPartition(iter -> {
            FileSystem fs = IndexEnv.getHDFS(hadoopConfDir);
            iter.forEachRemaining(ccPath -> {
                try {
                    Path dfsPath = new Path(destDir + "/" + getFilename(ccPath));
                    if (fs.exists(dfsPath)) {
                        log.error("File {} exists in HDFS and should have been previously filtered",
                                dfsPath.getName());
                    } else {
                        String urlToCopy = prefix + ccPath;
                        log.info("Starting copy of {} to {}", urlToCopy, destDir);
                        try (OutputStream out = fs.create(dfsPath);
                                BufferedInputStream in = new BufferedInputStream(
                                        new URL(urlToCopy).openStream())) {
                            IOUtils.copy(in, out);
                        }
                        log.info("Created {}", dfsPath.getName());
                    }
                } catch (IOException e) {
                    log.error("Exception while copying {}", ccPath, e);
                }
            });
        });
    }
}
From source file:io.gzinga.hadoop.TestHadoopGZipRandomAccess.java
License:Apache License
@Test
public void testGZipOutputStream() {
    try {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.get(conf);
        fs.mkdirs(new Path("target/test"));
        GZipOutputStreamRandomAccess gzip = new GZipOutputStreamRandomAccess(
                fs.create(new Path("target/test/testfile")));
        byte[] str = "This is line\n".getBytes();
        for (int i = 1; i <= 10000; i++) {
            if (i % 100 == 0) {
                gzip.addOffset(i / 100l);
            }
            gzip.write(str);
        }
        Assert.assertEquals(gzip.getOffsetMap().size(), 100);
        gzip.close();
        fs.copyFromLocalFile(new Path(fs.getWorkingDirectory().toString() + "/target/test-classes/testfile1"),
                new Path("target/test/testfile1"));

        FSDataInputStream fin = fs.open(new Path("target/test/testfile"));
        long len = fs.getFileStatus(new Path("target/test/testfile")).getLen();
        SeekableGZipDataInputStream sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertTrue(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));

        fin = fs.open(new Path("target/test/testfile1"));
        sin = new SeekableGZipDataInputStream(fin, len);
        Assert.assertFalse(GZipInputStreamRandomAccess.isGzipRandomOutputFile(sin));

        fin = fs.open(new Path("target/test/testfile"));
        sin = new SeekableGZipDataInputStream(fin, len);
        GZipInputStreamRandomAccess gzin = new GZipInputStreamRandomAccess(sin);
        Assert.assertEquals(gzin.getMetadata().size(), 100);
        Assert.assertTrue(gzin.getMetadata().containsKey(1l));
        Assert.assertTrue(gzin.getMetadata().containsKey(100l));
        Assert.assertFalse(gzin.getMetadata().containsKey(200l));

        gzin.jumpToIndex(50l);
        int count1 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count1++;
        }
        gzin.jumpToIndex(60l);
        int count2 = 0;
        while (true) {
            int l = gzin.read();
            if (l == -1) {
                break;
            }
            count2++;
        }
        Assert.assertTrue(count1 > count2);
        gzin.close();
    } catch (Exception e) {
        e.printStackTrace();
        Assert.fail();
    }
}