List of usage examples for org.apache.hadoop.fs.Path#getName()
public String getName()
Returns the final component of this path (the file or directory name, without parent directories or scheme).
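Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the source files listed on this page; the class name PathGetNameDemo is made up for illustration) of what getName() returns:

import org.apache.hadoop.fs.Path;

public class PathGetNameDemo {
    public static void main(String[] args) {
        Path p = new Path("hdfs://namenode:8020/user/data/part-00000.gz");
        System.out.println(p.getName());              // prints "part-00000.gz"
        System.out.println(p.getParent().getName());  // prints "data"
        // The root path has no final component, so getName() returns the empty string
        // (the PrestoS3FileSystem example below relies on this to detect a bucket root).
        System.out.println(new Path("/").getName());  // prints ""
    }
}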
From source file:com.ricemap.spateDB.util.RandomSpatialGenerator.java
License:Apache License
public static void generateMapReduce(Path file, Prism mbr, long size, long blocksize, Shape shape,
        String sindex, long seed, int rectsize, RandomShapeGenerator.DistributionType type, boolean overwrite)
        throws IOException {
    JobConf job = new JobConf(RandomSpatialGenerator.class);
    job.setJobName("Generator");
    FileSystem outFs = file.getFileSystem(job);

    // Overwrite output file
    if (outFs.exists(file)) {
        if (overwrite)
            outFs.delete(file, true);
        else
            throw new RuntimeException(
                    "Output file '" + file + "' already exists and overwrite flag is not set");
    }

    // Set generation parameters in job
    job.setLong(RandomShapeGenerator.GenerationSize, size);
    SpatialSite.setPrism(job, RandomShapeGenerator.GenerationMBR, mbr);
    if (seed != 0)
        job.setLong(RandomShapeGenerator.GenerationSeed, seed);
    if (rectsize != 0)
        job.setInt(RandomShapeGenerator.GenerationRectSize, rectsize);
    if (type != null)
        job.set(RandomShapeGenerator.GenerationType, type.toString());

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();

    // Set input format and map class
    job.setInputFormat(RandomInputFormat.class);
    job.setMapperClass(Repartition.RepartitionMap.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(shape.getClass());
    job.setNumMapTasks(10 * Math.max(1, clusterStatus.getMaxMapTasks()));

    SpatialSite.setShapeClass(job, shape.getClass());

    if (blocksize != 0) {
        job.setLong(SpatialSite.LOCAL_INDEX_BLOCK_SIZE, blocksize);
    }

    CellInfo[] cells;
    if (sindex == null) {
        cells = new CellInfo[] { new CellInfo(1, mbr) };
    } else if (sindex.equals("grid")) {
        GridInfo gridInfo = new GridInfo(mbr.t1, mbr.x1, mbr.y1, mbr.t2, mbr.x2, mbr.y2);
        FileSystem fs = file.getFileSystem(job);
        if (blocksize == 0) {
            blocksize = fs.getDefaultBlockSize(file);
        }
        int numOfCells = Repartition.calculateNumberOfPartitions(job, size, fs, file, blocksize);
        gridInfo.calculateCellDimensions(numOfCells);
        cells = gridInfo.getAllCells();
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    SpatialSite.setCells(job, cells);

    // Do not set a reduce function. Use the default identity reduce function
    if (cells.length == 1) {
        // All objects are in one partition. No need for a reduce phase
        job.setNumReduceTasks(0);
    } else {
        // More than one partition. Need a reduce phase to group shapes of the
        // same partition together
        job.setReducerClass(RepartitionReduce.class);
        job.setNumReduceTasks(
                Math.max(1, Math.min(cells.length, (clusterStatus.getMaxReduceTasks() * 9 + 5) / 10)));
    }

    // Set output path
    FileOutputFormat.setOutputPath(job, file);
    if (sindex == null || sindex.equals("grid")) {
        job.setOutputFormat(GridOutputFormat.class);
    } else {
        throw new RuntimeException("Unsupported spatial index: " + sindex);
    }

    JobClient.runJob(job);

    // Concatenate all master files into one file
    FileStatus[] resultFiles = outFs.listStatus(file, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().contains("_master");
        }
    });
    String ext = resultFiles[0].getPath().getName()
            .substring(resultFiles[0].getPath().getName().lastIndexOf('.'));
    Path masterPath = new Path(file, "_master" + ext);
    OutputStream destOut = outFs.create(masterPath);
    byte[] buffer = new byte[4096];
    for (FileStatus f : resultFiles) {
        InputStream in = outFs.open(f.getPath());
        int bytes_read;
        do {
            bytes_read = in.read(buffer);
            if (bytes_read > 0)
                destOut.write(buffer, 0, bytes_read);
        } while (bytes_read > 0);
        in.close();
        outFs.delete(f.getPath(), false);
    }
    destOut.close();

    // Plot an image for the partitions used in file
    Path imagePath = new Path(file, "_partitions.png");
    int imageSize = (int) (Math.sqrt(cells.length) * 300);
    Plot.plotLocal(masterPath, imagePath, new Partition(), imageSize, imageSize, Color.BLACK, false, false, false);
}
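The example above uses getName() inside a PathFilter so that only the file name, not the full path, is matched against "_master". A minimal, standalone sketch of the same pattern (the directory /tmp/generator-output is hypothetical) could look like this:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class ListMasterFiles {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path dir = new Path("/tmp/generator-output"); // hypothetical output directory
        FileSystem fs = dir.getFileSystem(conf);
        // Select only files whose final path component contains "_master"
        FileStatus[] masters = fs.listStatus(dir, new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().contains("_master");
            }
        });
        for (FileStatus f : masters) {
            System.out.println(f.getPath().getName());
        }
    }
}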
From source file:com.rockstor.compact.recovery.CompactRecovery.java
License:Apache License
public void recovery() throws IOException, NoSuchAlgorithmException {
    if (!dfs.exists(dirPath)) {
        LOG.info("directory " + dirPath + " does not exist!");
        return;
    }

    if (dfs.isFile(dirPath)) {
        LOG.error("input path (" + dirPath + ") should be a directory!");
        throw new IOException("Input Error");
    }

    FileStatus[] files = dfs.listStatus(dirPath);
    if (files == null || files.length == 0) {
        LOG.info("empty directory " + dirPath + "!");
        return;
    }

    Path dataPath = null;
    Path metaPath = null;
    Path indexPath = null;
    Path curPath = null;
    String curName = null;
    String rockIdStr = null;
    byte[] rockId = null;

    for (FileStatus f : files) {
        curPath = f.getPath();
        curName = curPath.getName();
        if (curName.equals(PathUtil.TASK_INDEX_NAME)) {
            indexPath = curPath;
        } else if (curName.equals(PathUtil.TASK_META_NAME)) {
            metaPath = curPath;
        } else {
            dataPath = curPath;
            rockIdStr = curName;
            rockId = MD5HashUtil.bytesFromHexString(rockIdStr);
        }
    }

    /*
     * In a compact subdir, files co-exist in these situations:
     * 1. none               - not started, or finished
     * 2. only meta          - not started
     * 3. meta, data         - not started; we created data before index
     * 4. meta, data, index  - started, but failed
     * 5. meta, index        - data had been compacted, but the chunk index had not been synchronized
     * 6. only index         - data had been compacted and invalid chunks had been removed from the
     *                         chunk db; the remaining chunk index still needs to be synchronized
     * 7. none               - totally finished
     */
    do {
        // compact failed, remove failed rock from rock db, situation 3 or 4
        if (dataPath != null) {
            RockDB.remove(rockId);
            break;
        }
        if (metaPath != null && indexPath == null) {
            // situation 2
            break;
        }
        if (metaPath != null) {
            // situation 5
            // remove invalid chunks
            compactor.removeInvalidChunks(taskIdName);
        }
        if (indexPath != null) {
            // situation 5 or 6
            // sync left chunks
            compactor.syncLeftChunks(taskIdName);
        }
    } while (false);

    // remove current path
    dfs.delete(dirPath, true);
}
From source file:com.sensei.indexing.hadoop.reduce.ShardWriter.java
License:Apache License
private void moveFromTempToPerm() throws IOException {
    FileStatus[] fileStatus = localFs.listStatus(temp, LuceneIndexFileNameFilter.getFilter());

    // move the files created in temp dir except segments_N and segments.gen
    for (int i = 0; i < fileStatus.length; i++) {
        Path path = fileStatus[i].getPath();
        String name = path.getName();

        // if (fs.exists(new Path(perm, name))) {
        //     moveToTrash(iconf, perm);
        // }
        // fs.copyFromLocalFile(path, new Path(perm, name));

        try {
            if (!fs.exists(new Path(perm, name))) {
                fs.copyFromLocalFile(path, new Path(perm, name));
            } else {
                moveToTrash(iconf, perm);
                fs.copyFromLocalFile(path, new Path(perm, name));
            }
        } catch (Exception e) {
            ;
        }
    }
}
From source file:com.splicemachine.fs.s3.PrestoS3FileSystem.java
License:Apache License
@Override
public FileStatus getFileStatus(Path path) throws IOException {
    if (path.getName().isEmpty()) {
        // the bucket root requires special handling
        if (getS3ObjectMetadata(path) != null) {
            return new FileStatus(0, true, 1, 0, 0, qualifiedPath(path));
        }
        throw new FileNotFoundException("File does not exist: " + path);
    }

    ObjectMetadata metadata = getS3ObjectMetadata(path);
    if (metadata == null) {
        // check if this path is a directory
        Iterator<LocatedFileStatus> iterator = listPrefix(path);
        if (iterator.hasNext()) {
            return new FileStatus(0, true, 1, 0, 0, qualifiedPath(path));
        }
        throw new FileNotFoundException("File does not exist: " + path);
    }

    return new FileStatus(getObjectSize(metadata), false, 1, BLOCK_SIZE.toBytes(),
            lastModifiedTime(metadata), qualifiedPath(path));
}
From source file:com.splicemachine.fs.s3.PrestoS3FileSystem.java
License:Apache License
@Override
public boolean rename(Path src, Path dst) throws IOException {
    boolean srcDirectory;
    try {
        srcDirectory = directory(src);
    } catch (FileNotFoundException e) {
        return false;
    }

    try {
        if (!directory(dst)) {
            // cannot copy a file to an existing file
            return keysEqual(src, dst);
        }
        // move source under destination directory
        dst = new Path(dst, src.getName());
    } catch (FileNotFoundException e) {
        // destination does not exist
    }

    if (keysEqual(src, dst)) {
        return true;
    }

    if (srcDirectory) {
        for (FileStatus file : listStatus(src)) {
            rename(file.getPath(), new Path(dst, file.getPath().getName()));
        }
        deleteObject(keyFromPath(src) + DIRECTORY_SUFFIX);
    } else {
        s3.copyObject(uri.getHost(), keyFromPath(src), uri.getHost(), keyFromPath(dst));
        delete(src, true);
    }
    return true;
}
From source file:com.splout.db.dnode.Fetcher.java
License:Open Source License
private File hdfsFetch(Path fromPath, Reporter reporter) throws IOException, InterruptedException {
    UUID uniqueId = UUID.randomUUID();
    File toFile = new File(tempDir, uniqueId.toString() + "/" + fromPath.getName());
    File toDir = new File(toFile.getParent());
    if (toDir.exists()) {
        FileUtils.deleteDirectory(toDir);
    }
    toDir.mkdirs();

    Path toPath = new Path(toFile.getCanonicalPath());

    FileSystem fS = fromPath.getFileSystem(hadoopConf);
    FileSystem tofS = FileSystem.getLocal(hadoopConf);

    Throttler throttler = new Throttler((double) bytesPerSecThrottle);
    try {
        for (FileStatus fStatus : fS.globStatus(fromPath)) {
            log.info("Copying " + fStatus.getPath() + " to " + toPath);
            long bytesSoFar = 0;

            FSDataInputStream iS = fS.open(fStatus.getPath());
            FSDataOutputStream oS = tofS.create(toPath);

            byte[] buffer = new byte[downloadBufferSize];
            int nRead;
            while ((nRead = iS.read(buffer, 0, buffer.length)) != -1) {
                // Needed to be able to be interrupted at any moment.
                if (Thread.interrupted()) {
                    iS.close();
                    oS.close();
                    cleanDirNoExceptions(toDir);
                    throw new InterruptedException();
                }
                bytesSoFar += nRead;
                oS.write(buffer, 0, nRead);
                throttler.incrementAndThrottle(nRead);
                if (bytesSoFar >= bytesToReportProgress) {
                    reporter.progress(bytesSoFar);
                    bytesSoFar = 0l;
                }
            }

            if (reporter != null) {
                reporter.progress(bytesSoFar);
            }

            oS.close();
            iS.close();
        }
        return toDir;
    } catch (ClosedByInterruptException e) {
        // This can be thrown by the method read.
        cleanDirNoExceptions(toDir);
        throw new InterruptedIOException();
    }
}
From source file:com.splout.db.dnode.TestFetcher.java
License:Open Source License
@Test
public void testHdfsFetching() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);

    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    Fetcher fetcher = new Fetcher(testConfig);

    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write("This is what happens when you don't know what to write".getBytes());
    oS.close();

    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());
    assertEquals("This is what happens when you don't know what to write",
            Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}
From source file:com.splout.db.dnode.TestFetcher.java
License:Open Source License
@Test
public void testHdfsFetchingInterrupted() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    final FileSystem fS = FileSystem.getLocal(conf);

    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    final Fetcher fetcher = new Fetcher(testConfig);

    final Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write("This is what happens when you don't know what to write".getBytes());
    oS.close();

    Thread t = new Thread() {
        @Override
        public void run() {
            try {
                try {
                    File f = fetcher
                            .fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
                } catch (IOException e) {
                    e.printStackTrace();
                } catch (URISyntaxException e) {
                    e.printStackTrace();
                }
                fail("An InterruptedException was expected.");
            } catch (InterruptedException e) {
                // Everything good.
            }
        }
    };
    // We interrupt the thread before starting so we are sure that the interruption check
    // will be seen even if the file to copy is very small.
    t.interrupt();
    t.start();
}
From source file:com.splout.db.dnode.TestFetcher.java
License:Open Source License
@Test
public void testHdfsFetchingAndThrottling() throws IOException, URISyntaxException, InterruptedException {
    Configuration conf = new Configuration();
    FileSystem fS = FileSystem.getLocal(conf);

    SploutConfiguration testConfig = SploutConfiguration.getTestConfig();
    testConfig.setProperty(FetcherProperties.TEMP_DIR, "tmp-dir-" + TestFetcher.class.getName());
    testConfig.setProperty(FetcherProperties.DOWNLOAD_BUFFER, 4);
    testConfig.setProperty(FetcherProperties.BYTES_PER_SEC_THROTTLE, 8);
    Fetcher fetcher = new Fetcher(testConfig);

    final String str = "This is what happens when you don't know what to write";
    Path path = new Path("tmp-" + TestFetcher.class.getName());
    OutputStream oS = fS.create(path);
    oS.write(str.getBytes());
    oS.close();

    long startTime = System.currentTimeMillis();
    File f = fetcher.fetch(new Path(fS.getWorkingDirectory(), path.getName()).toUri().toString());
    long endTime = System.currentTimeMillis();

    double bytesPerSec = (str.getBytes().length / (double) (endTime - startTime)) * 1000;
    assertEquals(8, bytesPerSec, 0.5);

    assertTrue(f.exists());
    assertTrue(f.isDirectory());

    File file = new File(f, "tmp-" + TestFetcher.class.getName());
    assertTrue(file.exists());
    assertEquals(str, Files.toString(file, Charset.defaultCharset()));

    fS.delete(path, true);
    FileUtils.deleteDirectory(f);
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
@SuppressWarnings("deprecation") private long fullScanSampling(TablespaceSpec tablespace, final long sampleSize, Configuration hadoopConf, Path outputPath, final int nSplits) throws TupleSamplerException { MapOnlyJobBuilder builder = new MapOnlyJobBuilder(hadoopConf, "Reservoir Sampling to path " + outputPath); for (Table table : tablespace.getPartitionedTables()) { final TableSpec tableSpec = table.getTableSpec(); final String getPartitionByJavaScript = tableSpec.getPartitionByJavaScript(); for (TableInput inputFile : table.getFiles()) { final RecordProcessor processor = inputFile.getRecordProcessor(); for (Path path : inputFile.getPaths()) { builder.addInput(path, inputFile.getFormat(), new MapOnlyMapper<ITuple, NullWritable, Text, NullWritable>() { final int nSamples = (int) (sampleSize / nSplits); final String[] samples = new String[nSamples]; CounterInterface counterInterface; long recordCounter = 0; JavascriptEngine jsEngine = null; @Override protected void setup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { counterInterface = new CounterInterface(context); // Initialize JavaScript engine if needed if (getPartitionByJavaScript != null) { try { jsEngine = new JavascriptEngine(getPartitionByJavaScript); } catch (Throwable e) { throw new RuntimeException(e); }//from w ww. j av a2 s .c om } } ; // Collect Tuples with decreasing probability // (http://en.wikipedia.org/wiki/Reservoir_sampling) protected void map(ITuple key, NullWritable value, Context context) throws IOException, InterruptedException { ITuple uTuple; try { uTuple = processor.process(key, key.getSchema().getName(), counterInterface); } catch (Throwable e) { throw new RuntimeException(e); } if (uTuple == null) { // user may have filtered the record return; } long reservoirIndex; if (recordCounter < nSamples) { reservoirIndex = recordCounter; } else { reservoirIndex = (long) (Math.random() * recordCounter); } if (reservoirIndex < nSamples) { String pkey = null; try { pkey = TablespaceGenerator.getPartitionByKey(uTuple, tableSpec, jsEngine); } catch (Throwable e) { throw new RuntimeException("Error when determining partition key.", e); } samples[(int) reservoirIndex] = pkey; } recordCounter++; } // Write the in-memory sampled Tuples protected void cleanup(Context context, MultipleOutputsCollector coll) throws IOException, InterruptedException { Text key = new Text(); for (String keyStr : samples) { if (keyStr != null) { key.set(keyStr); context.write(key, NullWritable.get()); } } } }, inputFile.getSpecificHadoopInputFormatContext()); } } } // Set output path Path outReservoirPath = new Path(outputPath + "-reservoir"); builder.setOutput(outReservoirPath, new HadoopOutputFormat(SequenceFileOutputFormat.class), Text.class, NullWritable.class); builder.setJarByClass(callingClass); try { Job job = null; job = builder.createJob(); if (!job.waitForCompletion(true)) { throw new TupleSamplerException("Reservoir Sampling failed!"); } } catch (Exception e) { throw new TupleSamplerException("Error creating or launching the sampling job.", e); } finally { try { builder.cleanUpInstanceFiles(); } catch (IOException e) { throw new TupleSamplerException("Error cleaning up the sampling job.", e); } } long retrievedSamples = 0; try { FileSystem outFs = outReservoirPath.getFileSystem(hadoopConf); if (outFs.listStatus(outReservoirPath) == null) { throw new IOException("Output folder not created: the Job failed!"); } retrievedSamples = 0; // Instantiate the writer we will write samples to 
SequenceFile.Writer writer = new SequenceFile.Writer(outFs, hadoopConf, outputPath, Text.class, NullWritable.class); // Aggregate the output into a single file for being consistent with the other sampling methods for (FileStatus fileStatus : outFs.listStatus(outReservoirPath)) { Path thisPath = fileStatus.getPath(); if (thisPath.getName().startsWith("part-m-")) { SequenceFile.Reader reader = new SequenceFile.Reader(outFs, thisPath, hadoopConf); Text key = new Text(); while (reader.next(key)) { writer.append(key, NullWritable.get()); retrievedSamples++; } reader.close(); } } writer.close(); outFs.delete(outReservoirPath, true); } catch (IOException e) { throw new TupleSamplerException("Error consolidating the sample job results into one file.", e); } return retrievedSamples; }