List of usage examples for org.apache.hadoop.fs.FileSystem#getConf()
@Override
public Configuration getConf()
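Before the project-specific examples below, a minimal sketch of the common pattern: obtain a FileSystem and read its live configuration back through getConf(), for example to log the default filesystem URI and to hand the same Configuration on to readers and writers. The path used here is illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() returns the Configuration this FileSystem instance was initialized with
        Configuration fsConf = fs.getConf();
        System.out.println("fs.defaultFS = " + fsConf.get("fs.defaultFS"));

        // the same Configuration is typically passed on to helpers that need it
        System.out.println("/tmp exists: " + fs.exists(new Path("/tmp")));
    }
}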
From source file:org.apache.falcon.regression.core.util.HadoopUtil.java
License:Apache License
/**
 * Removes the directory with the given name and creates an empty one with the same name.
 * @param fs filesystem
 * @param path path to a directory
 * @throws IOException
 */
public static void recreateDir(FileSystem fs, String path) throws IOException {
    deleteDirIfExists(path, fs);
    LOGGER.info("creating hdfs dir: " + path + " on " + fs.getConf().get("fs.default.name"));
    fs.mkdirs(new Path(path));
}
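A minimal sketch of how this helper might be invoked; the staging path is an illustrative assumption, not part of the Falcon source.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.falcon.regression.core.util.HadoopUtil;

public class RecreateDirUsage {
    public static void main(String[] args) throws Exception {
        // Wipe and recreate a staging directory before a test run; the path is illustrative.
        FileSystem fs = FileSystem.get(new Configuration());
        HadoopUtil.recreateDir(fs, "/tmp/falcon-staging");
    }
}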
From source file:org.apache.falcon.service.SharedLibraryHostingService.java
License:Apache License
private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info("Extension store not initialized by Extension service. Make sure Extension service is added in "
                + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If default fs for Falcon server is same as cluster fs abort copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                /* HiveDR uses filter path as store path in DRStatusStore, so skip it. Copy only the
                   extension artifacts */
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster" + cluster.getName(), e);
    }
}
From source file:org.apache.falcon.snapshots.replication.HdfsSnapshotReplicator.java
License:Apache License
private static void createSnapshotInFileSystem(String dirName, String snapshotName,
                                               FileSystem fs) throws FalconException {
    try {
        LOG.info("Creating snapshot {} in directory {}", snapshotName, dirName);
        fs.createSnapshot(new Path(dirName), snapshotName);
    } catch (IOException e) {
        LOG.warn("Unable to create snapshot {} in filesystem {}. Exception is {}", snapshotName,
                fs.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), e.getMessage());
        throw new FalconException("Unable to create snapshot " + snapshotName, e);
    }
}
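Worth noting for this example: fs.createSnapshot only succeeds on directories that HDFS has marked snapshottable. A minimal sketch of that prerequisite step, assuming a DistributedFileSystem, superuser privileges, and an illustrative path and snapshot name (none of which are part of the Falcon source):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;

public class SnapshotSetup {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path dir = new Path("/data/replicated");   // illustrative path

        // Directories must be made snapshottable before createSnapshot will succeed;
        // this is the programmatic equivalent of `hdfs dfsadmin -allowSnapshot`.
        if (fs instanceof DistributedFileSystem) {
            ((DistributedFileSystem) fs).allowSnapshot(dir);
        }
        fs.createSnapshot(dir, "nightly-backup");   // snapshot name is illustrative
    }
}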
From source file:org.apache.flume.sink.kite.TestDatasetSink.java
License:Apache License
@Test
public void testMiniClusterStore() throws EventDeliveryException, IOException {
    // setup a minicluster
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration()).build();
    DatasetRepository hdfsRepo = null;
    try {
        FileSystem dfs = cluster.getFileSystem();
        Configuration conf = dfs.getConf();
        String repoURI = "repo:" + conf.get("fs.defaultFS") + "/tmp/repo";

        // create a repository and dataset in HDFS
        hdfsRepo = DatasetRepositories.open(repoURI);
        hdfsRepo.create(DATASET_NAME, DESCRIPTOR);

        // update the config to use the HDFS repository
        config.put(DatasetSinkConstants.CONFIG_KITE_REPO_URI, repoURI);

        DatasetSink sink = sink(in, config);

        // run the sink
        sink.start();
        sink.process();
        sink.stop();

        Assert.assertEquals(Sets.newHashSet(expected),
                read(hdfsRepo.<GenericData.Record>load(DATASET_NAME)));
        Assert.assertEquals("Should have committed", 0, remaining(in));

    } finally {
        if (hdfsRepo != null && hdfsRepo.exists(DATASET_NAME)) {
            hdfsRepo.delete(DATASET_NAME);
        }
        cluster.shutdown();
    }
}
From source file:org.apache.gobblin.util.AvroUtils.java
License:Apache License
/**
 * Write a schema to a file.
 * @param schema the schema
 * @param filePath the target file
 * @param tempFilePath if not null then this path is used for a temporary file used to stage the write
 * @param fs a {@link FileSystem}
 * @param overwrite should any existing target file be overwritten?
 * @param perm permissions
 * @throws IOException
 */
public static void writeSchemaToFile(Schema schema, Path filePath, Path tempFilePath, FileSystem fs,
        boolean overwrite, FsPermission perm) throws IOException {
    boolean fileExists = fs.exists(filePath);

    if (!overwrite) {
        Preconditions.checkState(!fileExists, filePath + " already exists");
    } else {
        // delete the target file now if not using a staging file
        if (fileExists && null == tempFilePath) {
            HadoopUtils.deletePath(fs, filePath, true);
            // file has been removed
            fileExists = false;
        }
    }

    // If the file exists then write to a temp file to make the replacement as close to atomic as possible
    Path writeFilePath = fileExists ? tempFilePath : filePath;

    try (DataOutputStream dos = fs.create(writeFilePath)) {
        dos.writeChars(schema.toString());
    }
    fs.setPermission(writeFilePath, perm);

    // Replace existing file with the staged file
    if (fileExists) {
        if (!fs.delete(filePath, true)) {
            throw new IOException(
                    String.format("Failed to delete %s while renaming %s to %s", filePath, tempFilePath, filePath));
        }

        HadoopUtils.movePath(fs, tempFilePath, fs, filePath, true, fs.getConf());
    }
}
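A possible call site, sketched under the assumption that the schema, paths, and permissions shown are placeholders; the staging path lets an existing file be replaced near-atomically, as the method's comments describe.

import org.apache.avro.Schema;
import org.apache.avro.SchemaBuilder;
import org.apache.gobblin.util.AvroUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class WriteSchemaUsage {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());

        // The schema and paths are illustrative placeholders.
        Schema schema = SchemaBuilder.record("Event").fields()
                .requiredString("id")
                .requiredLong("ts")
                .endRecord();

        AvroUtils.writeSchemaToFile(schema,
                new Path("/data/events/_schema.avsc"),        // target file
                new Path("/data/events/_schema.avsc.tmp"),    // staging file for near-atomic replace
                fs,
                true,                                         // overwrite an existing schema file
                FsPermission.getFileDefault());
    }
}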
From source file:org.apache.hama.util.Files.java
License:Open Source License
/**
 * Merges k sorted sequence files, each of size n, using a k-way merge that runs in O(kn log k).
 *
 * @param fs the filesystem
 * @param inputPath the input directory which contains the sorted sequence files that have to be merged
 * @param outputPath the path to the merged sorted sequence file
 */
public static <KEY extends WritableComparable<? super KEY>, VALUE extends Writable> void merge(FileSystem fs,
        Path inputPath, Path outputPath, Class<KEY> keyClazz, Class<VALUE> valClazz) {

    Configuration conf = fs.getConf();
    PriorityQueue<KVPair<KEY, VALUE>> pq = new PriorityQueue<KVPair<KEY, VALUE>>();
    // Map from KeyValuePair to the split number to which it belongs.
    HashMap<KVPair<KEY, VALUE>, Integer> keySplitMap = new HashMap<KVPair<KEY, VALUE>, Integer>();

    FileStatus[] files;
    SequenceFile.Writer writer = null;
    SequenceFile.Reader reader[] = null;
    try {
        files = fs.listStatus(inputPath);
        reader = new SequenceFile.Reader[files.length];
        for (int i = 0; i < files.length; i++) {
            if (files[i].getLen() > 0) {
                reader[i] = new SequenceFile.Reader(fs, files[i].getPath(), conf);
                KEY key = ReflectionUtils.newInstance(keyClazz, new Object[0]);
                VALUE val = ReflectionUtils.newInstance(valClazz, new Object[0]);

                reader[i].next(key, val);
                KVPair<KEY, VALUE> kv = new KVPair<KEY, VALUE>(key, val);
                pq.add(kv);
                keySplitMap.put(kv, i);
            }
        }

        writer = SequenceFile.createWriter(fs, conf, outputPath, keyClazz, valClazz);

        while (!pq.isEmpty()) {
            KVPair<KEY, VALUE> smallestKey = pq.poll();
            writer.append(smallestKey.getKey(), smallestKey.getValue());

            Integer index = keySplitMap.get(smallestKey);
            keySplitMap.remove(smallestKey);

            KEY key = ReflectionUtils.newInstance(keyClazz, new Object[0]);
            VALUE val = ReflectionUtils.newInstance(valClazz, new Object[0]);

            if (reader[index].next(key, val)) {
                KVPair<KEY, VALUE> kv = new KVPair<KEY, VALUE>(key, val);
                pq.add(kv);
                keySplitMap.put(kv, index);
            }
        }
    } catch (IOException e) {
        LOG.error("Couldn't get status, exiting ...", e);
        System.exit(-1);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                LOG.error("Cannot close writer to sorted seq. file. Exiting ...", e);
                System.exit(-1);
            }
        }
    }
}
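A minimal usage sketch, assuming the input directory already holds individually sorted SequenceFiles with Text keys and IntWritable values; the paths and key/value types are illustrative.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hama.util.Files;

public class MergeUsage {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        // Merge pre-sorted SequenceFiles under /input/sorted into one sorted output file.
        Files.merge(fs, new Path("/input/sorted"), new Path("/output/merged.seq"),
                Text.class, IntWritable.class);
    }
}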
From source file:org.apache.mahout.math.hadoop.stochasticsvd.SSVDTestsHelper.java
License:Apache License
/**
 * Generate some random but meaningful input with singular value ratios of n, n-1 ... 1.
 *
 * @param outputPath
 */
static void generateDenseInput(Path outputPath, FileSystem dfs, Vector svalues, int m, int n, int startRowKey)
        throws IOException {

    Random rnd = RandomUtils.getRandom();

    int svCnt = svalues.size();
    Matrix v = generateDenseOrthonormalRandom(n, svCnt, rnd);
    Matrix u = generateDenseOrthonormalRandom(m, svCnt, rnd);

    // apply singular values
    Matrix mx = m > n ? v : u;
    for (int i = 0; i < svCnt; i++) {
        mx.assignColumn(i, mx.viewColumn(i).times(svalues.getQuick(i)));
    }

    SequenceFile.Writer w = SequenceFile.createWriter(dfs, dfs.getConf(), outputPath, IntWritable.class,
            VectorWritable.class);
    try {
        Vector outV = new DenseVector(n);
        Writable vw = new VectorWritable(outV);
        IntWritable iw = new IntWritable();

        for (int i = 0; i < m; i++) {
            iw.set(startRowKey + i);
            for (int j = 0; j < n; j++) {
                outV.setQuick(j, u.viewRow(i).dot(v.viewRow(j)));
            }
            w.append(iw, vw);
        }
    } finally {
        w.close();
    }
}
From source file:org.apache.mahout.math.hadoop.stochasticsvd.SSVDTestsHelper.java
License:Apache License
public static void main(String[] args) throws Exception {
    // create 1Gb input for distributed tests.
    MahoutTestCase ca = new MahoutTestCase();
    Configuration conf = ca.getConfiguration();
    FileSystem dfs = FileSystem.getLocal(conf);
    Path outputDir = new Path("/tmp/DRM");
    dfs.mkdirs(outputDir);
    // for ( int i = 1; i <= 10; i++ ) {
    //   generateDenseInput(new Path(outputDir,String.format("part-%05d",i)),dfs,
    //     new DenseVector ( new double[] {
    //       15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0.8,0.3,0.1,0.01
    //     }),1200,10000,(i-1)*1200);
    // }

    /*
     * create 2Gb sparse 4.5m x 4.5m input (similar to wikipedia graph).
     *
     * In order to get at 2Gb, we need to generate ~ 40 non-zero items per row average.
     */
    outputDir = new Path("/tmp/DRM-sparse");
    Random rnd = RandomUtils.getRandom();

    SequenceFile.Writer w = SequenceFile.createWriter(dfs, dfs.getConf(), new Path(outputDir, "sparse.seq"),
            IntWritable.class, VectorWritable.class);

    try {
        IntWritable iw = new IntWritable();
        VectorWritable vw = new VectorWritable();
        int avgNZero = 40;
        int n = 4500000;
        for (int i = 1; i < n; i++) {
            Vector vector = new RandomAccessSparseVector(n);
            double nz = Math.round(avgNZero * (rnd.nextGaussian() + 1));
            if (nz < 0) {
                nz = 0;
            }
            for (int j = 1; j < nz; j++) {
                vector.set(rnd.nextInt(n), rnd.nextGaussian() * 25 + 3);
            }
            iw.set(i);
            vw.set(vector);
            w.append(iw, vw);
        }
    } finally {
        w.close();
    }
}
From source file:org.apache.mahout.utils.ConcatenateVectorsJob.java
License:Apache License
private Class<? extends Writable> getKeyClass(Path path, FileSystem fs) throws IOException {
    // this works for both part* and a directory/ with part*.
    Path pathPattern = new Path(path, "part*");
    FileStatus[] paths = fs.globStatus(pathPattern);
    Preconditions.checkArgument(paths.length > 0, path.getName() + " is a file, should be a directory");

    Path file = paths[0].getPath();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, file, fs.getConf());
        return reader.getKeyClass().asSubclass(Writable.class);
    } finally {
        Closeables.close(reader, true);
    }
}
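For reference, a variant of the same key-class probe using the Hadoop 2.x reader constructor instead of the deprecated (fs, path, conf) form; the part-file path is an illustrative assumption, and fs.getConf() still supplies the Configuration.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;

public class KeyClassProbe {
    public static void main(String[] args) throws Exception {
        FileSystem fs = FileSystem.get(new Configuration());
        Path file = new Path("/data/vectors/part-r-00000");   // illustrative part file

        // SequenceFile.Reader implements Closeable, so try-with-resources handles cleanup.
        try (SequenceFile.Reader reader =
                new SequenceFile.Reader(fs.getConf(), SequenceFile.Reader.file(file))) {
            Class<? extends Writable> keyClass = reader.getKeyClass().asSubclass(Writable.class);
            System.out.println("key class: " + keyClass.getName());
        }
    }
}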
From source file:org.apache.mahout.utils.SplitInputJob.java
License:Apache License
/**
 * Run job to downsample, randomly permute and split data into test and
 * training sets. This job takes a SequenceFile as input and outputs two
 * SequenceFiles test-r-00000 and training-r-00000 which contain the test and
 * training sets respectively.
 *
 * @param initialConf
 * @param inputPath
 *          path to input data SequenceFile
 * @param outputPath
 *          path for output data SequenceFiles
 * @param keepPct
 *          percentage of key value pairs in input to keep. The rest are discarded
 * @param randomSelectionPercent
 *          percentage of key value pairs to allocate to test set. Remainder are allocated to training set
 */
@SuppressWarnings("rawtypes")
public static void run(Configuration initialConf, Path inputPath, Path outputPath, int keepPct,
        float randomSelectionPercent) throws IOException, ClassNotFoundException, InterruptedException {

    int downsamplingFactor = (int) (100.0 / keepPct);
    initialConf.setInt(DOWNSAMPLING_FACTOR, downsamplingFactor);
    initialConf.setFloat(RANDOM_SELECTION_PCT, randomSelectionPercent);

    // Determine class of keys and values
    FileSystem fs = FileSystem.get(initialConf);

    SequenceFileDirIterator<? extends WritableComparable, Writable> iterator =
            new SequenceFileDirIterator<WritableComparable, Writable>(inputPath, PathType.LIST,
                    PathFilters.partFilter(), null, false, fs.getConf());
    Class<? extends WritableComparable> keyClass;
    Class<? extends Writable> valueClass;
    if (iterator.hasNext()) {
        Pair<? extends WritableComparable, Writable> pair = iterator.next();
        keyClass = pair.getFirst().getClass();
        valueClass = pair.getSecond().getClass();
    } else {
        throw new IllegalStateException("Couldn't determine class of the input values");
    }

    Job job = new Job(new Configuration(initialConf));

    MultipleOutputs.addNamedOutput(job, TRAINING_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    MultipleOutputs.addNamedOutput(job, TEST_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    job.setJarByClass(SplitInputJob.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(SplitInputMapper.class);
    job.setReducerClass(SplitInputReducer.class);
    job.setSortComparatorClass(SplitInputComparator.class);
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
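A sketch of driving this job from a small main class; the paths and split percentages are placeholders, and it assumes the default Configuration points at the intended cluster.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.utils.SplitInputJob;

public class SplitInputDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Keep 10% of the input pairs, and send 20% of the kept pairs to the test set.
        SplitInputJob.run(conf,
                new Path("/data/vectors"),        // input SequenceFile(s), illustrative
                new Path("/data/vectors-split"),  // output directory, illustrative
                10,                               // keepPct
                20f);                             // randomSelectionPercent
    }
}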