Example usage for org.apache.hadoop.fs FileSystem getConf

List of usage examples for org.apache.hadoop.fs FileSystem getConf

Introduction

This page collects example usages of the org.apache.hadoop.fs.FileSystem method getConf().

Prototype

@Override
public Configuration getConf()
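
Before the project-specific examples, here is a minimal, hypothetical sketch of the typical call pattern: obtain a FileSystem, then read settings from the Configuration it was initialized with. The key names are illustrative; current Hadoop code reads fs.defaultFS, while several examples further down use the older, deprecated fs.default.name key.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;

public class GetConfExample {
    public static void main(String[] args) throws Exception {
        // Build a Configuration (loads core-site.xml etc. from the classpath)
        // and resolve the default FileSystem from it.
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // getConf() returns the Configuration the FileSystem was initialized with.
        Configuration fsConf = fs.getConf();

        // "fs.defaultFS" is the current key for the default filesystem URI;
        // older code uses the deprecated "fs.default.name" key instead.
        System.out.println("Default FS: " + fsConf.get("fs.defaultFS"));

        // The returned Configuration is commonly passed on to other Hadoop APIs,
        // e.g. FileUtil.copy(...) or SequenceFile readers/writers, as in the examples below.
        fs.close();
    }
}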

Usage

From source file:org.apache.falcon.regression.core.util.HadoopUtil.java

License:Apache License

/**
 * Removes directory with a given name and creates empty one with the same name.
 * @param fs filesystem
 * @param path path to a directory
 * @throws IOException
 */
public static void recreateDir(FileSystem fs, String path) throws IOException {
    deleteDirIfExists(path, fs);
    LOGGER.info("creating hdfs dir: " + path + " on " + fs.getConf().get("fs.default.name"));
    fs.mkdirs(new Path(path));
}
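
A hypothetical call of the helper above; the FileSystem setup and the path are illustrative.

    FileSystem fs = FileSystem.get(new Configuration());
    // Drop any existing directory and recreate it empty before staging test data.
    HadoopUtil.recreateDir(fs, "/tmp/falcon-regression/staging");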

From source file:org.apache.falcon.service.SharedLibraryHostingService.java

License:Apache License

private void pushExtensionArtifactsToCluster(final Cluster cluster, final FileSystem clusterFs)
        throws FalconException {
    if (!Services.get().isRegistered(ExtensionService.SERVICE_NAME)) {
        LOG.info("ExtensionService not registered, return");
        return;
    }

    ExtensionStore store = ExtensionStore.get();
    if (!store.isExtensionStoreInitialized()) {
        LOG.info(
                "Extension store not initialized by Extension service. Make sure Extension service is added in "
                        + "start up properties");
        return;
    }

    final String filterPath = "/apps/falcon/extensions/mirroring/";
    Path extensionStorePath = store.getExtensionStorePath();
    LOG.info("extensionStorePath :{}", extensionStorePath);
    FileSystem falconFileSystem = HadoopClientFactory.get().createFalconFileSystem(extensionStorePath.toUri());
    String nameNode = StringUtils
            .removeEnd(falconFileSystem.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), File.separator);

    String clusterStorageUrl = StringUtils.removeEnd(ClusterHelper.getStorageUrl(cluster), File.separator);

    // If default fs for Falcon server is same as cluster fs abort copy
    if (nameNode.equalsIgnoreCase(clusterStorageUrl)) {
        LOG.info("clusterStorageUrl :{} same return", clusterStorageUrl);
        return;
    }

    try {
        RemoteIterator<LocatedFileStatus> fileStatusListIterator = falconFileSystem
                .listFiles(extensionStorePath, true);

        while (fileStatusListIterator.hasNext()) {
            LocatedFileStatus srcfileStatus = fileStatusListIterator.next();
            Path filePath = Path.getPathWithoutSchemeAndAuthority(srcfileStatus.getPath());

            if (filePath != null && filePath.toString().startsWith(filterPath)) {
                /* HiveDR uses filter path as store path in DRStatusStore, so skip it. Copy only the extension
                 artifacts */
                continue;
            }

            if (srcfileStatus.isDirectory()) {
                if (!clusterFs.exists(filePath)) {
                    HadoopClientFactory.mkdirs(clusterFs, filePath, srcfileStatus.getPermission());
                }
            } else {
                if (clusterFs.exists(filePath)) {
                    FileStatus targetfstat = clusterFs.getFileStatus(filePath);
                    if (targetfstat.getLen() == srcfileStatus.getLen()) {
                        continue;
                    }
                }

                Path parentPath = filePath.getParent();
                if (!clusterFs.exists(parentPath)) {
                    FsPermission dirPerm = falconFileSystem.getFileStatus(parentPath).getPermission();
                    HadoopClientFactory.mkdirs(clusterFs, parentPath, dirPerm);
                }

                FileUtil.copy(falconFileSystem, srcfileStatus, clusterFs, filePath, false, true,
                        falconFileSystem.getConf());
                FileUtil.chmod(clusterFs.makeQualified(filePath).toString(),
                        srcfileStatus.getPermission().toString());
            }
        }
    } catch (IOException | InterruptedException e) {
        throw new FalconException("Failed to copy extension artifacts to cluster " + cluster.getName(), e);
    }
}

From source file:org.apache.falcon.snapshots.replication.HdfsSnapshotReplicator.java

License:Apache License

private static void createSnapshotInFileSystem(String dirName, String snapshotName, FileSystem fs)
        throws FalconException {
    try {
        LOG.info("Creating snapshot {} in directory {}", snapshotName, dirName);
        fs.createSnapshot(new Path(dirName), snapshotName);
    } catch (IOException e) {
        LOG.warn("Unable to create snapshot {} in filesystem {}. Exception is {}", snapshotName,
                fs.getConf().get(HadoopClientFactory.FS_DEFAULT_NAME_KEY), e.getMessage());
        throw new FalconException("Unable to create snapshot " + snapshotName, e);
    }
}

From source file:org.apache.flume.sink.kite.TestDatasetSink.java

License:Apache License

@Test
public void testMiniClusterStore() throws EventDeliveryException, IOException {
    // setup a minicluster
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(new Configuration()).build();
    DatasetRepository hdfsRepo = null;
    try {
        FileSystem dfs = cluster.getFileSystem();
        Configuration conf = dfs.getConf();
        String repoURI = "repo:" + conf.get("fs.defaultFS") + "/tmp/repo";

        // create a repository and dataset in HDFS
        hdfsRepo = DatasetRepositories.open(repoURI);
        hdfsRepo.create(DATASET_NAME, DESCRIPTOR);

        // update the config to use the HDFS repository
        config.put(DatasetSinkConstants.CONFIG_KITE_REPO_URI, repoURI);

        DatasetSink sink = sink(in, config);

        // run the sink
        sink.start();
        sink.process();
        sink.stop();

        Assert.assertEquals(Sets.newHashSet(expected), read(hdfsRepo.<GenericData.Record>load(DATASET_NAME)));
        Assert.assertEquals("Should have committed", 0, remaining(in));

    } finally {
        if (hdfsRepo != null && hdfsRepo.exists(DATASET_NAME)) {
            hdfsRepo.delete(DATASET_NAME);
        }
        cluster.shutdown();
    }
}

From source file:org.apache.gobblin.util.AvroUtils.java

License:Apache License

/**
 * Write a schema to a file.
 * @param schema the schema
 * @param filePath the target file
 * @param tempFilePath if not null then this path is used for a temporary file used to stage the write
 * @param fs a {@link FileSystem}
 * @param overwrite should any existing target file be overwritten?
 * @param perm permissions
 * @throws IOException
 */
public static void writeSchemaToFile(Schema schema, Path filePath, Path tempFilePath, FileSystem fs,
        boolean overwrite, FsPermission perm) throws IOException {
    boolean fileExists = fs.exists(filePath);

    if (!overwrite) {
        Preconditions.checkState(!fileExists, filePath + " already exists");
    } else {
        // delete the target file now if not using a staging file
        if (fileExists && null == tempFilePath) {
            HadoopUtils.deletePath(fs, filePath, true);
            // file has been removed
            fileExists = false;
        }
    }

    // If the file exists then write to a temp file to make the replacement as close to atomic as possible
    Path writeFilePath = fileExists ? tempFilePath : filePath;

    try (DataOutputStream dos = fs.create(writeFilePath)) {
        dos.writeChars(schema.toString());
    }
    fs.setPermission(writeFilePath, perm);

    // Replace existing file with the staged file
    if (fileExists) {
        if (!fs.delete(filePath, true)) {
            throw new IOException(String.format("Failed to delete %s while renaming %s to %s", filePath,
                    tempFilePath, filePath));
        }

        HadoopUtils.movePath(fs, tempFilePath, fs, filePath, true, fs.getConf());
    }
}
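
A hypothetical invocation of the method above: writing an Avro schema through a staging file so an existing target is replaced as close to atomically as possible. The schema, paths, and permission string are illustrative, and imports are omitted as in the other snippets on this page.

    Schema schema = SchemaBuilder.record("Event").fields()
            .requiredString("id").requiredLong("ts").endRecord();
    FileSystem fs = FileSystem.get(new Configuration());
    AvroUtils.writeSchemaToFile(schema,
            new Path("/data/events/_schema.avsc"),      // target file
            new Path("/data/events/_schema.avsc.tmp"),  // staging file used when the target exists
            fs, true, FsPermission.valueOf("-rw-r--r--"));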

From source file:org.apache.hama.util.Files.java

License:Open Source License

/**
 * Merges k sorted sequence files, each of size n, using an O(kn log k) priority-queue merge.
 * @param fs         the filesystem
 * @param inputPath  the input directory containing the sorted sequence files to merge
 * @param outputPath the path to the merged, sorted output sequence file
 * @param keyClazz   the key class of the input sequence files
 * @param valClazz   the value class of the input sequence files
 */
public static <KEY extends WritableComparable<? super KEY>, VALUE extends Writable> void merge(FileSystem fs,
        Path inputPath, Path outputPath, Class<KEY> keyClazz, Class<VALUE> valClazz) {

    Configuration conf = fs.getConf();

    PriorityQueue<KVPair<KEY, VALUE>> pq = new PriorityQueue<KVPair<KEY, VALUE>>();

    //Map from KeyValuePair to the split number to which it belongs.
    HashMap<KVPair<KEY, VALUE>, Integer> keySplitMap = new HashMap<KVPair<KEY, VALUE>, Integer>();

    FileStatus[] files;
    SequenceFile.Writer writer = null;
    SequenceFile.Reader[] reader = null;
    try {
        files = fs.listStatus(inputPath);
        reader = new SequenceFile.Reader[files.length];

        for (int i = 0; i < files.length; i++) {
            if (files[i].getLen() > 0) {
                reader[i] = new SequenceFile.Reader(fs, files[i].getPath(), conf);
                KEY key = ReflectionUtils.newInstance(keyClazz, new Object[0]);
                VALUE val = ReflectionUtils.newInstance(valClazz, new Object[0]);

                reader[i].next(key, val);
                KVPair<KEY, VALUE> kv = new KVPair<KEY, VALUE>(key, val);
                pq.add(kv);
                keySplitMap.put(kv, i);
            }
        }

        writer = SequenceFile.createWriter(fs, conf, outputPath, keyClazz, valClazz);

        while (!pq.isEmpty()) {
            KVPair<KEY, VALUE> smallestKey = pq.poll();
            writer.append(smallestKey.getKey(), smallestKey.getValue());
            Integer index = keySplitMap.get(smallestKey);
            keySplitMap.remove(smallestKey);

            KEY key = ReflectionUtils.newInstance(keyClazz, new Object[0]);
            VALUE val = ReflectionUtils.newInstance(valClazz, new Object[0]);

            if (reader[index].next(key, val)) {
                KVPair<KEY, VALUE> kv = new KVPair<KEY, VALUE>(key, val);
                pq.add(kv);
                keySplitMap.put(kv, index);
            }
        }

    } catch (IOException e) {
        LOG.error("Couldn't get status, exiting ...", e);
        System.exit(-1);
    } finally {
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException e) {
                LOG.error("Cannot close writer to sorted seq. file. Exiting ...", e);
                System.exit(-1);
            }
        }
    }
}
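
A hypothetical invocation of the merge above, assuming a directory of sorted sequence files keyed by Text with IntWritable values; the paths are illustrative.

    FileSystem fs = FileSystem.get(new Configuration());
    Files.merge(fs,
            new Path("/tmp/sorted-parts"),   // directory of sorted sequence files
            new Path("/tmp/merged.seq"),     // merged, sorted output file
            Text.class, IntWritable.class);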

From source file:org.apache.mahout.math.hadoop.stochasticsvd.SSVDTestsHelper.java

License:Apache License

/**
 * Generate some random but meaningful input with singular value ratios of
 * n, n-1, ..., 1.
 * 
 * @param outputPath
 */

static void generateDenseInput(Path outputPath, FileSystem dfs, Vector svalues, int m, int n, int startRowKey)
        throws IOException {

    Random rnd = RandomUtils.getRandom();

    int svCnt = svalues.size();
    Matrix v = generateDenseOrthonormalRandom(n, svCnt, rnd);
    Matrix u = generateDenseOrthonormalRandom(m, svCnt, rnd);

    // apply singular values
    Matrix mx = m > n ? v : u;
    for (int i = 0; i < svCnt; i++) {
        mx.assignColumn(i, mx.viewColumn(i).times(svalues.getQuick(i)));
    }

    SequenceFile.Writer w = SequenceFile.createWriter(dfs, dfs.getConf(), outputPath, IntWritable.class,
            VectorWritable.class);
    try {

        Vector outV = new DenseVector(n);
        Writable vw = new VectorWritable(outV);
        IntWritable iw = new IntWritable();

        for (int i = 0; i < m; i++) {
            iw.set(startRowKey + i);
            for (int j = 0; j < n; j++) {
                outV.setQuick(j, u.viewRow(i).dot(v.viewRow(j)));
            }
            w.append(iw, vw);
        }

    } finally {
        w.close();
    }

}

From source file:org.apache.mahout.math.hadoop.stochasticsvd.SSVDTestsHelper.java

License:Apache License

public static void main(String[] args) throws Exception {
    // create 1Gb input for distributed tests.
    MahoutTestCase ca = new MahoutTestCase();
    Configuration conf = ca.getConfiguration();
    FileSystem dfs = FileSystem.getLocal(conf);
    Path outputDir = new Path("/tmp/DRM");
    dfs.mkdirs(outputDir);
    //    for ( int i = 1; i <= 10; i++ ) {
    //      generateDenseInput(new Path(outputDir,String.format("part-%05d",i)),dfs,
    //                         new DenseVector ( new double[] {
    //                             15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0.8,0.3,0.1,0.01
    //                         }),1200,10000,(i-1)*1200);
    //    }

    /*
     * Create a 2 GB sparse 4.5M x 4.5M input (similar to the Wikipedia graph).
     *
     * To reach ~2 GB, we need to generate ~40 non-zero items per row on average.
     */

    outputDir = new Path("/tmp/DRM-sparse");
    Random rnd = RandomUtils.getRandom();

    SequenceFile.Writer w = SequenceFile.createWriter(dfs, dfs.getConf(), new Path(outputDir, "sparse.seq"),
            IntWritable.class, VectorWritable.class);

    try {

        IntWritable iw = new IntWritable();
        VectorWritable vw = new VectorWritable();
        int avgNZero = 40;
        int n = 4500000;
        for (int i = 1; i < n; i++) {
            Vector vector = new RandomAccessSparseVector(n);
            double nz = Math.round(avgNZero * (rnd.nextGaussian() + 1));
            if (nz < 0) {
                nz = 0;
            }
            for (int j = 1; j < nz; j++) {
                vector.set(rnd.nextInt(n), rnd.nextGaussian() * 25 + 3);
            }
            iw.set(i);
            vw.set(vector);
            w.append(iw, vw);
        }
    } finally {
        w.close();
    }

}

From source file:org.apache.mahout.utils.ConcatenateVectorsJob.java

License:Apache License

private Class<? extends Writable> getKeyClass(Path path, FileSystem fs) throws IOException {
    // This works both for part* files and for a directory containing part* files.
    Path pathPattern = new Path(path, "part*");
    FileStatus[] paths = fs.globStatus(pathPattern);
    Preconditions.checkArgument(paths.length > 0, path.getName() + " is a file, should be a directory");

    Path file = paths[0].getPath();
    SequenceFile.Reader reader = null;
    try {
        reader = new SequenceFile.Reader(fs, file, fs.getConf());
        return reader.getKeyClass().asSubclass(Writable.class);
    } finally {
        Closeables.close(reader, true);
    }
}

From source file:org.apache.mahout.utils.SplitInputJob.java

License:Apache License

/**
 * Run job to downsample, randomly permute and split data into test and
 * training sets. This job takes a SequenceFile as input and outputs two
 * SequenceFiles test-r-00000 and training-r-00000 which contain the test and
 * training sets respectively.
 *
 * @param initialConf the base Hadoop configuration for the job
 * @param inputPath
 *          path to input data SequenceFile
 * @param outputPath
 *          path for output data SequenceFiles
 * @param keepPct
 *          percentage of key value pairs in input to keep. The rest are
 *          discarded
 * @param randomSelectionPercent
 *          percentage of key value pairs to allocate to test set. Remainder
 *          are allocated to training set
 */
@SuppressWarnings("rawtypes")
public static void run(Configuration initialConf, Path inputPath, Path outputPath, int keepPct,
        float randomSelectionPercent) throws IOException, ClassNotFoundException, InterruptedException {

    int downsamplingFactor = (int) (100.0 / keepPct);
    initialConf.setInt(DOWNSAMPLING_FACTOR, downsamplingFactor);
    initialConf.setFloat(RANDOM_SELECTION_PCT, randomSelectionPercent);

    // Determine class of keys and values
    FileSystem fs = FileSystem.get(initialConf);

    SequenceFileDirIterator<? extends WritableComparable, Writable> iterator = new SequenceFileDirIterator<WritableComparable, Writable>(
            inputPath, PathType.LIST, PathFilters.partFilter(), null, false, fs.getConf());
    Class<? extends WritableComparable> keyClass;
    Class<? extends Writable> valueClass;
    if (iterator.hasNext()) {
        Pair<? extends WritableComparable, Writable> pair = iterator.next();
        keyClass = pair.getFirst().getClass();
        valueClass = pair.getSecond().getClass();
    } else {
        throw new IllegalStateException("Couldn't determine class of the input values");
    }

    Job job = new Job(new Configuration(initialConf));

    MultipleOutputs.addNamedOutput(job, TRAINING_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    MultipleOutputs.addNamedOutput(job, TEST_TAG, SequenceFileOutputFormat.class, keyClass, valueClass);
    job.setJarByClass(SplitInputJob.class);
    FileInputFormat.addInputPath(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(SplitInputMapper.class);
    job.setReducerClass(SplitInputReducer.class);
    job.setSortComparatorClass(SplitInputComparator.class);
    job.setOutputKeyClass(keyClass);
    job.setOutputValueClass(valueClass);
    job.submit();
    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }
}
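
A hypothetical driver call for the job above, keeping 10% of the input pairs and allocating 25% of the kept pairs to the test set; the paths are illustrative.

    Configuration conf = new Configuration();
    SplitInputJob.run(conf,
            new Path("/data/vectors"),        // input SequenceFile directory
            new Path("/data/vectors-split"),  // output directory for test/training SequenceFiles
            10,                               // keep 10% of the key/value pairs
            25.0f);                           // send 25% of the kept pairs to the test set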