List of usage examples for org.apache.hadoop.fs FileSystem globStatus
public FileStatus[] globStatus(Path pathPattern) throws IOException
Return all the files that match the given pathPattern and are not checksum files.
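Before the collected usages below, here is a minimal, self-contained sketch of a typical call, assuming a local Hadoop configuration; the path "/data/logs/part-*" and the class name GlobStatusExample are illustrative only, not taken from any of the source files that follow:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Expand the glob. Per the javadoc, the result is an empty array
        // when the pattern matches nothing and null when the path does
        // not exist, so guard against null before iterating.
        FileStatus[] matches = fs.globStatus(new Path("/data/logs/part-*"));
        if (matches != null) {
            for (FileStatus status : matches) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}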
From source file:fi.tkk.ics.hadoop.bam.cli.Utils.java
License:Open Source License
/**
 * Merges the files in the given directory that have names given by
 * getMergeableWorkFile() into out.
 *
 * Outputs progress reports if commandName is non-null.
 */
public static void mergeInto(OutputStream out, Path directory, String basePrefix, String basePostfix,
        Configuration conf, String commandName) throws IOException {
    final FileSystem fs = directory.getFileSystem(conf);

    final FileStatus[] parts = fs.globStatus(new Path(directory, basePrefix
            + conf.get(WORK_FILENAME_PROPERTY) + basePostfix
            + "-[0-9][0-9][0-9][0-9][0-9][0-9]*"));

    int i = 0;
    Timer t = new Timer();

    for (final FileStatus part : parts) {
        if (commandName != null) {
            System.out.printf("%s :: Merging part %d (size %d)...", commandName, ++i, part.getLen());
            System.out.flush();
            t.start();
        }

        final InputStream in = fs.open(part.getPath());
        IOUtils.copyBytes(in, out, conf, false);
        in.close();

        if (commandName != null)
            System.out.printf(" done in %d.%03d s.\n", t.stopS(), t.fms());
    }

    for (final FileStatus part : parts)
        fs.delete(part.getPath(), false);
}
From source file:FormatStorage1.IColumnDataFile.java
License:Open Source License
public void open(String fileName, ArrayList<Integer> idxs) throws IOException {
    this.columnprojects = new ArrayList<ArrayList<Integer>>();
    this.head = new IHead();
    this.readidxs = new TreeSet<Integer>();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] statuss = fs.globStatus(new Path(fileName + "_idx*"));
    this.cp2ifdfs.clear();
    this.idx2ifdfs.clear();

    if (idxs != null && idxs.size() > 0) {
        readidxs.addAll(idxs);
        for (int i = 0; i < statuss.length; i++) {
            String file = statuss[i].getPath().toString();
            IFormatDataFile ifdf = null;
            String idxstr = file.substring(file.lastIndexOf("_idx") + 4);
            String[] sts = idxstr.split("_");
            TreeSet<Integer> ts = new TreeSet<Integer>();
            for (int j = 0; j < sts.length; j++) {
                ts.add(Integer.parseInt(sts[j]));
            }
            boolean contains = false;
            for (Integer id : idxs) {
                if (ts.contains(id)) {
                    contains = true;
                    if (ifdf == null) {
                        ifdf = new IFormatDataFile(conf);
                        ifdf.open(file);
                    }
                    this.idx2ifdfs.put(id, ifdf);
                }
            }
            if (contains) {
                ArrayList<Integer> cp = new ArrayList<Integer>();
                cp.addAll(ts);
                this.columnprojects.add(cp);
                this.cp2ifdfs.put(cp, ifdf);
            }
        }
    } else {
        for (int i = 0; i < statuss.length; i++) {
            String file = statuss[i].getPath().toString();
            IFormatDataFile ifdf = null;
            String idxstr = file.substring(file.lastIndexOf("_idx") + 4);
            String[] sts = idxstr.split("_");
            TreeSet<Integer> ts = new TreeSet<Integer>();
            ifdf = new IFormatDataFile(conf);
            ifdf.open(file);
            for (int j = 0; j < sts.length; j++) {
                int id = Integer.parseInt(sts[j]);
                ts.add(id);
                this.idx2ifdfs.put(id, ifdf);
            }
            ArrayList<Integer> cp = new ArrayList<Integer>();
            cp.addAll(ts);
            this.readidxs.addAll(ts);
            this.columnprojects.add(cp);
            this.cp2ifdfs.put(cp, ifdf);
        }
    }

    this.fieldtypes = new HashMap<Integer, IRecord.IFType>();
    for (Integer idx : this.readidxs) {
        this.fieldtypes.put(idx, this.idx2ifdfs.get(idx).fileInfo().head().fieldMap().fieldtypes().get(idx));
    }
    workstatus = ConstVar.WS_Read;
}
From source file:gobblin.data.management.retention.DatasetVersionFinderTest.java
License:Apache License
@Test
public void test() throws IOException {
    FileSystem fs = mock(FileSystem.class);

    String datasetPathStr = "/path/to/dataset";
    String dataset1 = "datasetVersion1";
    String dataset2 = "datasetVersion2";

    Path datasetPath = new Path(datasetPathStr);
    Path globbedPath = new Path(datasetPathStr + "/*");
    Path datasetVersion1 = new Path(datasetPathStr + "/" + dataset1);
    Path datasetVersion2 = new Path(datasetPathStr + "/" + dataset2);

    when(fs.globStatus(globbedPath)).thenReturn(new FileStatus[] {
            new FileStatus(0, true, 0, 0, 0, datasetVersion1),
            new FileStatus(0, true, 0, 0, 0, datasetVersion2) });

    DatasetVersionFinder<StringDatasetVersion> versionFinder =
            new MockDatasetVersionFinder(fs, new Properties());

    List<StringDatasetVersion> datasetVersions = Lists
            .newArrayList(versionFinder.findDatasetVersions(new MockDataset(datasetPath)));

    Assert.assertEquals(datasetVersions.size(), 2);
    Assert.assertEquals(datasetVersions.get(0).getVersion(), dataset1);
    Assert.assertEquals(datasetVersions.get(0).getPathsToDelete().iterator().next(), datasetVersion1);
    Assert.assertEquals(datasetVersions.get(1).getVersion(), dataset2);
    Assert.assertEquals(datasetVersions.get(1).getPathsToDelete().iterator().next(), datasetVersion2);
}
From source file:gobblin.runtime.template.PullFileToConfigConverter.java
License:Apache License
public void convert() throws IOException {
    Config baseConfig = ConfigFactory.parseString(DO_NOT_OVERRIDE_KEY + ": []");

    FileSystem pullFileFs = pullFileRootPath.getFileSystem(new Configuration());
    FileSystem outputFs = this.outputPath.getFileSystem(new Configuration());

    Config sysConfig = ConfigFactory.parseFile(this.sysConfigPath);

    PullFileLoader pullFileLoader = new PullFileLoader(this.pullFileRootPath, pullFileFs,
            PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS,
            PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);
    PackagedTemplatesJobCatalogDecorator catalog = new PackagedTemplatesJobCatalogDecorator();

    ConfigResolveOptions configResolveOptions = ConfigResolveOptions.defaults();
    configResolveOptions = configResolveOptions.setAllowUnresolved(true);

    ResourceBasedJobTemplate template;
    Config templateConfig;
    try {
        template = (ResourceBasedJobTemplate) catalog.getTemplate(templateURI.toUri());
        templateConfig = sysConfig.withFallback(template.getRawTemplateConfig()).withFallback(baseConfig)
                .resolve(configResolveOptions);
    } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
        throw new IOException(exc);
    }

    Set<String> doNotOverride = templateConfig.hasPath(DO_NOT_OVERRIDE_KEY)
            ? Sets.newHashSet(templateConfig.getStringList(DO_NOT_OVERRIDE_KEY))
            : Sets.<String>newHashSet();

    ConfigRenderOptions configRenderOptions = ConfigRenderOptions.defaults();
    configRenderOptions = configRenderOptions.setComments(false);
    configRenderOptions = configRenderOptions.setOriginComments(false);
    configRenderOptions = configRenderOptions.setFormatted(true);
    configRenderOptions = configRenderOptions.setJson(false);

    for (FileStatus pullFile : pullFileFs.globStatus(this.fileGlobToConvert)) {
        Config pullFileConfig = pullFileLoader.loadPullFile(pullFile.getPath(), ConfigFactory.empty(), true)
                .resolve();
        Map<String, String> outputConfigMap = Maps.newHashMap();

        outputConfigMap.put(ConfigurationKeys.JOB_TEMPLATE_PATH, this.templateURI.toString());

        boolean somethingChanged;
        do {
            somethingChanged = false;
            Config currentOutputConfig = ConfigFactory.parseMap(outputConfigMap);
            Config currentResolvedConfig = currentOutputConfig.withFallback(templateConfig)
                    .resolve(configResolveOptions);
            for (Map.Entry<Object, Object> entry : ConfigUtils.configToProperties(pullFileConfig).entrySet()) {
                String key = (String) entry.getKey();
                String value = (String) entry.getValue();
                try {
                    if ((!currentResolvedConfig.hasPath(key))
                            || (!currentResolvedConfig.getString(key).equals(value)
                                    && !doNotOverride.contains(key))) {
                        if (!FILTER_KEYS.contains(key)) {
                            somethingChanged = true;
                            outputConfigMap.put(key, value);
                        }
                    }
                } catch (ConfigException.NotResolved nre) {
                    // path is unresolved in config, will try again next iteration
                }
            }
        } while (somethingChanged);

        try {
            Config outputConfig = ConfigFactory.parseMap(outputConfigMap);
            Config currentResolvedConfig = outputConfig.withFallback(templateConfig).resolve();
            String rendered = outputConfig.root().render(configRenderOptions);

            Path newPath = PathUtils.removeExtension(pullFile.getPath(),
                    PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS.toArray(new String[] {}));
            newPath = PathUtils.addExtension(newPath, "conf");
            newPath = new Path(this.outputPath, newPath.getName());

            FSDataOutputStream os = outputFs.create(newPath);
            os.write(rendered.getBytes(Charsets.UTF_8));
            os.close();
        } catch (ConfigException.NotResolved nre) {
            throw new IOException("Not all configuration keys were resolved in pull file "
                    + pullFile.getPath(), nre);
        }
    }
}
From source file:gr.ntua.h2rdf.LoadTriples.DistinctIds.java
License:Open Source License
public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //io.compression.codecs
    Job job = new Job();
    job.setInputFormatClass(TextInputFormat.class);
    Configuration conf = new Configuration();
    Path blockProjection = new Path("blockIds/");
    Path translations = new Path("translations/");
    Path sample = new Path("sample/");
    Path temp = new Path("temp/");
    Path uniqueIds = new Path("uniqueIds/");
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(uniqueIds)) {
            fs.delete(uniqueIds, true);
        }
        if (fs.exists(translations)) {
            fs.delete(translations, true);
        }
        if (fs.exists(blockProjection)) {
            fs.delete(blockProjection, true);
        }
        if (fs.exists(sample)) {
            fs.delete(sample, true);
        }
        if (fs.exists(temp)) {
            fs.delete(temp, true);
        }

        FileOutputFormat.setOutputPath(job, uniqueIds);
        Path inp = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inp);

        double type = 1;
        double datasetSize = 0;
        if (fs.isFile(inp)) {
            datasetSize = fs.getFileStatus(inp).getLen();
        } else if (fs.isDirectory(inp)) {
            FileStatus[] s = fs.listStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        } else {
            FileStatus[] s = fs.globStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        }
        datasetSize = datasetSize * type;
        System.out.println("type: " + type);
        System.out.println("datasetSize: " + datasetSize);

        samplingRate = (double) sampleChunk / (double) datasetSize;
        if (samplingRate >= 0.1) {
            samplingRate = 0.1;
        }
        if (samplingRate <= 0.001) {
            samplingRate = 0.001;
        }
        numReducers = (int) (datasetSize / ReducerChunk);
        if (numReducers == 0)
            numReducers = 1;
        numReducers++;
    } catch (IOException e) {
        e.printStackTrace();
    }

    HBaseAdmin hadmin = new HBaseAdmin(conf);
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);
    HColumnDescriptor family = new HColumnDescriptor("counter");
    desc.addFamily(family);
    if (!hadmin.tableExists(TABLE_NAME)) {
        hadmin.createTable(desc);
    }

    job.setNumReduceTasks(numReducers);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(DistinctIds.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setPartitionerClass(SamplingPartitioner.class);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");
    //job.setCombinerClass(Combiner.class);

    job.setJobName("Distinct Id Wordcount");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job;
}
From source file:hadoop.TestingDriver.java
License:Open Source License
public static Configuration addPathToDC(Configuration conf, String path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] fstatus = fs.globStatus(new Path(path));
    Path[] listedPaths = FileUtil.stat2Paths(fstatus);
    for (Path p : listedPaths) {
        System.out.println(" Add File to DC " + p.toUri().toString());
        DistributedCache.addCacheFile(p.toUri(), conf);
    }
    return conf;
}
From source file:hitune.analysis.mapreduce.processor.FileFilter.FileFilter.java
License:Apache License
/**
 * Simply scan all those files under the path recursively
 * @param path
 * @param files
 */
public void scan(Path path, StringBuilder files) {
    //log.debug("parentpath: " + path.toString());
    if (files == null) {
        log.error("The files[StringBuilder] object isn't initialized");
        return;
    }
    try {
        //log.debug("pattern: " + pattern);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus[] fstats = null;
        fstats = fs.globStatus(new Path(path.toString() + "/*"));
        for (FileStatus fstat : fstats) {
            //log.debug("current file/folder: " + fstat.getPath().toString());
            if (fstat.isDir()) {
                scan(fstat.getPath(), files);
            } else {
                FileStatus[] rst = null;
                if (pattern == null || pattern.equals("") || pattern.length() == 0) {
                    rst = fs.globStatus(fstat.getPath());
                } else {
                    rst = fs.globStatus(fstat.getPath(), new regpatternFilter());
                }
                if (rst != null && rst.length != 0) {
                    String filepath = rst[0].getPath().toString();
                    if (files.length() == 0) {
                        files.append(filepath);
                    } else {
                        files.append(SEPARATOR).append(filepath);
                    }
                }
            }
        }
    } catch (IOException e) {
        log.error("Cannot do the file system operation: " + path.toString());
        e.printStackTrace();
    }
}
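The regpatternFilter used above is a custom PathFilter defined elsewhere in that source file and not shown here. As a minimal sketch of the two-argument globStatus(Path, PathFilter) overload it relies on, a filter might look like the following; the class name SuffixFilter and the ".log" suffix check are illustrative stand-ins, not the actual HiTune logic:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

// Hypothetical filter: accepts only paths whose names end in ".log".
class SuffixFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        return path.getName().endsWith(".log");
    }
}

// Usage: only glob matches that also pass the filter are returned.
// FileStatus[] rst = fs.globStatus(new Path("/logs/*"), new SuffixFilter());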
From source file:IndexService.IColumnInputFormat.java
License:Open Source License
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path tmpPath = null;
    FileSystem fs = FileSystem.get(job);
    List<IColumnInputSplit> splits = new ArrayList<IColumnInputSplit>();
    HashMap<String, FileStatus> files = new HashMap<String, FileStatus>();

    String[] inputfiles = job.getStrings("mapred.input.dir");
    for (String file : inputfiles) {
        FileStatus[] fss = fs.globStatus(new Path(file + "_idx*"));
        FileStatus status = null;
        long length = 0;
        for (FileStatus ss : fss) {
            if (ss.getLen() > length) {
                length = ss.getLen();
                status = ss;
            }
        }
        files.put(file, status);
    }

    for (String filekey : files.keySet()) {
        FileStatus file = files.get(filekey);
        Path path = file.getPath();
        Path keypath = new Path(filekey);
        long length = file.getLen();
        tmpPath = keypath;

        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if (blkLocations.length <= 1) {
            IColumnInputSplit split = new IColumnInputSplit(keypath, length, blkLocations[0].getHosts());
            splits.add(split);
        } else {
            String filename = path.toString();
            IFormatDataFile ifd = new IFormatDataFile(job);
            ifd.open(filename);
            ISegmentIndex segmentIndex = ifd.segIndex();

            for (int i = 0; i < segmentIndex.getSegnum(); i++) {
                IColumnInputSplit split = new IColumnInputSplit(keypath, segmentIndex.getseglen(i),
                        segmentIndex.getILineIndex(i).beginline(),
                        segmentIndex.getILineIndex(i).endline()
                                - segmentIndex.getILineIndex(i).beginline() + 1,
                        blkLocations[i].getHosts());
                splits.add(split);
            }
            ifd.close();
        }
    }

    if (splits.size() == 0) {
        splits.add(new IColumnInputSplit(tmpPath, 0, 0, 0, new String[0]));
    }
    System.out.println("Total # of splits: " + splits.size());
    return splits.toArray(new IColumnInputSplit[splits.size()]);
}
From source file:io.covert.dns.util.DumpResponses.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    FileStatus[] listing;
    Path inpath = new Path(args[0]);
    if (fs.getFileStatus(inpath) != null && fs.getFileStatus(inpath).isDir())
        listing = fs.listStatus(inpath);
    else
        listing = fs.globStatus(inpath);

    for (FileStatus f : listing) {
        if (f.isDir() || f.getPath().getName().startsWith("_"))
            continue;

        System.out.println("Opening " + f.getPath() + " ...");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, f.getPath(), conf);
        while (reader.next(key, val)) {
            Message msg = new Message(val.getBytes());
            System.out.println(key + ": " + msg);
            System.out.println("---");
        }
        reader.close();
    }
    return 0;
}
From source file:io.hops.erasure_coding.TestErasureCodingManagerEndless.java
License:Apache License
@Override
public void setUp() throws Exception {
    cluster = new MiniDFSCluster.Builder(getConfig()).numDataNodes(NUMBER_OF_DATANODES).build();
    cluster.waitActive();
    fs = cluster.getFileSystem();

    FileSystem fs = getFileSystem();
    FileStatus[] files = fs.globStatus(new Path("/*"));
    for (FileStatus file : files) {
        fs.delete(file.getPath(), true);
    }
}