Example usage for org.apache.hadoop.fs FileSystem globStatus

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.globStatus, drawn from open-source projects.

Prototype

public FileStatus[] globStatus(Path pathPattern) throws IOException 

Document

Return all the files that match filePattern and are not checksum files.
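
For orientation, here is a minimal, self-contained sketch of a typical globStatus call before the real-world examples below. The /data/*.txt pattern and the GlobStatusExample class name are hypothetical, not taken from any project cited on this page.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GlobStatusExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // globStatus expands the glob on the client and returns one FileStatus
        // per match. A non-matching glob yields an empty array, while a plain
        // (wildcard-free) path that does not exist yields null, so both cases
        // are worth guarding against.
        FileStatus[] matches = fs.globStatus(new Path("/data/*.txt"));
        if (matches != null) {
            for (FileStatus status : matches) {
                System.out.println(status.getPath() + " (" + status.getLen() + " bytes)");
            }
        }
    }
}

As the examples below show, the same pattern recurs throughout: build a glob Path, call globStatus, then iterate the returned FileStatus array to open, merge, or delete the matched files.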

Usage

From source file: fi.tkk.ics.hadoop.bam.cli.Utils.java

License: Open Source License

/** Merges the files in the given directory that have names given by
 * getMergeableWorkFile() into out.
 *
 * Outputs progress reports if commandName is non-null.
 */
public static void mergeInto(OutputStream out, Path directory, String basePrefix, String basePostfix,
        Configuration conf, String commandName) throws IOException {
    final FileSystem fs = directory.getFileSystem(conf);

    final FileStatus[] parts = fs.globStatus(new Path(directory,
            basePrefix + conf.get(WORK_FILENAME_PROPERTY) + basePostfix + "-[0-9][0-9][0-9][0-9][0-9][0-9]*"));

    int i = 0;
    Timer t = new Timer();
    for (final FileStatus part : parts) {
        if (commandName != null) {
            System.out.printf("%s :: Merging part %d (size %d)...", commandName, ++i, part.getLen());
            System.out.flush();

            t.start();
        }

        final InputStream in = fs.open(part.getPath());
        IOUtils.copyBytes(in, out, conf, false);
        in.close();

        if (commandName != null)
            System.out.printf(" done in %d.%03d s.\n", t.stopS(), t.fms());
    }
    for (final FileStatus part : parts)
        fs.delete(part.getPath(), false);
}

From source file: FormatStorage1.IColumnDataFile.java

License: Open Source License

public void open(String fileName, ArrayList<Integer> idxs) throws IOException {
    this.columnprojects = new ArrayList<ArrayList<Integer>>();
    this.head = new IHead();
    this.readidxs = new TreeSet<Integer>();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] statuss = fs.globStatus(new Path(fileName + "_idx*"));
    this.cp2ifdfs.clear();
    this.idx2ifdfs.clear();
    if (idxs != null && idxs.size() > 0) {
        readidxs.addAll(idxs);
        for (int i = 0; i < statuss.length; i++) {
            String file = statuss[i].getPath().toString();
            IFormatDataFile ifdf = null;
            String idxstr = file.substring(file.lastIndexOf("_idx") + 4);
            String[] sts = idxstr.split("_");
            TreeSet<Integer> ts = new TreeSet<Integer>();
            for (int j = 0; j < sts.length; j++) {
                ts.add(Integer.parseInt(sts[j]));
            }

            boolean contains = false;
            for (Integer id : idxs) {
                if (ts.contains(id)) {
                    contains = true;
                    if (ifdf == null) {
                        ifdf = new IFormatDataFile(conf);
                        ifdf.open(file);
                    }
                    this.idx2ifdfs.put(id, ifdf);
                }
            }
            if (contains) {
                ArrayList<Integer> cp = new ArrayList<Integer>();
                cp.addAll(ts);
                this.columnprojects.add(cp);
                this.cp2ifdfs.put(cp, ifdf);
            }
        }

    } else {
        for (int i = 0; i < statuss.length; i++) {
            String file = statuss[i].getPath().toString();
            IFormatDataFile ifdf = null;
            String idxstr = file.substring(file.lastIndexOf("_idx") + 4);
            String[] sts = idxstr.split("_");
            TreeSet<Integer> ts = new TreeSet<Integer>();
            ifdf = new IFormatDataFile(conf);
            ifdf.open(file);

            for (int j = 0; j < sts.length; j++) {
                int id = Integer.parseInt(sts[j]);
                ts.add(id);
                this.idx2ifdfs.put(id, ifdf);
            }

            ArrayList<Integer> cp = new ArrayList<Integer>();
            cp.addAll(ts);
            this.readidxs.addAll(ts);
            this.columnprojects.add(cp);
            this.cp2ifdfs.put(cp, ifdf);
        }
    }
    this.fieldtypes = new HashMap<Integer, IRecord.IFType>();
    for (Integer idx : this.readidxs) {
        this.fieldtypes.put(idx, this.idx2ifdfs.get(idx).fileInfo().head().fieldMap().fieldtypes().get(idx));
    }
    workstatus = ConstVar.WS_Read;
}

From source file: gobblin.data.management.retention.DatasetVersionFinderTest.java

License: Apache License

@Test
public void test() throws IOException {
    FileSystem fs = mock(FileSystem.class);

    String datasetPathStr = "/path/to/dataset";
    String dataset1 = "datasetVersion1";
    String dataset2 = "datasetVersion2";
    Path datasetPath = new Path(datasetPathStr);
    Path globbedPath = new Path(datasetPathStr + "/*");
    Path datasetVersion1 = new Path(datasetPathStr + "/" + dataset1);
    Path datasetVersion2 = new Path(datasetPathStr + "/" + dataset2);

    when(fs.globStatus(globbedPath))
            .thenReturn(new FileStatus[] { new FileStatus(0, true, 0, 0, 0, datasetVersion1),
                    new FileStatus(0, true, 0, 0, 0, datasetVersion2) });

    DatasetVersionFinder<StringDatasetVersion> versionFinder = new MockDatasetVersionFinder(fs,
            new Properties());

    List<StringDatasetVersion> datasetVersions = Lists
            .newArrayList(versionFinder.findDatasetVersions(new MockDataset(datasetPath)));
    Assert.assertEquals(datasetVersions.size(), 2);
    Assert.assertEquals(datasetVersions.get(0).getVersion(), dataset1);
    Assert.assertEquals(datasetVersions.get(0).getPathsToDelete().iterator().next(), datasetVersion1);
    Assert.assertEquals(datasetVersions.get(1).getVersion(), dataset2);
    Assert.assertEquals(datasetVersions.get(1).getPathsToDelete().iterator().next(), datasetVersion2);
}

From source file: gobblin.runtime.template.PullFileToConfigConverter.java

License: Apache License

public void convert() throws IOException {
    Config baseConfig = ConfigFactory.parseString(DO_NOT_OVERRIDE_KEY + ": []");

    FileSystem pullFileFs = pullFileRootPath.getFileSystem(new Configuration());
    FileSystem outputFs = this.outputPath.getFileSystem(new Configuration());

    Config sysConfig = ConfigFactory.parseFile(this.sysConfigPath);

    PullFileLoader pullFileLoader = new PullFileLoader(this.pullFileRootPath, pullFileFs,
            PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS,
            PullFileLoader.DEFAULT_HOCON_PULL_FILE_EXTENSIONS);

    PackagedTemplatesJobCatalogDecorator catalog = new PackagedTemplatesJobCatalogDecorator();

    ConfigResolveOptions configResolveOptions = ConfigResolveOptions.defaults();
    configResolveOptions = configResolveOptions.setAllowUnresolved(true);

    ResourceBasedJobTemplate template;
    Config templateConfig;
    try {
        template = (ResourceBasedJobTemplate) catalog.getTemplate(templateURI.toUri());

        templateConfig = sysConfig.withFallback(template.getRawTemplateConfig()).withFallback(baseConfig)
                .resolve(configResolveOptions);
    } catch (SpecNotFoundException | JobTemplate.TemplateException exc) {
        throw new IOException(exc);
    }

    Set<String> doNotOverride = templateConfig.hasPath(DO_NOT_OVERRIDE_KEY)
            ? Sets.newHashSet(templateConfig.getStringList(DO_NOT_OVERRIDE_KEY))
            : Sets.<String>newHashSet();

    ConfigRenderOptions configRenderOptions = ConfigRenderOptions.defaults();
    configRenderOptions = configRenderOptions.setComments(false);
    configRenderOptions = configRenderOptions.setOriginComments(false);
    configRenderOptions = configRenderOptions.setFormatted(true);
    configRenderOptions = configRenderOptions.setJson(false);

    for (FileStatus pullFile : pullFileFs.globStatus(this.fileGlobToConvert)) {
        Config pullFileConfig = pullFileLoader.loadPullFile(pullFile.getPath(), ConfigFactory.empty(), true)
                .resolve();
        Map<String, String> outputConfigMap = Maps.newHashMap();

        outputConfigMap.put(ConfigurationKeys.JOB_TEMPLATE_PATH, this.templateURI.toString());

        boolean somethingChanged;
        do {
            somethingChanged = false;

            Config currentOutputConfig = ConfigFactory.parseMap(outputConfigMap);
            Config currentResolvedConfig = currentOutputConfig.withFallback(templateConfig)
                    .resolve(configResolveOptions);

            for (Map.Entry<Object, Object> entry : ConfigUtils.configToProperties(pullFileConfig).entrySet()) {
                String key = (String) entry.getKey();
                String value = (String) entry.getValue();

                try {
                    if ((!currentResolvedConfig.hasPath(key))
                            || (!currentResolvedConfig.getString(key).equals(value)
                                    && !doNotOverride.contains(key))) {
                        if (!FILTER_KEYS.contains(key)) {
                            somethingChanged = true;
                            outputConfigMap.put(key, value);
                        }
                    }
                } catch (ConfigException.NotResolved nre) {
                    // path is unresolved in config, will try again next iteration
                }
            }

        } while (somethingChanged);

        try {
            Config outputConfig = ConfigFactory.parseMap(outputConfigMap);
            Config currentResolvedConfig = outputConfig.withFallback(templateConfig).resolve();

            String rendered = outputConfig.root().render(configRenderOptions);

            Path newPath = PathUtils.removeExtension(pullFile.getPath(),
                    PullFileLoader.DEFAULT_JAVA_PROPS_PULL_FILE_EXTENSIONS.toArray(new String[] {}));
            newPath = PathUtils.addExtension(newPath, "conf");
            newPath = new Path(this.outputPath, newPath.getName());

            FSDataOutputStream os = outputFs.create(newPath);
            os.write(rendered.getBytes(Charsets.UTF_8));
            os.close();
        } catch (ConfigException.NotResolved nre) {
            throw new IOException("Not all configuration keys were resolved in pull file " + pullFile.getPath(),
                    nre);
        }

    }
}

From source file: gr.ntua.h2rdf.LoadTriples.DistinctIds.java

License: Open Source License

public Job createSubmittableJob(String[] args) throws IOException, ClassNotFoundException {
    //io.compression.codecs
    Job job = new Job();

    job.setInputFormatClass(TextInputFormat.class);
    Configuration conf = new Configuration();
    Path blockProjection = new Path("blockIds/");
    Path translations = new Path("translations/");
    Path sample = new Path("sample/");
    Path temp = new Path("temp/");
    Path uniqueIds = new Path("uniqueIds/");
    FileSystem fs;
    try {
        fs = FileSystem.get(conf);
        if (fs.exists(uniqueIds)) {
            fs.delete(uniqueIds, true);
        }
        if (fs.exists(translations)) {
            fs.delete(translations, true);
        }
        if (fs.exists(blockProjection)) {
            fs.delete(blockProjection, true);
        }
        if (fs.exists(sample)) {
            fs.delete(sample, true);
        }
        if (fs.exists(temp)) {
            fs.delete(temp, true);
        }

        FileOutputFormat.setOutputPath(job, uniqueIds);
        Path inp = new Path(args[0]);
        FileInputFormat.setInputPaths(job, inp);

        double type = 1;
        double datasetSize = 0;
        if (fs.isFile(inp)) {
            datasetSize = fs.getFileStatus(inp).getLen();
        } else if (fs.isDirectory(inp)) {
            FileStatus[] s = fs.listStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        } else {
            FileStatus[] s = fs.globStatus(inp);
            for (int i = 0; i < s.length; i++) {
                if (s[i].getPath().getName().toString().endsWith(".gz"))
                    type = 27;
                if (s[i].getPath().getName().toString().endsWith(".snappy"))
                    type = 10;
                datasetSize += s[i].getLen();
            }
        }
        datasetSize = datasetSize * type;
        System.out.println("type: " + type);
        System.out.println("datasetSize: " + datasetSize);
        samplingRate = (double) sampleChunk / (double) datasetSize;
        if (samplingRate >= 0.1) {
            samplingRate = 0.1;
        }
        if (samplingRate <= 0.001) {
            samplingRate = 0.001;
        }
        numReducers = (int) (datasetSize / ReducerChunk);
        if (numReducers == 0)
            numReducers = 1;
        numReducers++;
    } catch (IOException e) {
        e.printStackTrace();
    }

    HBaseAdmin hadmin = new HBaseAdmin(conf);
    HTableDescriptor desc = new HTableDescriptor(TABLE_NAME);

    HColumnDescriptor family = new HColumnDescriptor("counter");
    desc.addFamily(family);
    if (!hadmin.tableExists(TABLE_NAME)) {
        hadmin.createTable(desc);
    }

    job.setNumReduceTasks(numReducers);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(ImmutableBytesWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setJarByClass(DistinctIds.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    job.setPartitionerClass(SamplingPartitioner.class);

    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    job.getConfiguration().set("mapred.compress.map.output", "true");
    job.getConfiguration().set("mapred.map.output.compression.codec",
            "org.apache.hadoop.io.compress.SnappyCodec");

    //job.setCombinerClass(Combiner.class);
    job.setJobName("Distinct Id Wordcount");
    job.getConfiguration().setBoolean("mapred.map.tasks.speculative.execution", false);
    job.getConfiguration().setBoolean("mapred.reduce.tasks.speculative.execution", false);
    job.getConfiguration().setInt("io.sort.mb", 100);
    job.getConfiguration().setInt("io.file.buffer.size", 131072);
    job.getConfiguration().setInt("mapred.job.reuse.jvm.num.tasks", -1);

    return job;

}

From source file: hadoop.TestingDriver.java

License: Open Source License

public static Configuration addPathToDC(Configuration conf, String path) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] fstatus = fs.globStatus(new Path(path));
    Path[] listedPaths = FileUtil.stat2Paths(fstatus);
    for (Path p : listedPaths) {
        System.out.println(" Add File to DC " + p.toUri().toString());
        DistributedCache.addCacheFile(p.toUri(), conf);
    }
    return conf;
}

From source file: hitune.analysis.mapreduce.processor.FileFilter.FileFilter.java

License: Apache License

/**
 * Simply scan all those files under the path recursively
 * @param path
 * @param files
 */
public void scan(Path path, StringBuilder files) {
    //log.debug("parentpath: " + path.toString());
    if (files == null) {
        log.error("The files[StringBuilder] object isn't initialized");
        return;
    }
    try {
        //log.debug("pattern: " + pattern);
        FileSystem fs = path.getFileSystem(conf);
        FileStatus[] fstats = null;
        fstats = fs.globStatus(new Path(path.toString() + "/*"));

        for (FileStatus fstat : fstats) {
            //log.debug("current file/folder: "+ fstat.getPath().toString());
            if (fstat.isDir()) {
                scan(fstat.getPath(), files);
            } else {
                FileStatus[] rst = null;
                if (pattern == null || pattern.equals("") || pattern.length() == 0) {
                    rst = fs.globStatus(fstat.getPath());
                } else {
                    rst = fs.globStatus(fstat.getPath(), new regpatternFilter());
                }
                if (rst != null && rst.length != 0) {
                    String filepath = rst[0].getPath().toString();

                    if (files.length() == 0) {
                        files.append(filepath);
                    } else {
                        files.append(SEPARATOR).append(filepath);
                    }
                }
            }
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        log.error("Cannot do the file system operation: " + path.toString());
        e.printStackTrace();
    }
}
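
The scan() method above also exercises the two-argument overload globStatus(Path, PathFilter), which applies a caller-supplied filter to the paths produced by the glob expansion. A minimal sketch of that overload follows; the /var/data/* pattern, the ".log" suffix, and the GlobWithFilterExample class name are hypothetical.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;

public class GlobWithFilterExample {
    public static void main(String[] args) throws IOException {
        FileSystem fs = FileSystem.get(new Configuration());

        // Keep only matched paths whose file name ends in ".log".
        PathFilter logsOnly = new PathFilter() {
            @Override
            public boolean accept(Path path) {
                return path.getName().endsWith(".log");
            }
        };

        FileStatus[] matches = fs.globStatus(new Path("/var/data/*"), logsOnly);
        if (matches != null) {
            for (FileStatus status : matches) {
                System.out.println(status.getPath());
            }
        }
    }
}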

From source file: IndexService.IColumnInputFormat.java

License: Open Source License

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    Path tmpPath = null;
    FileSystem fs = FileSystem.get(job);
    List<IColumnInputSplit> splits = new ArrayList<IColumnInputSplit>();
    HashMap<String, FileStatus> files = new HashMap<String, FileStatus>();
    String[] inputfiles = job.getStrings("mapred.input.dir");

    for (String file : inputfiles) {
        FileStatus[] fss = fs.globStatus(new Path(file + "_idx*"));
        FileStatus status = null;
        long length = 0;
        for (FileStatus ss : fss) {
            if (ss.getLen() > length) {
                length = ss.getLen();
                status = ss;
            }
        }
        files.put(file, status);
    }

    for (String filekey : files.keySet()) {
        FileStatus file = files.get(filekey);
        Path path = file.getPath();
        Path keypath = new Path(filekey);
        long length = file.getLen();

        tmpPath = keypath;

        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);

        if (blkLocations.length <= 1) {
            IColumnInputSplit split = new IColumnInputSplit(keypath, length, blkLocations[0].getHosts());
            splits.add(split);
        } else {

            String filename = path.toString();
            IFormatDataFile ifd = new IFormatDataFile(job);
            ifd.open(filename);

            ISegmentIndex segmentIndex = ifd.segIndex();

            for (int i = 0; i < segmentIndex.getSegnum(); i++) {
                IColumnInputSplit split = new IColumnInputSplit(keypath, segmentIndex.getseglen(i),
                        segmentIndex.getILineIndex(i).beginline(),
                        segmentIndex.getILineIndex(i).endline() - segmentIndex.getILineIndex(i).beginline() + 1,
                        blkLocations[i].getHosts());
                splits.add(split);
            }

            ifd.close();
        }
    }

    if (splits.size() == 0) {
        splits.add(new IColumnInputSplit(tmpPath, 0, 0, 0, new String[0]));
    }

    System.out.println("Total # of splits: " + splits.size());
    return splits.toArray(new IColumnInputSplit[splits.size()]);

}

From source file: io.covert.dns.util.DumpResponses.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    FileStatus[] listing;
    Path inpath = new Path(args[0]);
    if (fs.getFileStatus(inpath) != null && fs.getFileStatus(inpath).isDir())
        listing = fs.listStatus(inpath);
    else
        listing = fs.globStatus(inpath);

    for (FileStatus f : listing) {
        if (f.isDir() || f.getPath().getName().startsWith("_"))
            continue;

        System.out.println("Opennning " + f.getPath() + " ...");
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, f.getPath(), conf);

        while (reader.next(key, val)) {
            Message msg = new Message(val.getBytes());
            System.out.println(key + ": " + msg);
            System.out.println("---");
        }
        reader.close();
    }
    return 0;
}

From source file: io.hops.erasure_coding.TestErasureCodingManagerEndless.java

License: Apache License

@Override
public void setUp() throws Exception {
    cluster = new MiniDFSCluster.Builder(getConfig()).numDataNodes(NUMBER_OF_DATANODES).build();
    cluster.waitActive();

    fs = cluster.getFileSystem();
    FileSystem fs = getFileSystem();
    FileStatus[] files = fs.globStatus(new Path("/*"));
    for (FileStatus file : files) {
        fs.delete(file.getPath(), true);
    }
}