Example usage for org.apache.hadoop.fs FileSystem open

List of usage examples for org.apache.hadoop.fs FileSystem open

Introduction

This page collects usage examples for org.apache.hadoop.fs FileSystem open, taken from open-source projects.

Prototype

public FSDataInputStream open(Path f) throws IOException 

Document

Opens an FSDataInputStream at the indicated Path.
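
Before the individual project examples, here is a minimal, self-contained sketch of the call itself. It is not taken from any of the sources below; the class name OpenExample and the command-line path are illustrative only.

import java.io.IOException;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path(args[0]);                 // file to read, supplied by the caller
        FileSystem fs = path.getFileSystem(conf);      // resolve the FileSystem for the path's scheme
        try (FSDataInputStream in = fs.open(path)) {   // open() returns a seekable, positioned-readable stream
            byte[] head = new byte[16];
            int n = in.read(0L, head, 0, head.length); // positioned read from offset 0; does not move the cursor
            if (n > 0) {
                System.out.println("first bytes: " + new String(head, 0, n, StandardCharsets.UTF_8));
            }
            System.out.println("current position: " + in.getPos());
        }
    }
}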

Usage

From source file:com.mozilla.hadoop.Backup.java

License:Apache License

/**
 * Load a list of paths from a file
 * @param fs
 * @param inputPath
 * @return
 * @throws IOException
 */
public static List<Path> loadPaths(FileSystem fs, Path inputPath) throws IOException {
    List<Path> retPaths = new ArrayList<Path>();
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(fs.open(inputPath)));
        String line = null;
        while ((line = reader.readLine()) != null) {
            retPaths.add(new Path(line));
        }
    } catch (IOException e) {
        LOG.error("Exception in loadPaths for inputPath: " + inputPath.toString());
    } finally {
        checkAndClose(reader);
    }

    return retPaths;
}
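
The helper above closes the reader through the project's own checkAndClose utility and only logs the IOException. For comparison, a sketch of the same pattern using try-with-resources (Java 7+), which closes the underlying FSDataInputStream automatically and lets the exception propagate to the caller; this variant is not part of the original source:

public static List<Path> loadPaths(FileSystem fs, Path inputPath) throws IOException {
    List<Path> retPaths = new ArrayList<Path>();
    // The reader (and the FSDataInputStream returned by fs.open) is closed automatically.
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(fs.open(inputPath)))) {
        String line;
        while ((line = reader.readLine()) != null) {
            retPaths.add(new Path(line));
        }
    }
    return retPaths;
}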

From source file:com.mozilla.socorro.hadoop.RawDumpSize.java

License:LGPL

public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }

    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            int byteSize = Integer.parseInt(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            if (hdfs != null) {
                hdfs.close();
            }
        }

        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", rawStats.getMin(),
                rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d), rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", processedStats.getMin(),
                processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
                processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }

    return rc;
}

From source file:com.mozilla.telemetry.pig.eval.json.ValidateTelemetrySubmission.java

License:Apache License

protected FSDataInputStream getHDFSFile(String fileName) throws IOException {
    FileSystem fs = FileSystem.get(UDFContext.getUDFContext().getJobConf());
    return fs.open(new Path(fileName));
}

From source file:com.msd.gin.halyard.sail.HBaseSail.java

License:Apache License

@Override
public synchronized long size(Resource... contexts) throws SailException {
    if (contexts != null && contexts.length > 0 && contexts[0] != null) {
        throw new SailException("Size calculation is not supported for named graphs");
    }
    if (sizeTimestamp < 0
            || (isWritable() && sizeTimestamp + STATUS_CACHING_TIMEOUT < System.currentTimeMillis()))
        try {
            long entries = 0;
            FileSystem fs = FileSystem.get(config);
            Collection<HColumnDescriptor> families = table.getTableDescriptor().getFamilies();
            Set<String> familyNames = new HashSet<>(families.size());
            for (HColumnDescriptor hcd : families) {
                familyNames.add(hcd.getNameAsString());
            }
            Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(config), table.getName());
            PathFilter dirFilter = new FSUtils.DirFilter(fs);
            int divider = 1;
            for (HRegionLocation hrl : table.getRegionLocator().getAllRegionLocations()) {
                HRegionInfo hri = hrl.getRegionInfo();
                byte[] skey = hri.getStartKey();
                if (skey.length == 0 || skey[0] == HalyardTableUtils.SPO_PREFIX) {
                    byte[] ekey = hri.getEndKey();
                    if (ekey.length == 0 || ekey[0] > HalyardTableUtils.POS_PREFIX) {
                        divider = 3;
                    }
                    for (FileStatus familyDir : fs.listStatus(new Path(tableDir, hri.getEncodedName()),
                            dirFilter)) {
                        if (familyNames.contains(familyDir.getPath().getName())) {
                            for (FileStatus file : fs.listStatus(familyDir.getPath())) {
                                if (file.isFile()) {
                                    try (FSDataInputStream in = fs.open(file.getPath())) {
                                        entries += FixedFileTrailer.readFromStream(in, file.getLen())
                                                .getEntryCount();
                                    } catch (Exception e) {
                                        LOG.log(Level.WARNING,
                                                "Exception while reading trailer from hfile: " + file.getPath(),
                                                e);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            size = entries / divider;
            sizeTimestamp = System.currentTimeMillis();
        } catch (IOException e) {
            throw new SailException(e);
        }
    return size;
}

From source file:com.mvdb.etl.actions.ActionUtils.java

License:Apache License

public static String getActionChainBrokenCause() throws IOException {
    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);

    String actionChainStatusFile = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_ACTION_CHAIN_STATUS_FILE);
    String actionChainStatusFileName = /* hdfsHome + */File.separator + actionChainStatusFile;
    Path actionChainStatusFilePath = new Path(actionChainStatusFileName);

    if (hdfsFileSystem.exists(actionChainStatusFilePath)) {
        FSDataInputStream in = hdfsFileSystem.open(actionChainStatusFilePath);

        int bytesRead = -1;
        byte[] buffer = new byte[1024];
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        while ((bytesRead = in.read(buffer)) > 0) {
            baos.write(buffer, 0, bytesRead);
        }
        return new String(baos.toByteArray());
    }

    return null;
}

From source file:com.mycompany.keywordsearch.LineRecordReaderV2.java

public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    locationKey.set(file.toString());
    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}

From source file:com.mycompany.movehdfstohbase.MoveHdfsToHbase.java

private static void putData() throws IOException {
    List<Put> putList = new LinkedList();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path("/page"));
    int counter = 0;
    Table table = connection.getTable(TableName.valueOf(TABLE_NAME));

    for (FileStatus f : status) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(f.getPath())));
        Put put = new Put(Bytes.toBytes("row" + (++counter)));
        put.addColumn(Bytes.toBytes("url"), null, Bytes.toBytes(br.readLine())); // url
        put.addColumn(Bytes.toBytes("title"), null, Bytes.toBytes(br.readLine())); // title
        put.addColumn(Bytes.toBytes("body"), null, Bytes.toBytes(br.readLine())); // body
        putList.add(put);
    }

    table.put(putList);
    table.close();
}

From source file:com.mycustomloader.vsamloader.VSAMLoader.java

License:Apache License

public void cal(String nam) throws IOException {
    String t = nam;
    Configuration conf = new Configuration();
    Path p = new Path(t);
    FileSystem fs = p.getFileSystem(conf);
    FSDataInputStream fsdin = fs.open(p);
    Copybook parsedcb = CopybookParser.parse("copy", fsdin);
    Iterator<Group> itr = parsedcb.getChildren().iterator();
    rec(itr);
}

From source file:com.nearinfinity.blur.analysis.BlurAnalyzer.java

License:Apache License

public static BlurAnalyzer create(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    FSDataInputStream inputStream = fileSystem.open(path);
    try {
        return create(inputStream);
    } finally {
        inputStream.close();
    }
}

From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java

License:Apache License

public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);

    Vector weightsPerLabel = null;
    Vector perLabelThetaNormalizer = null;
    Vector weightsPerFeature = null;
    Matrix weightsPerLabelAndFeature;
    float alphaI;

    FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        Closeables.close(in, true);
    }
    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}