List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
public FSDataInputStream open(PathHandle fd) throws IOException
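Before the project-specific examples below, here is a minimal sketch of the pattern they all share: obtain a FileSystem, call open() on a Path, and read from the returned FSDataInputStream. The configuration and path name are placeholders, not taken from any of the listed source files; the PathHandle overload in the signature above serves the same purpose for a previously obtained handle.

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class OpenExample {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Path path = new Path("/tmp/example.txt"); // placeholder path
        FileSystem fs = path.getFileSystem(conf);
        // open() returns an FSDataInputStream; try-with-resources closes it
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }
}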
From source file:com.mozilla.hadoop.Backup.java
License:Apache License
/**
 * Load a list of paths from a file
 * @param fs
 * @param inputPath
 * @return
 * @throws IOException
 */
public static List<Path> loadPaths(FileSystem fs, Path inputPath) throws IOException {
    List<Path> retPaths = new ArrayList<Path>();
    BufferedReader reader = null;
    try {
        reader = new BufferedReader(new InputStreamReader(fs.open(inputPath)));
        String line = null;
        while ((line = reader.readLine()) != null) {
            retPaths.add(new Path(line));
        }
    } catch (IOException e) {
        LOG.error("Exception in loadPaths for inputPath: " + inputPath.toString());
    } finally {
        checkAndClose(reader);
    }
    return retPaths;
}
From source file:com.mozilla.socorro.hadoop.RawDumpSize.java
License:LGPL
public int run(String[] args) throws Exception {
    if (args.length != 1) {
        return printUsage();
    }
    int rc = -1;
    Job job = initJob(args);
    job.waitForCompletion(true);
    if (job.isSuccessful()) {
        rc = 0;
        FileSystem hdfs = null;
        DescriptiveStatistics rawStats = new DescriptiveStatistics();
        long rawTotal = 0L;
        DescriptiveStatistics processedStats = new DescriptiveStatistics();
        long processedTotal = 0L;
        try {
            hdfs = FileSystem.get(job.getConfiguration());
            Pattern tabPattern = Pattern.compile("\t");
            for (FileStatus status : hdfs.listStatus(FileOutputFormat.getOutputPath(job))) {
                if (!status.isDir()) {
                    BufferedReader reader = null;
                    try {
                        reader = new BufferedReader(new InputStreamReader(hdfs.open(status.getPath())));
                        String line = null;
                        while ((line = reader.readLine()) != null) {
                            String[] splits = tabPattern.split(line);
                            int byteSize = Integer.parseInt(splits[2]);
                            if ("raw".equals(splits[1])) {
                                rawStats.addValue(byteSize);
                                rawTotal += byteSize;
                            } else if ("processed".equals(splits[1])) {
                                processedStats.addValue(byteSize);
                                processedTotal += byteSize;
                            }
                        }
                    } finally {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                }
            }
        } finally {
            if (hdfs != null) {
                hdfs.close();
            }
        }
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " raw_data:dump =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f",
                rawStats.getMin(), rawStats.getMax(), rawStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                rawStats.getPercentile(25.0d), rawStats.getPercentile(50.0d), rawStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + rawTotal);
        System.out.println("===== " + job.getConfiguration().get(START_DATE) + " processed_data:json =====");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f",
                processedStats.getMin(), processedStats.getMax(), processedStats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                processedStats.getPercentile(25.0d), processedStats.getPercentile(50.0d),
                processedStats.getPercentile(75.0d)));
        System.out.println("Total Bytes: " + processedTotal);
    }
    return rc;
}
From source file:com.mozilla.telemetry.pig.eval.json.ValidateTelemetrySubmission.java
License:Apache License
protected FSDataInputStream getHDFSFile(String fileName) throws IOException {
    FileSystem fs = FileSystem.get(UDFContext.getUDFContext().getJobConf());
    return fs.open(new Path(fileName));
}
From source file:com.msd.gin.halyard.sail.HBaseSail.java
License:Apache License
@Override
public synchronized long size(Resource... contexts) throws SailException {
    if (contexts != null && contexts.length > 0 && contexts[0] != null) {
        throw new SailException("Size calculation is not supported for named graphs");
    }
    if (sizeTimestamp < 0
            || (isWritable() && sizeTimestamp + STATUS_CACHING_TIMEOUT < System.currentTimeMillis())) {
        try {
            long entries = 0;
            FileSystem fs = FileSystem.get(config);
            Collection<HColumnDescriptor> families = table.getTableDescriptor().getFamilies();
            Set<String> familyNames = new HashSet<>(families.size());
            for (HColumnDescriptor hcd : families) {
                familyNames.add(hcd.getNameAsString());
            }
            Path tableDir = FSUtils.getTableDir(FSUtils.getRootDir(config), table.getName());
            PathFilter dirFilter = new FSUtils.DirFilter(fs);
            int divider = 1;
            for (HRegionLocation hrl : table.getRegionLocator().getAllRegionLocations()) {
                HRegionInfo hri = hrl.getRegionInfo();
                byte[] skey = hri.getStartKey();
                if (skey.length == 0 || skey[0] == HalyardTableUtils.SPO_PREFIX) {
                    byte[] ekey = hri.getEndKey();
                    if (ekey.length == 0 || ekey[0] > HalyardTableUtils.POS_PREFIX) {
                        divider = 3;
                    }
                    for (FileStatus familyDir : fs.listStatus(new Path(tableDir, hri.getEncodedName()), dirFilter)) {
                        if (familyNames.contains(familyDir.getPath().getName())) {
                            for (FileStatus file : fs.listStatus(familyDir.getPath())) {
                                if (file.isFile()) {
                                    try (FSDataInputStream in = fs.open(file.getPath())) {
                                        entries += FixedFileTrailer.readFromStream(in, file.getLen())
                                                .getEntryCount();
                                    } catch (Exception e) {
                                        LOG.log(Level.WARNING,
                                                "Exception while reading trailer from hfile: " + file.getPath(), e);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            size = entries / divider;
            sizeTimestamp = System.currentTimeMillis();
        } catch (IOException e) {
            throw new SailException(e);
        }
    }
    return size;
}
From source file:com.mvdb.etl.actions.ActionUtils.java
License:Apache License
public static String getActionChainBrokenCause() throws IOException {
    String hdfsHome = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_HADOOP_HOME);
    org.apache.hadoop.conf.Configuration conf = new org.apache.hadoop.conf.Configuration();
    conf.addResource(new Path(hdfsHome + "/conf/core-site.xml"));
    FileSystem hdfsFileSystem = FileSystem.get(conf);
    String actionChainStatusFile = getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
            ConfigurationKeys.GLOBAL_ACTION_CHAIN_STATUS_FILE);
    String actionChainStatusFileName = /* hdfsHome + */ File.separator + actionChainStatusFile;
    Path actionChainStatusFilePath = new Path(actionChainStatusFileName);
    if (hdfsFileSystem.exists(actionChainStatusFilePath)) {
        FSDataInputStream in = hdfsFileSystem.open(actionChainStatusFilePath);
        int bytesRead = -1;
        byte[] buffer = new byte[1024];
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        while ((bytesRead = in.read(buffer)) > 0) {
            baos.write(buffer, 0, bytesRead);
        }
        return new String(baos.toByteArray());
    }
    return null;
}
From source file:com.mycompany.keywordsearch.LineRecordReaderV2.java
public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException {
    FileSplit split = (FileSplit) genericSplit;
    Configuration job = context.getConfiguration();
    this.maxLineLength = job.getInt(MAX_LINE_LENGTH, Integer.MAX_VALUE);
    start = split.getStart();
    end = start + split.getLength();
    final Path file = split.getPath();

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);
    locationKey.set(file.toString());

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        if (codec instanceof SplittableCompressionCodec) {
            final SplitCompressionInputStream cIn = ((SplittableCompressionCodec) codec).createInputStream(
                    fileIn, decompressor, start, end, SplittableCompressionCodec.READ_MODE.BYBLOCK);
            in = new CompressedSplitLineReader(cIn, job, this.recordDelimiterBytes);
            start = cIn.getAdjustedStart();
            end = cIn.getAdjustedEnd();
            filePosition = cIn;
        } else {
            in = new SplitLineReader(codec.createInputStream(fileIn, decompressor), job,
                    this.recordDelimiterBytes);
            filePosition = fileIn;
        }
    } else {
        fileIn.seek(start);
        in = new SplitLineReader(fileIn, job, this.recordDelimiterBytes);
        filePosition = fileIn;
    }
    // If this is not the first split, we always throw away first record
    // because we always (except the last split) read one extra line in
    // next() method.
    if (start != 0) {
        start += in.readLine(new Text(), 0, maxBytesToConsume(start));
    }
    this.pos = start;
}
From source file:com.mycompany.movehdfstohbase.MoveHdfsToHbase.java
private static void putData() throws IOException {
    List<Put> putList = new LinkedList<>();
    FileSystem fs = FileSystem.get(conf);
    FileStatus[] status = fs.listStatus(new Path("/page"));
    int counter = 0;
    Table table = connection.getTable(TableName.valueOf(TABLE_NAME));
    for (FileStatus f : status) {
        BufferedReader br = new BufferedReader(new InputStreamReader(fs.open(f.getPath())));
        Put put = new Put(Bytes.toBytes("row" + (++counter)));
        put.addColumn(Bytes.toBytes("url"), null, Bytes.toBytes(br.readLine()));   // url
        put.addColumn(Bytes.toBytes("title"), null, Bytes.toBytes(br.readLine())); // title
        put.addColumn(Bytes.toBytes("body"), null, Bytes.toBytes(br.readLine()));  // body
        putList.add(put);
    }
    table.put(putList);
    table.close();
}
From source file:com.mycustomloader.vsamloader.VSAMLoader.java
License:Apache License
public void cal(String nam) throws IOException {
    String t = nam;
    Configuration conf = new Configuration();
    Path p = new Path(t);
    FileSystem fs = p.getFileSystem(conf);
    FSDataInputStream fsdin = fs.open(p);
    Copybook parsedcb = CopybookParser.parse("copy", fsdin);
    Iterator<Group> itr = parsedcb.getChildren().iterator();
    rec(itr);
}
From source file:com.nearinfinity.blur.analysis.BlurAnalyzer.java
License:Apache License
public static BlurAnalyzer create(Path path) throws IOException {
    FileSystem fileSystem = FileSystem.get(path.toUri(), new Configuration());
    FSDataInputStream inputStream = fileSystem.open(path);
    try {
        return create(inputStream);
    } finally {
        inputStream.close();
    }
}
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException {
    FileSystem fs = output.getFileSystem(conf);

    Vector weightsPerLabel = null;
    Vector perLabelThetaNormalizer = null;
    Vector weightsPerFeature = null;
    Matrix weightsPerLabelAndFeature;
    float alphaI;

    FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin"));
    try {
        alphaI = in.readFloat();
        weightsPerFeature = VectorWritable.readVector(in);
        weightsPerLabel = new DenseVector(VectorWritable.readVector(in));
        perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in));

        weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size());
        for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) {
            weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in));
        }
    } finally {
        Closeables.close(in, true);
    }

    NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel,
            perLabelThetaNormalizer, alphaI);
    model.validate();
    return model;
}