List of usage examples for org.apache.hadoop.fs FileSystem open
public FSDataInputStream open(Path f) throws IOException
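A minimal sketch of the basic call pattern before the full examples below (the class name FsOpenExample and the path /tmp/example.txt are illustrative placeholders, not taken from any of the listed source files): open a file on the configured FileSystem and stream its contents to stdout.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

public class FsOpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();           // picks up core-site.xml / hdfs-site.xml if on the classpath
        FileSystem fs = FileSystem.get(conf);               // default FileSystem for this configuration
        Path path = new Path("/tmp/example.txt");           // hypothetical input path
        try (FSDataInputStream in = fs.open(path)) {        // open() returns a seekable, positioned-readable stream
            IOUtils.copyBytes(in, System.out, 4096, false); // copy with a 4 KB buffer; leave System.out open
        }
    }
}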
From source file:boostingPL.MR.AdaBoostPLTestMapper.java
License:Open Source License
protected void setup(Context context) throws IOException, InterruptedException {
    // classifier file
    Path path = new Path(context.getConfiguration().get("BoostingPL.modelPath") + "/part-r-00000");
    String boostingName = context.getConfiguration().get("BoostingPL.boostingName");
    boostingPL = BoostingPLFactory.createBoostingPL(boostingName, context.getConfiguration(), path);

    // testing dataset metadata
    String pathSrc = context.getConfiguration().get("BoostingPL.metadata");
    FileSystem hdfs = FileSystem.get(context.getConfiguration());
    FSDataInputStream dis = new FSDataInputStream(hdfs.open(new Path(pathSrc)));
    LineReader in = new LineReader(dis);
    insts = InstancesHelper.createInstancesFromMetadata(in);
    in.close();
    dis.close();

    try {
        eval = new Evaluation(insts);
    } catch (Exception e) {
        LOG.error("[BoostingPL-Test]: Evaluation init error!");
        e.printStackTrace();
    }

    instanceCounter = context.getCounter("BoostingPL", "Number of instances");
}
From source file:boostingPL.MR.AdaBoostPLTestReducer.java
License:Open Source License
protected void setup(Context context) throws IOException, InterruptedException {
    // classifier file
    Path path = new Path(context.getConfiguration().get("BoostingPL.modelPath") + "/part-r-00000");
    String boostingName = context.getConfiguration().get("BoostingPL.boostingName");
    boostingPL = BoostingPLFactory.createBoostingPL(boostingName, context.getConfiguration(), path);

    // testing dataset metadata
    String pathSrc = context.getConfiguration().get("BoostingPL.metadata");
    FileSystem hdfs = FileSystem.get(context.getConfiguration());
    FSDataInputStream dis = new FSDataInputStream(hdfs.open(new Path(pathSrc)));
    LineReader in = new LineReader(dis);
    insts = InstancesHelper.createInstancesFromMetadata(in);
    in.close();
    dis.close();

    try {
        eval = new Evaluation(insts);
    } catch (Exception e) {
        LOG.error("[BoostingPL-Test]: Evaluation init error!");
        e.printStackTrace();
    }
}
From source file:br.ufrj.nce.recureco.distributedindex.search.controller.DocumentViewerServlet.java
License:Open Source License
protected void doGet(javax.servlet.http.HttpServletRequest request,
        javax.servlet.http.HttpServletResponse response) throws javax.servlet.ServletException, IOException {
    String doc = request.getParameter("doc");
    if (doc != null && doc.trim().length() > 0) {
        try {
            String filePath = DIR_DOWNLOAD + doc;

            Configuration conf = new Configuration();
            conf.addResource(new Path(DIR_HADOOP_CONF + "core-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "hdfs-site.xml"));
            conf.addResource(new Path(DIR_HADOOP_CONF + "mapred-site.xml"));

            FileSystem fileSystem = FileSystem.get(conf);

            Path path = new Path(filePath);
            if (!fileSystem.exists(path)) {
                response.getWriter().print("File not found.");
                return;
            }

            FSDataInputStream in = fileSystem.open(path);

            response.setContentType("text/plain");

            int read = 0;
            byte[] bytes = new byte[BYTES_DOWNLOAD];
            OutputStream os = response.getOutputStream();

            while ((read = in.read(bytes)) != -1) {
                os.write(bytes, 0, read);
            }
            os.flush();
            os.close();
        } catch (FileNotFoundException e) {
            response.getWriter().print("File not found.");
        }
    } else {
        // print invalid document
        response.getWriter().print("File not informed.");
    }
}
From source file:brush.FastqRecordReader.java
License:Apache License
/**
 * Builds a new record reader given a config file and an input split.
 *
 * @param conf  The Hadoop configuration object. Used for gaining access
 *              to the underlying file system.
 * @param split The file split to read.
 */
protected FastqRecordReader(final Configuration conf, final FileSplit split) throws IOException {
    file = split.getPath();
    start = split.getStart();
    end = start + split.getLength();

    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(file);

    CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf);
    CompressionCodec codec = codecFactory.getCodec(file);

    if (codec == null) { // no codec. Uncompressed file.
        positionAtFirstRecord(fileIn);
        inputStream = fileIn;
    } else { // compressed file
        if (start != 0) {
            throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")");
        }
        inputStream = codec.createInputStream(fileIn);
        end = Long.MAX_VALUE; // read until the end of the file
    }

    lineReader = new LineReader(inputStream);
}
From source file:bucket_sort.NLineInputFormat.java
License:Apache License
public static List<FileSplit> getSplitsForFile(FileStatus status, Configuration conf, int numLinesPerSplit)
        throws IOException {
    List<FileSplit> splits = new ArrayList<FileSplit>();
    Path fileName = status.getPath();
    if (status.isDir()) {
        throw new IOException("Not a file: " + fileName);
    }
    FileSystem fs = fileName.getFileSystem(conf);
    LineReader lr = null;
    try {
        FSDataInputStream in = fs.open(fileName);
        lr = new LineReader(in, conf);
        Text line = new Text();
        int numLines = 0;
        long begin = 0;
        long length = 0;
        int num = -1;
        while ((num = lr.readLine(line)) > 0) {
            numLines++;
            length += num;
            if (numLines == numLinesPerSplit) {
                // NLineInputFormat uses LineRecordReader, which always reads
                // (and consumes) at least one character out of its upper split
                // boundary. So to make sure that each mapper gets N lines, we
                // move back the upper split limits of each split
                // by one character here.
                if (begin == 0) {
                    splits.add(new FileSplit(fileName, begin, length - 1, new String[] {}));
                } else {
                    splits.add(new FileSplit(fileName, begin - 1, length, new String[] {}));
                }
                begin += length;
                length = 0;
                numLines = 0;
            }
        }
        if (numLines != 0) {
            splits.add(new FileSplit(fileName, begin, length, new String[] {}));
        }
    } finally {
        if (lr != null) {
            lr.close();
        }
    }
    return splits;
}
From source file:ca.sparkera.adapters.mainframe.CobolSerdeUtils.java
License:Apache License
protected static String getLayoutFromFS(String layoutFSUrl, Configuration conf)
        throws IOException, URISyntaxException {
    FSDataInputStream in = null;
    FileSystem fs = null;
    try {
        fs = FileSystem.get(new URI(layoutFSUrl), conf);
    } catch (IOException ioe) {
        // return null only if the file system in layout is not recognized
        String msg = "Failed to open file system for uri " + layoutFSUrl + " assuming it is not a FileSystem url";
        LOG.debug(msg, ioe);
        return null;
    }
    try {
        in = fs.open(new Path(layoutFSUrl));
        String s = CobolSerdeUtils.getLayoutFor(in);
        return s;
    } finally {
        if (in != null)
            in.close();
    }
}
From source file:ca.sparkera.adapters.mapred.MainframeVBInputFormat.java
License:Apache License
/**
 * Splits files returned by {@link #listStatus(JobConf)} when they're too big.
 */
@Override
@SuppressWarnings("deprecation")
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    FileStatus[] files = listStatus(job);

    // check we have valid files
    for (FileStatus file : files) {
        if (file.isDir()) {
            throw new IOException("Not a file: " + file.getPath());
        }
        totalSize += file.getLen();
    }
    long goalSize = totalSize / (numSplits == 0 ? 1 : numSplits);
    long minSize = Math.max(job.getLong("mapred.min.split.size", 1), minSplitSize);

    // generate splits
    ArrayList<FileSplit> splits = new ArrayList<FileSplit>(numSplits);
    for (FileStatus file : files) {
        Path path = file.getPath();
        FileSystem fs = path.getFileSystem(job);
        FSDataInputStream fileIn;
        InputStream inputStream;
        fileIn = fs.open(path);
        inputStream = fileIn;
        filePosition = fileIn;
        long offset = 0;
        long length = file.getLen();
        BlockLocation[] blkLocations = fs.getFileBlockLocations(file, 0, length);
        if ((length != 0) && isSplitable(fs, path)) {
            long blockSize = file.getBlockSize();
            long bytesRemaining = length;
            long splitSize = 0;
            while (offset < length) {
                splitSize = computeSplitSize(goalSize, minSize, blockSize, inputStream);
                int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
                splits.add(new FileSplit(path, length - bytesRemaining, splitSize,
                        blkLocations[blkIndex].getHosts()));
                bytesRemaining -= splitSize;
                offset = length - bytesRemaining;
            }
            if (bytesRemaining != 0) {
                throw new IOException(
                        "Partial record(length = " + bytesRemaining + ") found at the end of file " + path);
            }
        } else if (length != 0) {
            splits.add(new FileSplit(path, 0, length, blkLocations[0].getHosts()));
        } else {
            // Create empty hosts array for zero length files
            splits.add(new FileSplit(path, 0, length, new String[0]));
        }
        if (inputStream != null) {
            inputStream.close();
            inputStream = null;
        }
    }
    java.util.Date date = new java.util.Date();
    System.out.println((new Timestamp(date.getTime())) + ",\t Split = 100% Total Splits - " + (++splitCount)
            + "\t Total Records in VB file - " + totalRecords);
    LOG.debug("Total # of splits: " + splits.size());
    return splits.toArray(new FileSplit[splits.size()]);
}
From source file:ca.sparkera.adapters.mapreduce.MainframeVBRecordReader.java
License:Apache License
public void initialize(Configuration job, long splitStart, long splitLength, Path file) throws IOException {
    start = splitStart;
    end = start + splitLength;

    LOG.info("Start of the split:" + start + "-End of split:" + end);
    LOG.debug("VLR initialize started: start pos:" + start + "endpos:" + end);

    // open the file and seek to the start of the split
    final FileSystem fs = file.getFileSystem(job);
    fileIn = fs.open(file);

    CompressionCodec codec = new CompressionCodecFactory(job).getCodec(file);
    if (null != codec) {
        isCompressedInput = true;
        decompressor = CodecPool.getDecompressor(codec);
        CompressionInputStream cIn = codec.createInputStream(fileIn, decompressor);
        filePosition = cIn;
        inputStream = cIn;
        LOG.info("Compressed input; cannot compute number of records in the split");
    } else {
        fileIn.seek(start);
        filePosition = fileIn;
        inputStream = fileIn;
        numBytesRemainingInSplit = splitLength;
        LOG.info("Variable length input; cannot compute number of records in the split");
    }
    this.pos = start;
}
From source file:cascading.avro.AvroScheme.java
License:Apache License
/**
 * This method peeks at the source data to get a schema when none has been provided.
 *
 * @param flowProcess The cascading FlowProcess object for this flow.
 * @param tap         The cascading Tap object.
 * @return Schema The schema of the peeked at data, or Schema.NULL if none exists.
 */
private Schema getSourceSchema(FlowProcess<JobConf> flowProcess, Tap tap) throws IOException {
    if (tap instanceof CompositeTap) {
        tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
    }
    final String path = tap.getIdentifier();
    Path p = new Path(path);
    final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy());
    // Get all the input dirs
    List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter)));
    // Now get all the things that are one level down
    for (FileStatus status : new LinkedList<FileStatus>(statuses)) {
        if (status.isDir())
            for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) {
                if (child.isDir()) {
                    statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter)));
                } else if (fs.isFile(child.getPath())) {
                    statuses.add(child);
                }
            }
    }
    for (FileStatus status : statuses) {
        Path statusPath = status.getPath();
        if (fs.isFile(statusPath)) {
            // no need to open them all
            InputStream stream = null;
            DataFileStream reader = null;
            try {
                stream = new BufferedInputStream(fs.open(statusPath));
                reader = new DataFileStream(stream, new GenericDatumReader());
                return reader.getSchema();
            } finally {
                if (reader == null) {
                    if (stream != null) {
                        stream.close();
                    }
                } else {
                    reader.close();
                }
            }
        }
    }
    // couldn't find any Avro files, return null schema
    return Schema.create(Schema.Type.NULL);
}
From source file:cascading.scheme.DeprecatedAvroScheme.java
License:Apache License
/**
 * This method peeks at the source data to get a schema when none has been provided.
 *
 * @param flowProcess The cascading FlowProcess object for this flow.
 * @param tap         The cascading Tap object.
 * @return Schema The schema of the peeked at data, or Schema.NULL if none exists.
 */
private Schema getSourceSchema(FlowProcess<? extends Configuration> flowProcess, Tap tap) throws IOException {
    if (tap instanceof CompositeTap) {
        tap = (Tap) ((CompositeTap) tap).getChildTaps().next();
    }
    final String path = tap.getIdentifier();
    Path p = new Path(path);
    final FileSystem fs = p.getFileSystem(flowProcess.getConfigCopy());
    // Get all the input dirs
    List<FileStatus> statuses = new LinkedList<FileStatus>(Arrays.asList(fs.globStatus(p, filter)));
    // Now get all the things that are one level down
    for (FileStatus status : new LinkedList<FileStatus>(statuses)) {
        if (status.isDir())
            for (FileStatus child : Arrays.asList(fs.listStatus(status.getPath(), filter))) {
                if (child.isDir()) {
                    statuses.addAll(Arrays.asList(fs.listStatus(child.getPath(), filter)));
                } else if (fs.isFile(child.getPath())) {
                    statuses.add(child);
                }
            }
    }
    for (FileStatus status : statuses) {
        Path statusPath = status.getPath();
        if (fs.isFile(statusPath)) {
            // no need to open them all
            InputStream stream = null;
            DataFileStream reader = null;
            try {
                stream = new BufferedInputStream(fs.open(statusPath));
                reader = new DataFileStream(stream, new GenericDatumReader());
                return reader.getSchema();
            } finally {
                if (reader == null) {
                    if (stream != null) {
                        stream.close();
                    }
                } else {
                    reader.close();
                }
            }
        }
    }
    // couldn't find any Avro files, return null schema
    return Schema.create(Schema.Type.NULL);
}