List of usage examples for the LineRecordReader constructor of org.apache.hadoop.mapred.LineRecordReader
public LineRecordReader(InputStream in, long offset, long endOffset, Configuration job) throws IOException
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
private RecordReader<LongWritable, Text> getReaderForAll(final FSDataInputStream inputStream) throws IOException { final long bytesSize[] = new long[] { 0 }; final long bytesRead[] = new long[] { 0 }; Enumeration<InputStream> enumeration = new Enumeration<InputStream>() { boolean returnCurrent = false; ZipEntry nextEntry;//from w w w . j a v a2 s . c o m ZipInputStream zipInputStream = new ZipInputStream(inputStream); InputStream closeableInputStream = makeInputStream(zipInputStream); public boolean hasMoreElements() { if (returnCurrent) return nextEntry != null; getNext(); return nextEntry != null; } public InputStream nextElement() { if (returnCurrent) { returnCurrent = false; return closeableInputStream; } getNext(); if (nextEntry == null) throw new IllegalStateException("no more zip entries in zip input stream"); return closeableInputStream; } private void getNext() { try { nextEntry = zipInputStream.getNextEntry(); while (nextEntry != null && nextEntry.isDirectory()) nextEntry = zipInputStream.getNextEntry(); if (nextEntry != null) bytesSize[0] += nextEntry.getSize(); returnCurrent = true; } catch (IOException exception) { throw new RuntimeException("could not get next zip entry", exception); } finally { // i think, better than sending across a fake input stream that closes the zip if (nextEntry == null) safeClose(zipInputStream); } } private InputStream makeInputStream(ZipInputStream zipInputStream) { return new FilterInputStream(zipInputStream) { @Override public int read() throws IOException { bytesRead[0]++; return super.read(); } @Override public int read(byte[] bytes) throws IOException { int result = super.read(bytes); bytesRead[0] += result; return result; } @Override public int read(byte[] bytes, int i, int i1) throws IOException { int result = super.read(bytes, i, i1); bytesRead[0] += result; return result; } @Override public long skip(long l) throws IOException { long result = super.skip(l); bytesRead[0] += result; return result; } @Override public 
void close() throws IOException { // do nothing } }; } }; return new LineRecordReader(new SequenceInputStream(enumeration), 0, Long.MAX_VALUE, Integer.MAX_VALUE) { @Override public float getProgress() { if (0 == bytesSize[0]) return 0.0f; else return Math.min(1.0f, bytesRead[0] / (float) bytesSize[0]); } }; }
From source file:cascading.tap.hadoop.ZipInputFormat.java
License:Open Source License
/**
 * Creates a line reader positioned on the zip entry named by the given split.
 *
 * <p>The zip stream is advanced entry by entry until an entry whose name equals
 * {@code split.getEntryPath()} is reached, or the archive is exhausted. The
 * reader is then created over the zip stream in whatever position that leaves it.
 *
 * @param inputStream the raw stream positioned at the start of the zip archive
 * @param split       the split naming the entry to read
 * @param length      the end offset handed to the line reader
 * @return a record reader over the zip stream
 * @throws IOException if advancing through the zip entries fails
 */
private RecordReader<LongWritable, Text> getReaderForEntry(FSDataInputStream inputStream, ZipSplit split, long length) throws IOException {
    ZipInputStream zipStream = new ZipInputStream(inputStream);
    String wantedPath = split.getEntryPath();

    // Walk the archive until the wanted entry is current or no entries remain.
    for (ZipEntry candidate = zipStream.getNextEntry(); candidate != null; candidate = zipStream.getNextEntry()) {
        if (candidate.getName().equals(wantedPath))
            break;
    }

    return new LineRecordReader(zipStream, 0, length, Integer.MAX_VALUE);
}
From source file:edu.umn.cs.spatialHadoop.mapred.SpatialInputFormat.java
License:Open Source License
protected void listStatus(final FileSystem fs, Path dir, final List<FileStatus> result, BlockFilter filter) throws IOException { GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(fs, dir); if (gindex == null) { FileStatus[] listStatus;/*from www . ja v a2 s. c o m*/ if (OperationsParams.isWildcard(dir)) { // Wild card listStatus = fs.globStatus(dir); } else { listStatus = fs.listStatus(dir, SpatialSite.NonHiddenFileFilter); } // Add all files under this directory for (FileStatus status : listStatus) { if (status.isDir()) { listStatus(fs, status.getPath(), result, filter); } else if (status.getPath().getName().toLowerCase().endsWith(".list")) { LineRecordReader in = new LineRecordReader(fs.open(status.getPath()), 0, status.getLen(), Integer.MAX_VALUE); LongWritable key = in.createKey(); Text value = in.createValue(); while (in.next(key, value)) { result.add(fs.getFileStatus(new Path(status.getPath().getParent(), value.toString()))); } in.close(); } else { result.add(status); } } } else { final Path indexDir = OperationsParams.isWildcard(dir) ? dir.getParent() : dir; // Use the global index to limit files filter.selectCells(gindex, new ResultCollector<Partition>() { @Override public void collect(Partition partition) { try { Path cell_path = new Path(indexDir, partition.filename); if (!fs.exists(cell_path)) LOG.warn("Matched file not found: " + cell_path); result.add(fs.getFileStatus(cell_path)); } catch (IOException e) { e.printStackTrace(); } } }); } }
From source file:edu.umn.cs.spatialHadoop.operations.CatUnion.java
License:Open Source License
/** * Read all categories from the category file * @param categoryFile//from w w w.j a v a 2s .c o m * @param categoryShapes * @param idToCategory * @throws IOException */ private static void readCategories(Path categoryFile, Map<Integer, Integer> idToCategory) throws IOException { Map<Integer, String> idToCatName = new HashMap<Integer, String>(); FileSystem fsCategory = FileSystem.getLocal(new Configuration()); long categoryFileSize = fsCategory.getFileStatus(categoryFile).getLen(); if (categoryFileSize > 1024 * 1024) LOG.warn("Category file size is big: " + categoryFileSize); InputStream inCategory = fsCategory.open(categoryFile); LineRecordReader lineReader = new LineRecordReader(inCategory, 0, categoryFileSize, new Configuration()); LongWritable lineOffset = lineReader.createKey(); Text line = lineReader.createValue(); Set<String> catNames = new TreeSet<String>(); while (lineReader.next(lineOffset, line)) { int shape_id = TextSerializerHelper.consumeInt(line, ','); String cat_name = line.toString(); catNames.add(cat_name); idToCatName.put(shape_id, cat_name); } lineReader.close(); // Change category names to numbers Map<String, Integer> cat_name_to_id = new HashMap<String, Integer>(); int cat_id = 0; for (String cat_name : catNames) { cat_name_to_id.put(cat_name, cat_id++); } for (Map.Entry<Integer, String> entry : idToCatName.entrySet()) { idToCategory.put(entry.getKey(), cat_name_to_id.get(entry.getValue())); } }
From source file:kogiri.mapreduce.libra.kmersimilarity_m.KmerSimilarityMap.java
License:Open Source License
private void sumScores(Path outputPath, Configuration conf) throws IOException { Path[] resultFiles = KmerSimilarityHelper.getAllKmerSimilarityResultFilePath(conf, outputPath.toString()); FileSystem fs = outputPath.getFileSystem(conf); KmerSimilarityOutputRecord scoreRec = null; for (Path resultFile : resultFiles) { LOG.info("Reading the scores from " + resultFile.toString()); FSDataInputStream is = fs.open(resultFile); FileStatus status = fs.getFileStatus(resultFile); LineRecordReader reader = new LineRecordReader(is, 0, status.getLen(), conf); LongWritable off = new LongWritable(); Text val = new Text(); while (reader.next(off, val)) { if (scoreRec == null) { scoreRec = KmerSimilarityOutputRecord.createInstance(val.toString()); } else { KmerSimilarityOutputRecord rec2 = KmerSimilarityOutputRecord.createInstance(val.toString()); scoreRec.addScore(rec2.getScore()); }//from w w w.jav a 2 s . c o m } reader.close(); } double[] accumulatedScore = scoreRec.getScore(); String resultFilename = KmerSimilarityHelper.makeKmerSimilarityFinalResultFileName(); Path resultFilePath = new Path(outputPath, resultFilename); LOG.info("Creating a final score file : " + resultFilePath.toString()); FSDataOutputStream os = fs.create(resultFilePath); int n = (int) Math.sqrt(accumulatedScore.length); for (int i = 0; i < accumulatedScore.length; i++) { int x = i / n; int y = i % n; String k = x + "-" + y; String v = Double.toString(accumulatedScore[i]); String out = k + "\t" + v + "\n"; os.write(out.getBytes()); } os.close(); }
From source file:libra.core.kmersimilarity_m.KmerSimilarityMap.java
License:Apache License
private void sumScores(Path outputPath, Configuration conf) throws IOException { Path[] resultFiles = KmerSimilarityHelper.getAllKmerSimilarityResultFilePath(conf, outputPath.toString()); FileSystem fs = outputPath.getFileSystem(conf); KmerSimilarityOutputRecord scoreRec = null; for (Path resultFile : resultFiles) { LOG.info("Reading the scores from " + resultFile.toString()); FSDataInputStream is = fs.open(resultFile); FileStatus status = fs.getFileStatus(resultFile); LineRecordReader reader = new LineRecordReader(is, 0, status.getLen(), conf); LongWritable off = new LongWritable(); Text val = new Text(); while (reader.next(off, val)) { if (scoreRec == null) { scoreRec = KmerSimilarityOutputRecord.createInstance(val.toString()); } else { KmerSimilarityOutputRecord rec2 = KmerSimilarityOutputRecord.createInstance(val.toString()); scoreRec.addScore(rec2.getScore()); }//from w w w . jav a2 s. c o m } reader.close(); } double[] accumulatedScore = scoreRec.getScore(); String resultFilename = KmerSimilarityHelper.makeKmerSimilarityFinalResultFileName(); Path resultFilePath = new Path(outputPath, resultFilename); LOG.info("Creating a final score file : " + resultFilePath.toString()); FSDataOutputStream os = fs.create(resultFilePath); int n = (int) Math.sqrt(accumulatedScore.length); for (int i = 0; i < accumulatedScore.length; i++) { int x = i / n; int y = i % n; String k = x + "-" + y; String v = Double.toString(accumulatedScore[i]); if (x == y) { v = Double.toString(1.0); } String out = k + "\t" + v + "\n"; os.write(out.getBytes()); } os.close(); }