List of usage examples for org.apache.hadoop.mapreduce RecordReader nextKeyValue
public abstract boolean nextKeyValue() throws IOException, InterruptedException;
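Every example below follows the same calling pattern: obtain a RecordReader from an InputFormat, initialize it with an input split and a task attempt context, call nextKeyValue() in a loop until it returns false, and read each record through getCurrentKey()/getCurrentValue() inside the loop. A minimal sketch of that loop is shown here; the inputFormat, split, and context variables are placeholders assumed to be set up elsewhere, and the LongWritable/Text key and value types are just one common choice (e.g. TextInputFormat), not a requirement.

// Minimal sketch of the standard read loop; not taken from any example below.
// inputFormat, split, and context are assumed to be created elsewhere.
RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
try {
    reader.initialize(split, context);
    while (reader.nextKeyValue()) { // advances the reader; returns false when the split is exhausted
        LongWritable key = reader.getCurrentKey();
        Text value = reader.getCurrentValue();
        // process key and value here
    }
} finally {
    reader.close();
}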
From source file:com.splicemachine.derby.impl.io.WholeTextInputFormatTest.java
License:Apache License
private long collectRecords(Set<String> fileNames, RecordReader<String, InputStream> recordReader)
        throws IOException, InterruptedException {
    long count = 0L;
    while (recordReader.nextKeyValue()) {
        String key = recordReader.getCurrentKey();
        key = key.replaceAll("/+", "/"); // some platforms add more "/" at the beginning, coalesce them for equality check
        Assert.assertTrue("Seen the same file twice!", fileNames.add(key));
        InputStream is = recordReader.getCurrentValue();
        try (BufferedReader br = new BufferedReader(new InputStreamReader(is))) {
            String n;
            while ((n = br.readLine()) != null) {
                count++;
            }
        }
    }
    return count;
}
From source file:com.splout.db.hadoop.SchemaSampler.java
License:Apache License
public static Schema sample(Configuration conf, Path input, InputFormat<ITuple, NullWritable> inputFormat)
        throws IOException, InterruptedException {
    Schema schema = null;

    // sample schema from input path given the provided InputFormat
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    FileInputFormat.setInputPaths(job, input);
    // get first inputSplit
    List<InputSplit> inputSplits = inputFormat.getSplits(job);
    if (inputSplits == null || inputSplits.size() == 0) {
        throw new IOException(
                "Given input format doesn't produce any input split. Can't sample first record. PATH: " + input);
    }
    InputSplit inputSplit = inputSplits.get(0);
    TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
    TaskAttemptContext attemptContext;
    try {
        attemptContext = TaskAttemptContextFactory.get(conf, attemptId);
    } catch (Exception e) {
        throw new IOException(e);
    }

    RecordReader<ITuple, NullWritable> rReader = inputFormat.createRecordReader(inputSplit, attemptContext);
    rReader.initialize(inputSplit, attemptContext);

    if (!rReader.nextKeyValue()) {
        throw new IOException(
                "Can't read first record of first input split of the given path [" + input + "].");
    }

    // finally get the sample schema
    schema = rReader.getCurrentKey().getSchema();
    log.info("Sampled schema from [" + input + "] : " + schema);

    rReader.close();
    return schema;
}
From source file:com.splout.db.hadoop.TupleSampler.java
License:Apache License
/**
 * Random sampling method a-la-TeraSort, getting some consecutive samples from each InputSplit
 * without using a Job.
 * The output is a SequenceFile with keys.
 *
 * @return The number of retrieved samples
 */
private long randomSampling(long sampleSize, Configuration hadoopConf, Path outFile, List<InputSplit> splits,
        Map<InputSplit, TableSpec> splitToTableSpec,
        Map<InputSplit, InputFormat<ITuple, NullWritable>> splitToFormat,
        Map<InputSplit, Map<String, String>> specificHadoopConf,
        Map<InputSplit, RecordProcessor> recordProcessorPerSplit,
        Map<InputSplit, JavascriptEngine> splitToJsEngine, int maxSplitsToVisit) throws IOException {

    // Instantiate the writer we will write samples to
    FileSystem fs = FileSystem.get(outFile.toUri(), hadoopConf);
    if (splits.size() == 0) {
        throw new IllegalArgumentException("There are no splits to sample from!");
    }
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, hadoopConf, outFile, Text.class,
            NullWritable.class);

    logger.info("Sequential sampling options, max splits to visit: " + maxSplitsToVisit + ", samples to take: "
            + sampleSize + ", total number of splits: " + splits.size());
    int blocks = Math.min(maxSplitsToVisit, splits.size());
    blocks = Math.min((int) sampleSize, blocks);
    long recordsPerSample = sampleSize / blocks;
    int sampleStep = splits.size() / blocks;

    long records = 0;

    CounterInterface counterInterface = new CounterInterface(null) {
        public Counter getCounter(String group, String name) {
            return Mockito.mock(Counter.class);
        }
    };

    // Take N samples from different parts of the input
    for (int i = 0; i < blocks; ++i) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);

        TaskAttemptContext attemptContext = null;
        try {
            attemptContext = TaskAttemptContextFactory.get(hadoopConf, attemptId);
        } catch (Exception e) {
            throw new RuntimeException(e);
        }

        InputSplit split = splits.get(sampleStep * i);
        if (specificHadoopConf.get(split) != null) {
            for (Map.Entry<String, String> specificConf : specificHadoopConf.get(split).entrySet()) {
                attemptContext.getConfiguration().set(specificConf.getKey(), specificConf.getValue());
            }
        }
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = null;
        try {
            reader = splitToFormat.get(split).createRecordReader(split, attemptContext);
            reader.initialize(split, attemptContext);

            RecordProcessor processor = recordProcessorPerSplit.get(split);
            Text key = new Text();
            while (reader.nextKeyValue()) {
                ITuple tuple = reader.getCurrentKey();
                ITuple uTuple;
                try {
                    uTuple = processor.process(tuple, tuple.getSchema().getName(), counterInterface);
                } catch (Throwable e) {
                    throw new RuntimeException(e);
                }
                if (uTuple != null) { // user may have filtered the record
                    try {
                        key.set(TablespaceGenerator.getPartitionByKey(uTuple, splitToTableSpec.get(split),
                                splitToJsEngine.get(split)));
                    } catch (Throwable e) {
                        throw new RuntimeException("Error when determining partition key.", e);
                    }
                    writer.append(key, NullWritable.get());
                    records += 1;
                    if ((i + 1) * recordsPerSample <= records) {
                        break;
                    }
                }
            }
        } catch (InterruptedException e) {
            throw new RuntimeException(e);
        }
    }

    writer.close();
    return records;
}
From source file:com.streamsets.pipeline.stage.origin.hdfs.cluster.ClusterHdfsSource.java
License:Apache License
private List<Map.Entry> previewTextBatch(FileStatus fileStatus, int batchSize)
        throws IOException, InterruptedException {
    TextInputFormat textInputFormat = new TextInputFormat();
    InputSplit fileSplit = new FileSplit(fileStatus.getPath(), 0, fileStatus.getLen(), null);
    TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(hadoopConf,
            TaskAttemptID.forName("attempt_1439420318532_0011_m_000000_0"));
    RecordReader<LongWritable, Text> recordReader = textInputFormat.createRecordReader(fileSplit,
            taskAttemptContext);
    recordReader.initialize(fileSplit, taskAttemptContext);
    boolean hasNext = recordReader.nextKeyValue();
    List<Map.Entry> batch = new ArrayList<>();
    while (hasNext && batch.size() < batchSize) {
        batch.add(new Pair(fileStatus.getPath().toUri().getPath() + "::" + recordReader.getCurrentKey(),
                String.valueOf(recordReader.getCurrentValue())));
        hasNext = recordReader.nextKeyValue(); // not like iterator.hasNext, actually advances
    }
    return batch;
}
From source file:edu.uci.ics.hyracks.hdfs2.dataflow.HDFSReadOperatorDescriptor.java
License:Apache License
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                writer.open();
                InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(),
                        job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read; synchronize among
                         * simultaneous partitions in the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }

                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue() == true) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                    inputSplits.get(i).toString());
                        }
                    }
                }
                parser.close(writer);
                writer.close();
            } catch (Exception e) {
                throw new HyracksDataException(e);
            } finally {
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}
From source file:edu.umn.cs.spatialHadoop.core.RectangleNN.java
License:Open Source License
public static long spatialJoinLocal(Path[] inFiles, Path outFile, OperationsParams params)
        throws IOException, InterruptedException {
    // Read the inputs and store them in memory
    List<Shape>[] datasets = new List[inFiles.length];
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    for (int i = 0; i < inFiles.length; i++) {
        datasets[i] = new ArrayList<Shape>();
        FileSystem inFs = inFiles[i].getFileSystem(params);
        Job job = Job.getInstance(params);
        SpatialInputFormat3.addInputPath(job, inFiles[i]);
        for (InputSplit split : inputFormat.getSplits(job)) {
            FileSplit fsplit = (FileSplit) split;
            RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
            if (reader instanceof SpatialRecordReader3) {
                ((SpatialRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof RTreeRecordReader3) {
                ((RTreeRecordReader3) reader).initialize(fsplit, params);
            } else if (reader instanceof HDFRecordReader) {
                ((HDFRecordReader) reader).initialize(fsplit, params);
            } else {
                throw new RuntimeException("Unknown record reader");
            }
            while (reader.nextKeyValue()) {
                Iterable<Shape> shapes = reader.getCurrentValue();
                for (Shape shape : shapes) {
                    datasets[i].add(shape.clone());
                }
            }
            reader.close();
        }
    }

    // Apply the spatial join algorithm
    ResultCollector2<Shape, Shape> output = null;
    PrintStream out = null;
    if (outFile != null) {
        FileSystem outFS = outFile.getFileSystem(params);
        out = new PrintStream(outFS.create(outFile));
        final PrintStream outout = out;
        output = new ResultCollector2<Shape, Shape>() {
            @Override
            public void collect(Shape r, Shape s) {
                outout.println(r.toText(new Text()) + "," + s.toText(new Text()));
            }
        };
    }
    long resultCount = SpatialJoin_planeSweep(datasets[0], datasets[1], output, null);
    if (out != null)
        out.close();
    return resultCount;
}
From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java
License:Open Source License
/**
 * Compute the Delaunay triangulation in the local machine
 * @param inPaths
 * @param outPath
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Point[][] allLists = new Point[splits.size()][];

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            try {
                int numPoints = 0;
                for (int i = i1; i < i2; i++) {
                    List<Point> points = new ArrayList<Point>();
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points.add(p.clone());
                        }
                    }
                    reader.close();
                    numPoints += points.size();
                    allLists[i] = points.toArray(new Point[points.size()]);
                }
                return numPoints;
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (int numPoints : numsPoints)
        totalNumPoints += numPoints;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;
    for (int iList = 0; iList < allLists.length; iList++) {
        System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
        pointer += allLists[iList].length;
        allLists[iList] = null; // To let the GC collect it
    }

    if (params.getBoolean("dedup", true)) {
        float threshold = params.getFloat("threshold", 1E-5f);
        allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold);
    }

    LOG.info("Computing DT for " + allPoints.length + " points");
    GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null);
    LOG.info("DT computed");

    Rectangle mbr = FileMBR.fileMBR(inPaths, params);
    double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10;
    Rectangle bigMBR = mbr.buffer(buffer, buffer);
    if (outPath != null && params.getBoolean("output", true)) {
        LOG.info("Writing the output as a soup of triangles");
        Triangulation answer = dtAlgorithm.getFinalTriangulation();
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream out = new PrintStream(outFS.create(outPath));
        Text text = new Text2();
        byte[] tab = "\t".getBytes();
        for (Point[] triangle : answer.iterateTriangles()) {
            text.clear();
            triangle[0].toText(text);
            text.append(tab, 0, tab.length);
            triangle[1].toText(text);
            text.append(tab, 0, tab.length);
            triangle[2].toText(text);
            out.println(text);
        }
        out.close();
    }
    // dtAlgorithm.getFinalTriangulation().draw();
    //Triangulation finalPart = new Triangulation();
    //Triangulation nonfinalPart = new Triangulation();
    //dtAlgorithm.splitIntoFinalAndNonFinalParts(new Rectangle(-180, -90, 180, 90), finalPart, nonfinalPart);
}
From source file:edu.umn.cs.spatialHadoop.indexing.Indexer.java
License:Open Source License
private static void indexLocal(Path inPath, final Path outPath, OperationsParams params)
        throws IOException, InterruptedException {
    Job job = Job.getInstance(params);
    final Configuration conf = job.getConfiguration();
    final String sindex = conf.get("sindex");

    // Start reading input file
    List<InputSplit> splits = new ArrayList<InputSplit>();
    final SpatialInputFormat3<Rectangle, Shape> inputFormat = new SpatialInputFormat3<Rectangle, Shape>();
    FileSystem inFs = inPath.getFileSystem(conf);
    FileStatus inFStatus = inFs.getFileStatus(inPath);
    if (inFStatus != null && !inFStatus.isDir()) {
        // One file, retrieve it immediately.
        // This is useful if the input is a hidden file which is automatically
        // skipped by FileInputFormat. We need to plot a hidden file for the case
        // of plotting partition boundaries of a spatial index
        splits.add(new FileSplit(inPath, 0, inFStatus.getLen(), new String[0]));
    } else {
        SpatialInputFormat3.setInputPaths(job, inPath);
        for (InputSplit s : inputFormat.getSplits(job))
            splits.add(s);
    }

    // Copy splits to a final array to be used in parallel
    final FileSplit[] fsplits = splits.toArray(new FileSplit[splits.size()]);
    boolean replicate = PartitionerReplicate.get(sindex);

    // Set input file MBR if not already set
    Rectangle inputMBR = (Rectangle) OperationsParams.getShape(conf, "mbr");
    if (inputMBR == null) {
        inputMBR = FileMBR.fileMBR(inPath, new OperationsParams(conf));
        OperationsParams.setShape(conf, "mbr", inputMBR);
    }

    setLocalIndexer(conf, sindex);
    final Partitioner partitioner = createPartitioner(inPath, outPath, conf, sindex);

    final IndexRecordWriter<Shape> recordWriter = new IndexRecordWriter<Shape>(partitioner, replicate, sindex,
            outPath, conf);
    for (FileSplit fsplit : fsplits) {
        RecordReader<Rectangle, Iterable<Shape>> reader = inputFormat.createRecordReader(fsplit, null);
        if (reader instanceof SpatialRecordReader3) {
            ((SpatialRecordReader3) reader).initialize(fsplit, conf);
        } else if (reader instanceof RTreeRecordReader3) {
            ((RTreeRecordReader3) reader).initialize(fsplit, conf);
        } else if (reader instanceof HDFRecordReader) {
            ((HDFRecordReader) reader).initialize(fsplit, conf);
        } else {
            throw new RuntimeException("Unknown record reader");
        }

        final IntWritable partitionID = new IntWritable();

        while (reader.nextKeyValue()) {
            Iterable<Shape> shapes = reader.getCurrentValue();
            if (replicate) {
                for (final Shape s : shapes) {
                    partitioner.overlapPartitions(s, new ResultCollector<Integer>() {
                        @Override
                        public void collect(Integer id) {
                            partitionID.set(id);
                            try {
                                recordWriter.write(partitionID, s);
                            } catch (IOException e) {
                                throw new RuntimeException(e);
                            }
                        }
                    });
                }
            } else {
                for (final Shape s : shapes) {
                    int pid = partitioner.overlapPartition(s);
                    if (pid != -1) {
                        partitionID.set(pid);
                        recordWriter.write(partitionID, s);
                    }
                }
            }
        }
        reader.close();
    }
    recordWriter.close(null);

    // Write the WKT formatted master file
    Path masterPath = new Path(outPath, "_master." + sindex);
    FileSystem outFs = outPath.getFileSystem(params);
    Path wktPath = new Path(outPath, "_" + sindex + ".wkt");
    PrintStream wktOut = new PrintStream(outFs.create(wktPath));
    wktOut.println("ID\tBoundaries\tRecord Count\tSize\tFile name");
    Text tempLine = new Text2();
    Partition tempPartition = new Partition();
    LineReader in = new LineReader(outFs.open(masterPath));
    while (in.readLine(tempLine) > 0) {
        tempPartition.fromText(tempLine);
        wktOut.println(tempPartition.toWKT());
    }
    in.close();
    wktOut.close();
}
From source file:edu.umn.cs.spatialHadoop.operations.ClosestPair.java
License:Open Source License
/**
 * Computes the closest pair using a local single-machine algorithm
 * (no MapReduce)
 * @param inPaths
 * @param params
 * @return
 * @throws IOException
 * @throws InterruptedException
 */
public static Pair closestPairLocal(Path[] inPaths, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Point[][] allLists = new Point[splits.size()][];

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            int numPoints = 0;
            for (int i = i1; i < i2; i++) {
                try {
                    List<Point> points = new ArrayList<Point>();
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points.add(p.clone());
                        }
                    }
                    reader.close();
                    numPoints += points.size();
                    allLists[i] = points.toArray(new Point[points.size()]);
                } catch (IOException e) {
                    throw new RuntimeException("Error reading file", e);
                } catch (InterruptedException e) {
                    throw new RuntimeException("Error reading file", e);
                }
            }
            return numPoints;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (int numPoints : numsPoints)
        totalNumPoints += numPoints;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;
    for (int iList = 0; iList < allLists.length; iList++) {
        System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
        pointer += allLists[iList].length;
        allLists[iList] = null; // To let the GC collect it
    }

    LOG.info("Computing closest-pair for " + allPoints.length + " points");
    Pair closestPair = closestPairInMemory(allPoints, params.getInt(BruteForceThreshold, 100));
    return closestPair;
}
From source file:edu.umn.cs.spatialHadoop.operations.ConvexHull.java
License:Open Source License
/**
 * Computes the convex hull of an input file using a single-machine algorithm.
 * The output is written to the output file. If the output file is null, the
 * output is just thrown away.
 * @param inFile
 * @param outFile
 * @param params
 * @throws IOException
 * @throws InterruptedException
 */
public static void convexHullLocal(Path inFile, Path outFile, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inFile);
    final List<InputSplit> splits = inputFormat.getSplits(job);

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Point[]> allLists = Parallel.forEach(splits.size(), new RunnableRange<Point[]>() {
        @Override
        public Point[] run(int i1, int i2) {
            try {
                List<Point> finalPoints = new ArrayList<Point>();
                final int MaxSize = 100000;
                Point[] points = new Point[MaxSize];
                int size = 0;
                for (int i = i1; i < i2; i++) {
                    org.apache.hadoop.mapreduce.lib.input.FileSplit fsplit = (org.apache.hadoop.mapreduce.lib.input.FileSplit) splits
                            .get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    while (reader.nextKeyValue()) {
                        Iterable<Point> pts = reader.getCurrentValue();
                        for (Point p : pts) {
                            points[size++] = p.clone();
                            if (size >= points.length) {
                                // Perform convex hull and write the result to finalPoints
                                Point[] chPoints = convexHullInMemory(points);
                                for (Point skylinePoint : chPoints)
                                    finalPoints.add(skylinePoint);
                                size = 0; // reset
                            }
                        }
                    }
                    reader.close();
                }
                while (size-- > 0)
                    finalPoints.add(points[size]);
                return finalPoints.toArray(new Point[finalPoints.size()]);
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (Point[] list : allLists)
        totalNumPoints += list.length;

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;
    for (Point[] list : allLists) {
        System.arraycopy(list, 0, allPoints, pointer, list.length);
        pointer += list.length;
    }
    allLists.clear(); // To let the GC collect it

    Point[] ch = convexHullInMemory(allPoints);

    if (outFile != null) {
        if (params.getBoolean("overwrite", false)) {
            FileSystem outFs = outFile.getFileSystem(new Configuration());
            outFs.delete(outFile, true);
        }
        GridRecordWriter<Point> out = new GridRecordWriter<Point>(outFile, null, null, null);
        for (Point pt : ch) {
            out.write(NullWritable.get(), pt);
        }
        out.close(null);
    }
}