Example usage for org.apache.hadoop.io Text append

List of usage examples for org.apache.hadoop.io Text append

Introduction

On this page you can find example usages of the append method of org.apache.hadoop.io.Text.

Prototype

public void append(byte[] utf8, int start, int len) 

Source Link

Document

Append a range of bytes to the end of the given text

Usage

From source file:edu.umn.cs.spatialHadoop.delaunay.DelaunayTriangulation.java

License:Open Source License

/**
 * Computes the Delaunay triangulation of the input points on the local machine.
 * <p>
 * The input is split with {@code SpatialInputFormat3}, all points of all splits
 * are read into memory in parallel, optionally de-duplicated, and handed to
 * {@code GSImprovedAlgorithm}. If an output path is given and the "output"
 * parameter is not disabled, every triangle is written as one line holding its
 * three corner points separated by tabs.
 * <p>
 * Parameters read from {@code params}: "mem" (start memory reporting),
 * "parallel" (parallelism passed to {@code Parallel.forEach}, defaults to the
 * number of available processors), "dedup" (default true), "threshold"
 * (de-duplication threshold, default 1E-5) and "output" (default true).
 *
 * @param inPaths the input paths to read the points from
 * @param outPath the file to write the triangles to; may be null to skip writing
 * @param params the job configuration and user parameters
 * @throws IOException if the input cannot be read or the output cannot be created
 * @throws InterruptedException if interrupted while reading the input
 */
public static void delaunayLocal(Path[] inPaths, Path outPath, final OperationsParams params)
        throws IOException, InterruptedException {
    if (params.getBoolean("mem", false))
        MemoryReporter.startReporting();
    // 1- Split the input path/file to get splits that can be processed
    // independently
    final SpatialInputFormat3<Rectangle, Point> inputFormat = new SpatialInputFormat3<Rectangle, Point>();
    Job job = Job.getInstance(params);
    SpatialInputFormat3.setInputPaths(job, inPaths);
    final List<InputSplit> splits = inputFormat.getSplits(job);
    final Point[][] allLists = new Point[splits.size()][];

    // 2- Read all input points in memory
    LOG.info("Reading points from " + splits.size() + " splits");
    List<Integer> numsPoints = Parallel.forEach(splits.size(), new RunnableRange<Integer>() {
        @Override
        public Integer run(int i1, int i2) {
            try {
                int numPoints = 0;
                for (int i = i1; i < i2; i++) {
                    List<Point> points = new ArrayList<Point>();
                    FileSplit fsplit = (FileSplit) splits.get(i);
                    final RecordReader<Rectangle, Iterable<Point>> reader = inputFormat
                            .createRecordReader(fsplit, null);
                    if (reader instanceof SpatialRecordReader3) {
                        ((SpatialRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof RTreeRecordReader3) {
                        ((RTreeRecordReader3) reader).initialize(fsplit, params);
                    } else if (reader instanceof HDFRecordReader) {
                        ((HDFRecordReader) reader).initialize(fsplit, params);
                    } else {
                        throw new RuntimeException("Unknown record reader");
                    }
                    try {
                        while (reader.nextKeyValue()) {
                            Iterable<Point> pts = reader.getCurrentValue();
                            for (Point p : pts) {
                                // Clone each point before storing it
                                points.add(p.clone());
                            }
                        }
                    } finally {
                        // Release the reader even if reading fails half way
                        reader.close();
                    }
                    numPoints += points.size();
                    allLists[i] = points.toArray(new Point[points.size()]);
                }
                return numPoints;
            } catch (IOException e) {
                e.printStackTrace();
            } catch (InterruptedException e) {
                // Keep the interrupt visible to whoever owns this thread
                Thread.currentThread().interrupt();
                e.printStackTrace();
            }
            // A null result marks this range as failed; checked by the caller below
            return null;
        }
    }, params.getInt("parallel", Runtime.getRuntime().availableProcessors()));

    int totalNumPoints = 0;
    for (Integer numPoints : numsPoints) {
        if (numPoints == null) {
            // Fail with a clear error instead of a NullPointerException on unboxing
            throw new IOException(
                    "Failed to read the input points of one or more splits; see the printed stack trace");
        }
        totalNumPoints += numPoints;
    }

    LOG.info("Read " + totalNumPoints + " points and merging into one list");
    Point[] allPoints = new Point[totalNumPoints];
    int pointer = 0;

    for (int iList = 0; iList < allLists.length; iList++) {
        System.arraycopy(allLists[iList], 0, allPoints, pointer, allLists[iList].length);
        pointer += allLists[iList].length;
        allLists[iList] = null; // To let the GC collect it
    }

    if (params.getBoolean("dedup", true)) {
        float threshold = params.getFloat("threshold", 1E-5f);
        allPoints = SpatialAlgorithms.deduplicatePoints(allPoints, threshold);
    }

    LOG.info("Computing DT for " + allPoints.length + " points");
    GSDTAlgorithm dtAlgorithm = new GSImprovedAlgorithm(allPoints, null);
    LOG.info("DT computed");

    // NOTE(review): bigMBR is computed but never used in this method. The calls are
    // kept in case FileMBR.fileMBR has side effects callers rely on -- confirm and remove.
    Rectangle mbr = FileMBR.fileMBR(inPaths, params);
    double buffer = Math.max(mbr.getWidth(), mbr.getHeight()) / 10;
    Rectangle bigMBR = mbr.buffer(buffer, buffer);
    if (outPath != null && params.getBoolean("output", true)) {
        LOG.info("Writing the output as a soup of triangles");
        Triangulation answer = dtAlgorithm.getFinalTriangulation();
        FileSystem outFS = outPath.getFileSystem(params);
        PrintStream out = new PrintStream(outFS.create(outPath));
        try {
            Text text = new Text2();
            byte[] tab = "\t".getBytes();
            for (Point[] triangle : answer.iterateTriangles()) {
                // One line per triangle: corner0 <tab> corner1 <tab> corner2
                text.clear();
                triangle[0].toText(text);
                text.append(tab, 0, tab.length);
                triangle[1].toText(text);
                text.append(tab, 0, tab.length);
                triangle[2].toText(text);
                out.println(text);
            }
        } finally {
            // Close the output file even if serialization fails
            out.close();
        }
    }
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Appends a hexadecimal rendering of the geometry to the given text.
 * The bytes of {@code geom.asBinary()} are passed through {@code bytesToHex}
 * and the resulting string is appended, optionally followed by one delimiter.
 *
 * @param text the text to append to
 * @param geom the geometry to serialize
 * @param toAppend a delimiter to append after the geometry, or '\0' for none
 */
public static void serializeGeometry(Text text, OGCGeometry geom, char toAppend) {
    final byte[] hexBytes = bytesToHex(geom.asBinary().array()).getBytes();
    text.append(hexBytes, 0, hexBytes.length);
    if (toAppend == '\0') {
        // No delimiter requested
        return;
    }
    final byte[] delimiter = { (byte) toAppend };
    text.append(delimiter, 0, delimiter.length);
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Appends the result of {@code geom.toText()} to the given text, optionally
 * followed by one delimiter. A null geometry contributes an empty string.
 *
 * @param text the text to append to
 * @param geom the geometry to serialize; may be null
 * @param toAppend a delimiter to append after the geometry, or '\0' for none
 */
public static void serializeGeometry(Text text, Geometry geom, char toAppend) {
    final byte[] geometryBytes;
    if (geom == null) {
        geometryBytes = "".getBytes();
    } else {
        geometryBytes = geom.toText().getBytes();
    }
    text.append(geometryBytes, 0, geometryBytes.length);
    if (toAppend == '\0') {
        // No delimiter requested
        return;
    }
    final byte[] delimiter = { (byte) toAppend };
    text.append(delimiter, 0, delimiter.length);
}

From source file:edu.umn.cs.spatialHadoop.mapred.SpatialRecordReader.java

License:Open Source License

/**
 * Reads the next line from input and return true if a line was read.
 * If no more lines are available in this split, a false is returned.
 * @param value/*from   w  w w  . jav a 2s.c  om*/
 * @return
 * @throws IOException
 */
protected boolean nextLine(Text value) throws IOException {
    if (blockType == BlockType.RTREE && pos == 8) {
        // File is positioned at the RTree header
        // Skip the header and go to first data object in file
        pos += RTree.skipHeader(in);
        LOG.info("Skipped R-tree to position: " + pos);
        // Reinitialize record reader at the new position
        lineReader = new LineReader(in);
    }
    while (getFilePosition() <= end) {
        value.clear();
        int b = 0;
        if (buffer != null) {
            // Read the first line encountered in buffer
            int eol = RTree.skipToEOL(buffer, 0);
            b += eol;
            value.append(buffer, 0, eol);
            if (eol < buffer.length) {
                // There are still some bytes remaining in buffer
                byte[] tmp = new byte[buffer.length - eol];
                System.arraycopy(buffer, eol, tmp, 0, tmp.length);
                buffer = tmp;
            } else {
                buffer = null;
            }
            // Check if a complete line has been read from the buffer
            byte last_byte = value.getBytes()[value.getLength() - 1];
            if (last_byte == '\n' || last_byte == '\r')
                return true;
        }

        // Read the first line from stream
        Text temp = new Text();
        b += lineReader.readLine(temp);
        if (b == 0) {
            // Indicates an end of stream
            return false;
        }
        pos += b;

        // Append the part read from stream to the part extracted from buffer
        value.append(temp.getBytes(), 0, temp.getLength());

        if (value.getLength() > 1) {
            // Read a non-empty line. Note that end-of-line character is included
            return true;
        }
    }
    // Reached end of file
    return false;
}

From source file:edu.umn.cs.spatialHadoop.nasa.NASAPoint.java

License:Open Source License

/**
 * Appends the text form of this point to {@code text}: the parent's fields,
 * then {@code Separator}, then {@code value} followed by a comma, then
 * {@code timestamp}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance
 */
@Override
public Text toText(Text text) {
    // Parent part first, then the separator that precedes the extra fields
    super.toText(text);
    final int separatorLength = Separator.length;
    text.append(Separator, 0, separatorLength);

    // '\0' is passed as the last delimiter (presumably meaning "none" --
    // confirm in TextSerializerHelper)
    TextSerializerHelper.serializeInt(value, text, ',');
    TextSerializerHelper.serializeLong(timestamp, text, '\0');
    return text;
}

From source file:edu.umn.cs.spatialHadoop.nasa.NASARectangle.java

License:Open Source License

/**
 * Appends the text form of this rectangle to {@code text}: the parent's
 * fields, then {@code Separator}, then {@code value} followed by a comma,
 * then {@code timestamp}.
 *
 * @param text the text to append to
 * @return the same {@code text} instance
 */
@Override
public Text toText(Text text) {
    // Parent part first, then the separator that precedes the extra fields
    super.toText(text);
    final int separatorLength = Separator.length;
    text.append(Separator, 0, separatorLength);

    // '\0' is passed as the last delimiter (presumably meaning "none" --
    // confirm in TextSerializerHelper)
    TextSerializerHelper.serializeInt(value, text, ',');
    TextSerializerHelper.serializeLong(timestamp, text, '\0');
    return text;
}

From source file:edu.umn.cs.spatialHadoop.operations.LocalSampler.java

License:Open Source License

/**
 * Read from the given stream until end-of-line is reached.
 * @param in - the input stream from where to read the line
 * @param line - the line that has been read from file not including EOL
 * @return - number of bytes read including EOL characters
 * @throws IOException /*from w  w  w . j  a va2s  . c om*/
 */
public static int readUntilEOL(InputStream in, Text line) throws IOException {
    final byte[] bufferBytes = new byte[1024];
    int bufferLength = 0; // Length of the buffer
    do {
        if (bufferLength == bufferBytes.length) {
            // Buffer full. Copy to the output text
            line.append(bufferBytes, 0, bufferLength);
            bufferLength = 0;
        }
        if (bufferLength == 0) {
            // Read and skip any initial EOL characters
            do {
                bufferBytes[0] = (byte) in.read();
            } while (bufferBytes[0] != -1 && (bufferBytes[0] == '\n' || bufferBytes[0] == '\r'));
            if (bufferBytes[0] != -1)
                bufferLength++;
        } else {
            bufferBytes[bufferLength++] = (byte) in.read();
        }
    } while (bufferLength > 0 && bufferBytes[bufferLength - 1] != -1 && bufferBytes[bufferLength - 1] != '\n'
            && bufferBytes[bufferLength - 1] != '\r');
    if (bufferLength > 0) {
        bufferLength--;
        line.append(bufferBytes, 0, bufferLength);
    }
    return line.getLength();
}

From source file:edu.umn.cs.spatialHadoop.osm.OSMEdge.java

License:Open Source License

/**
 * Appends this edge to {@code text} as comma-terminated fields in the order
 * edgeId, nodeId1, lat1, lon1, nodeId2, lat2, lon2, wayId, followed by the
 * tags when they are not null.
 *
 * @param text the text to append to
 * @return the same {@code text} instance
 */
@Override
public Text toText(Text text) {
    // Edge identifier
    TextSerializerHelper.serializeLong(edgeId, text, ',');

    // First end point
    TextSerializerHelper.serializeLong(nodeId1, text, ',');
    TextSerializerHelper.serializeDouble(lat1, text, ',');
    TextSerializerHelper.serializeDouble(lon1, text, ',');

    // Second end point
    TextSerializerHelper.serializeLong(nodeId2, text, ',');
    TextSerializerHelper.serializeDouble(lat2, text, ',');
    TextSerializerHelper.serializeDouble(lon2, text, ',');

    // Way identifier; its comma is written even when no tags follow
    TextSerializerHelper.serializeLong(wayId, text, ',');

    if (tags == null) {
        return text;
    }
    final byte[] encodedTags = tags.getBytes();
    text.append(encodedTags, 0, encodedTags.length);
    return text;
}

From source file:edu.umn.cs.spatialHadoop.util.JspSpatialHelper.java

License:Open Source License

/**
 * Runs the given process and returns the result code. Feeds the given string
 * to the stdin of the run process. If stdout or stderr is non-null, they are
 * filled with the stdout or stderr of the run process, respectively.
 * If wait is set to true, the process is run in synchronous mode where we
 * wait until it is finished. Otherwise, this function call returns
 * immediately and leaves the process running in the background. In the later
 * case, stdout, stderr and the return value are not valid.
 * /* ww  w . j a  v a2 s. c  om*/
 * @param workingDir - The working directory to run the script. Set null for
 *   default.
 * @param cmd - The command line to run including all parameters
 * @param stdin - The string to feed to the stdin of the run process.
 * @param stdout - If non-null, the stdout of the process is fed here.
 * @param stderr - If non-null, the stderr of the process is fed here.
 * @param wait - Set to true to wait until the process exits.
 * @return
 * @throws IOException
 */
public static int runProcess(File workingDir, String cmd, String stdin, Text stdout, Text stderr, boolean wait)
        throws IOException {
    new File("asdf").list(new FilenameFilter() {
        @Override
        public boolean accept(File dir, String name) {
            return false;
        }
    });
    Process process;
    if (workingDir == null)
        process = Runtime.getRuntime().exec(cmd);
    else
        process = Runtime.getRuntime().exec(cmd, null, workingDir);
    if (stdin != null) {
        PrintStream ps = new PrintStream(process.getOutputStream());
        ps.print(stdin);
        ps.close();
    }

    if (!wait)
        return 0;

    try {
        int exitCode = process.waitFor();
        byte[] buffer = new byte[4096];
        if (stdout != null) {
            stdout.clear();
            InputStream in = process.getInputStream();
            while (in.available() > 0) {
                int bytesRead = in.read(buffer);
                stdout.append(buffer, 0, bytesRead);
            }
            in.close();
        }
        if (stderr != null) {
            stderr.clear();
            InputStream err = process.getErrorStream();
            while (err.available() > 0) {
                int bytesRead = err.read(buffer);
                stderr.append(buffer, 0, bytesRead);
            }
            err.close();
        }
        return exitCode;
    } catch (InterruptedException e) {
        e.printStackTrace();
        return 1;
    }
}

From source file:edu.umn.cs.sthadoop.core.STRectangle.java

License:Open Source License

/**
 * Appends {@code time} and a comma to {@code text}, then lets the parent
 * class append its own fields.
 *
 * @param text the text to append to
 * @return whatever the parent's {@code toText} returns
 */
@Override
public Text toText(Text text) {
    // Time component goes first
    final byte[] timeBytes = time.getBytes();
    text.append(timeBytes, 0, timeBytes.length);

    // A comma separates the time from the parent's fields
    final byte[] comma = ",".getBytes();
    text.append(comma, 0, comma.length);

    return super.toText(text);
}