Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.getLength().

Prototype

@Override
public int getLength() 

Source Link

Document

Returns the number of bytes in the byte array

Usage

From source file:edu.uci.ics.pregelix.example.OverflowAggregatorTest.java

License:Apache License

/**
 * Runs a PageRank job with {@code OverflowAggregator} registered as a global aggregator on a
 * local test cluster, compares the job output with the expected result directory, and checks
 * the byte length of the aggregated {@link Text} value.
 *
 * @throws Exception if the cluster cannot be set up or torn down, or if the job fails
 */
@Test
public void test() throws Exception {
    TestCluster testCluster = new TestCluster();

    try {
        PregelixJob job = new PregelixJob(PageRankVertex.class.getName());
        job.setVertexClass(PageRankVertex.class);
        job.setVertexInputFormatClass(TextPageRankInputFormat.class);
        job.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
        job.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
        job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
        FileInputFormat.setInputPaths(job, INPUTPATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUTPAH));
        job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
        job.addGlobalAggregatorClass(OverflowAggregator.class);

        testCluster.setUp();
        Driver driver = new Driver(PageRankVertex.class);
        driver.runJob(job, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);

        PregelixTestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
        Text text = (Text) IterationUtils.readGlobalAggregateValue(job.getConfiguration(),
                BspUtils.getJobId(job.getConfiguration()), OverflowAggregator.class.getName());
        // assertEquals takes (expected, actual); with the arguments in this order a failure
        // message reports the expected and the observed length correctly.
        Assert.assertEquals(20 * 32767, text.getLength());
    } finally {
        // No catch clause is needed: exceptions propagate to the test runner unchanged, and
        // the cluster is released whether or not the job succeeded.
        testCluster.tearDown();
    }
}

From source file:edu.umn.cs.spatialHadoop.CommandLineArguments.java

License:Apache License

/**
 * /*from  w  w  w  .java  2s. com*/
 * @param autodetect - Automatically detect shape type from input file
 *   if shape is not explicitly set by user
 * @return
 */
public Shape getShape(boolean autodetect) {
    String shapeTypeStr = get("shape");
    final Text shapeType = new Text();
    if (shapeTypeStr != null)
        shapeType.set(shapeTypeStr.toLowerCase().getBytes());

    if (autodetect && shapeType.getLength() == 0 && getPath() != null) {
        // Shape type not found in parameters. Try to infer from a line in input
        // file
        Path in_file = getPath();
        try {
            Sampler.sampleLocal(in_file.getFileSystem(new Configuration()), in_file, 1, 0,
                    new ResultCollector<Text2>() {
                        @Override
                        public void collect(Text2 value) {
                            String val = value.toString();
                            String[] parts = val.split(",");
                            if (parts.length == 2) {
                                shapeType.set("point".getBytes());
                            } else if (parts.length == 4) {
                                shapeType.set("rect".getBytes());
                            } else if (parts.length > 4) {
                                shapeType.set("tiger".getBytes());
                            }
                        }
                    }, new Text2(), new Text2());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    Shape stockShape = null;
    if (shapeType.toString().startsWith("rect")) {
        stockShape = new Rectangle();
    } else if (shapeType.toString().startsWith("point")) {
        stockShape = new Point();
    } else if (shapeType.toString().startsWith("tiger")) {
        stockShape = new TigerShape();
    } else if (shapeType.toString().startsWith("poly")) {
        stockShape = new Polygon();
    } else if (shapeType.toString().startsWith("ogc")) {
        stockShape = new OGCShape();
    } else if (shapeType.toString().startsWith("nasa")) {
        stockShape = new NASAPoint();
    } else if (shapeTypeStr != null) {
        // Use the shapeType as a class name and try to instantiate it dynamically
        try {
            Class<? extends Shape> shapeClass = Class.forName(shapeTypeStr).asSubclass(Shape.class);
            stockShape = shapeClass.newInstance();
        } catch (ClassNotFoundException e) {
        } catch (InstantiationException e) {
        } catch (IllegalAccessException e) {
        }
    }
    if (stockShape == null)
        LOG.warn("unknown shape type: " + shapeTypeStr);

    return stockShape;
}

From source file:edu.umn.cs.spatialHadoop.core.CSVOGC.java

License:Open Source License

/**
 * Splits a delimited line around the configured column and parses that column as the shape.
 * <p>
 * The bytes before the column (including the separator that precedes it) are stored in
 * {@code prefix}; the bytes from the separator that ends the column to the end of the line
 * are stored in {@code suffix}. Either is {@code null} when empty. The given text is then
 * reduced to the selected column and handed to the superclass parser.
 * <p>
 * When the scan for the column reaches the end of the line, the whole line becomes the
 * prefix and both the geometry and the suffix are set to {@code null}.
 *
 * @param text the line to parse; modified in place to contain only the selected column
 */
@Override
public void fromText(Text text) {
    final byte[] data = text.getBytes();
    final int length = text.getLength();

    // Advance past `column` separators to reach the first byte of the wanted column
    int columnStart = 0;
    for (int seen = 0; seen < column && columnStart < length;) {
        if (data[columnStart++] == separator) {
            seen++;
        }
    }

    if (columnStart == length) {
        // Ran off the end of the line: everything is prefix, nothing to parse
        final byte[] wholeLine = new byte[columnStart];
        System.arraycopy(data, 0, wholeLine, 0, columnStart);
        this.prefix = wholeLine;
        super.geom = null;
        this.suffix = null;
        return;
    }

    // Find the separator that ends the column (or the end of the line)
    int columnEnd = columnStart + 1;
    while (columnEnd < length && data[columnEnd] != separator) {
        columnEnd++;
    }

    // Keep whatever surrounds the column
    if (columnStart == 0) {
        prefix = null;
    } else {
        final byte[] leading = new byte[columnStart];
        System.arraycopy(data, 0, leading, 0, columnStart);
        prefix = leading;
    }

    final int trailingLength = length - columnEnd;
    if (trailingLength == 0) {
        suffix = null;
    } else {
        final byte[] trailing = new byte[trailingLength];
        System.arraycopy(data, columnEnd, trailing, 0, trailingLength);
        suffix = trailing;
    }

    // Leave only the selected column in the text and let the superclass parse it
    text.set(data, columnStart, columnEnd - columnStart);
    super.fromText(text);
}

From source file:edu.umn.cs.spatialHadoop.core.GridInfo.java

License:Open Source License

/**
 * Reads the superclass fields from the text and then, if anything is left, the number of
 * columns and rows of the grid.
 *
 * @param text the text to parse; consumed as fields are read
 */
@Override
public void fromText(Text text) {
    super.fromText(text);
    if (text.getLength() <= 0) {
        // Nothing follows the superclass fields; leave columns and rows untouched
        return;
    }

    // Drop the first byte (the comma left over after the superclass fields)
    final byte[] remainingBytes = text.getBytes();
    final int remainingLength = text.getLength() - 1;
    text.set(remainingBytes, 1, remainingLength);

    columns = (int) TextSerializerHelper.consumeInt(text, ',');
    rows = (int) TextSerializerHelper.consumeInt(text, '\0');
}

From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java

License:Open Source License

/**
 * Close the given cell freeing all memory reserved by it.
 * Once a cell is closed, we should not write more data to it.
 * @param intermediateCellPath/*ww w .j a  v a2s . com*/
 * @param finalCellPath
 * @param intermediateCellStream
 * @param masterFile
 * @param cellMbr
 * @param recordCount
 * @param cellSize
 * @throws IOException
 */
protected void closeCellBackground(final Path intermediateCellPath, final Path finalCellPath,
        final OutputStream intermediateCellStream, final OutputStream masterFile, final CellInfo cellMbr,
        final long recordCount, final long cellSize) throws IOException {

    Thread closingThread = new Thread() {
        @Override
        public void run() {
            try {
                Path finalfinalCellPath = flushAllEntries(intermediateCellPath, intermediateCellStream,
                        finalCellPath);
                // Write an entry to the master file

                // Write a line to the master file including file name and cellInfo
                if (masterFile != null) {
                    Partition partition = new Partition(finalfinalCellPath.getName(), cellMbr);
                    partition.recordCount = recordCount;
                    partition.size = cellSize;
                    Text line = partition.toText(new Text());
                    masterFile.write(line.getBytes(), 0, line.getLength());
                    masterFile.write(NEW_LINE);
                }
            } catch (IOException e) {
                throw new RuntimeException("Error closing thread", e);
            }
        }
    };

    closingThreads.add(closingThread);
    // Remove previously terminated threads
    while (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.TERMINATED) {
        closingThreads.remove(0);
    }
    // Start first thread (if exists)
    if (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.NEW)
        closingThreads.get(0).start();
}

From source file:edu.umn.cs.spatialHadoop.core.JTSShape.java

License:Apache License

/**
 * Parses the geometry at the start of the given text and keeps the remainder as extra data.
 * <p>
 * The shape text ends at the first occurrence of {@code Separator[0]}, or, when the text
 * starts with a single or double quote, at the next occurrence of that same quote character.
 * The shape text is passed to {@code parseText}; any bytes after the terminator are stored
 * in {@code extra}, which is set to {@code null} when nothing follows.
 *
 * @param text the text to parse; not modified
 * @throws RuntimeException rethrown after logging if parsing fails with an unchecked exception
 */
@Override
public void fromText(Text text) {
    try {
        byte[] bytes = text.getBytes();
        int length = text.getLength();

        // Look for the terminator of the shape text
        byte terminator = Separator[0];
        int i1 = 0;
        // Only the first `length` bytes of the backing array are valid, so the first byte
        // may be inspected only when the text is not empty.
        if (length > 0 && (bytes[i1] == '\'' || bytes[i1] == '\"')) {
            terminator = bytes[i1++];
        }
        int i2 = i1;
        while (i2 < length && bytes[i2] != terminator) {
            i2++;
        }

        String str = new String(bytes, i1, i2 - i1);
        geom = parseText(str);

        // Step over the terminator; whatever follows is kept verbatim
        if (++i2 < length) {
            extra = new String(bytes, i2, length - i2);
        } else {
            extra = null;
        }
    } catch (RuntimeException e) {
        LOG.error("Error parsing: " + text);
        throw e;
    } catch (ParseException e) {
        // Route the stack trace through the logger instead of stderr
        LOG.error("Error parsing: " + text, e);
    }
}

From source file:edu.umn.cs.spatialHadoop.core.OGCShape.java

License:Apache License

/**
 * Parses the geometry at the start of the given text and keeps the remainder as extra data.
 * <p>
 * The shape text ends at the first occurrence of {@code Separator[0]}, or, when the text
 * starts with a single or double quote, at the next occurrence of that same quote character.
 * The shape text is passed to {@code parseText}; any bytes after the terminator are stored
 * in {@code extra}, which is set to {@code null} when nothing follows.
 *
 * @param text the text to parse; not modified
 * @throws RuntimeException rethrown after logging if parsing fails with an unchecked exception
 */
@Override
public void fromText(Text text) {
    try {
        byte[] bytes = text.getBytes();
        int length = text.getLength();

        // Look for the terminator of the shape text
        byte terminator = Separator[0];
        int i1 = 0;
        // Only the first `length` bytes of the backing array are valid, so the first byte
        // may be inspected only when the text is not empty.
        if (length > 0 && (bytes[i1] == '\'' || bytes[i1] == '\"')) {
            terminator = bytes[i1++];
        }
        int i2 = i1;
        while (i2 < length && bytes[i2] != terminator) {
            i2++;
        }

        String str = new String(bytes, i1, i2 - i1);
        geom = parseText(str);

        // Step over the terminator; whatever follows is kept verbatim
        if (++i2 < length) {
            extra = new String(bytes, i2, length - i2);
        } else {
            extra = null;
        }
    } catch (RuntimeException e) {
        LOG.error("Error parsing: " + text);
        throw e;
    }
}

From source file:edu.umn.cs.spatialHadoop.core.Partition.java

License:Open Source License

/**
 * Reads the superclass fields from the text, then the record count, the size and the file
 * name of this partition.
 *
 * @param text the text to parse; consumed as fields are read
 */
@Override
public void fromText(Text text) {
    super.fromText(text);

    // Skip comma
    final byte[] remainingBytes = text.getBytes();
    final int remainingLength = text.getLength() - 1;
    text.set(remainingBytes, 1, remainingLength);

    this.recordCount = TextSerializerHelper.consumeLong(text, ',');
    this.size = TextSerializerHelper.consumeLong(text, ',');
    // Whatever is left after the two numbers is the file name
    this.filename = text.toString();
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Deserializes and consumes a double from the start of the given text. Consuming means the
 * bytes that were read are removed from the text.
 * <p>
 * The number is the longest leading run of digits, {@code e}, {@code E}, {@code -},
 * {@code +} and {@code .} characters. If the byte right after that run equals
 * {@code separator}, it is consumed as well.
 *
 * @param text the text to read from; modified in place
 * @param separator the character that may follow the number and is removed with it
 * @return the parsed value
 * @throws NumberFormatException if the leading run is not a valid double
 */
public static double consumeDouble(Text text, char separator) {
    final byte[] bytes = text.getBytes();
    final int length = text.getLength();

    // Find the end of the numeric prefix
    int end = 0;
    while (end < length) {
        final byte b = bytes[end];
        final boolean digit = b >= '0' && b <= '9';
        final boolean exponent = b == 'e' || b == 'E';
        final boolean signOrPoint = b == '-' || b == '+' || b == '.';
        if (!(digit || exponent || signOrPoint)) {
            break;
        }
        end++;
    }

    final double value = Double.parseDouble(new String(bytes, 0, end));

    // Swallow the separator when it directly follows the number
    int consumed = end;
    if (consumed < length && bytes[consumed] == separator) {
        consumed++;
    }

    // Shift the unread tail to the front of the buffer and shrink the text to it
    final int remaining = length - consumed;
    System.arraycopy(bytes, consumed, bytes, 0, remaining);
    text.set(bytes, 0, remaining);
    return value;
}

From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java

License:Open Source License

/**
 * Reads key/value pairs from the start of the given text into {@code tags}.
 * <p>
 * The map is always cleared first. Nothing else happens when the text is empty or does not
 * start with the map-start separator. Otherwise pairs are read until the map-end separator
 * or the end of the text; keys end at the key-value separator and values end at the field
 * separator or the map-end separator. If the scan stops before the end of the text, the text
 * is cut down to the bytes from the stopping position onward; otherwise it is left unchanged.
 *
 * @param text the text to read from; possibly modified in place
 * @param tags the map that receives the parsed pairs; cleared before parsing
 */
public static void consumeMap(Text text, Map<String, String> tags) {
    tags.clear();

    final int length = text.getLength();
    if (length <= 0) {
        return;
    }
    final byte[] data = text.getBytes();
    if (data[0] != Separators[MapStart]) {
        return;
    }

    int pos = 1;
    while (pos < length && data[pos] != Separators[MapEnd]) {
        // Key: runs up to the key-value separator
        int keyEnd = pos + 1;
        while (keyEnd < length && data[keyEnd] != Separators[KeyValueSeparator]) {
            keyEnd++;
        }
        final String key = new String(data, pos, keyEnd - pos);

        // Value: starts after the key-value separator, runs up to a field or map-end separator
        final int valueStart = keyEnd + 1;
        int valueEnd = valueStart + 1;
        while (valueEnd < length && data[valueEnd] != Separators[FieldSeparator]
                && data[valueEnd] != Separators[MapEnd]) {
            valueEnd++;
        }
        final String value = new String(data, valueStart, valueEnd - valueStart);
        tags.put(key, value);

        // Step over the field separator, if that is what ended the value
        pos = valueEnd;
        if (pos < length && data[pos] == Separators[FieldSeparator]) {
            pos++;
        }
    }

    if (pos < length) {
        text.set(data, pos, length - pos);
    }
}