List of usage examples for org.apache.hadoop.io.Text.getLength()
@Override public int getLength()
From source file:edu.uci.ics.pregelix.example.OverflowAggregatorTest.java
License:Apache License
/**
 * Runs the PageRank job with {@link OverflowAggregator} registered as a global
 * aggregator, compares the job output with the expected result directory, and
 * checks the length of the aggregated {@link Text} value.
 *
 * @throws Exception if cluster setup, job execution, or result comparison fails
 */
@Test
public void test() throws Exception {
    TestCluster testCluster = new TestCluster();
    try {
        PregelixJob job = new PregelixJob(PageRankVertex.class.getName());
        job.setVertexClass(PageRankVertex.class);
        job.setVertexInputFormatClass(TextPageRankInputFormat.class);
        job.setVertexOutputFormatClass(SimplePageRankVertexOutputFormat.class);
        job.setMessageCombinerClass(PageRankVertex.SimpleSumCombiner.class);
        job.setNoramlizedKeyComputerClass(VLongNormalizedKeyComputer.class);
        FileInputFormat.setInputPaths(job, INPUTPATH);
        FileOutputFormat.setOutputPath(job, new Path(OUTPUTPAH));
        job.getConfiguration().setLong(PregelixJob.NUM_VERTICE, 20);
        job.addGlobalAggregatorClass(OverflowAggregator.class);

        testCluster.setUp();
        Driver driver = new Driver(PageRankVertex.class);
        driver.runJob(job, "127.0.0.1", PregelixHyracksIntegrationUtil.TEST_HYRACKS_CC_CLIENT_PORT);

        PregelixTestUtils.compareWithResultDir(new File(EXPECTEDPATH), new File(OUTPUTPAH));
        Text text = (Text) IterationUtils.readGlobalAggregateValue(job.getConfiguration(),
                BspUtils.getJobId(job.getConfiguration()), OverflowAggregator.class.getName());
        // Expected value goes first so that a failure message reads correctly.
        Assert.assertEquals(20 * 32767, text.getLength());
    } finally {
        // Always release the cluster; exceptions from the try block propagate unchanged.
        testCluster.tearDown();
    }
}
From source file:edu.umn.cs.spatialHadoop.CommandLineArguments.java
License:Apache License
/** * /*from w w w .java 2s. com*/ * @param autodetect - Automatically detect shape type from input file * if shape is not explicitly set by user * @return */ public Shape getShape(boolean autodetect) { String shapeTypeStr = get("shape"); final Text shapeType = new Text(); if (shapeTypeStr != null) shapeType.set(shapeTypeStr.toLowerCase().getBytes()); if (autodetect && shapeType.getLength() == 0 && getPath() != null) { // Shape type not found in parameters. Try to infer from a line in input // file Path in_file = getPath(); try { Sampler.sampleLocal(in_file.getFileSystem(new Configuration()), in_file, 1, 0, new ResultCollector<Text2>() { @Override public void collect(Text2 value) { String val = value.toString(); String[] parts = val.split(","); if (parts.length == 2) { shapeType.set("point".getBytes()); } else if (parts.length == 4) { shapeType.set("rect".getBytes()); } else if (parts.length > 4) { shapeType.set("tiger".getBytes()); } } }, new Text2(), new Text2()); } catch (IOException e) { e.printStackTrace(); } } Shape stockShape = null; if (shapeType.toString().startsWith("rect")) { stockShape = new Rectangle(); } else if (shapeType.toString().startsWith("point")) { stockShape = new Point(); } else if (shapeType.toString().startsWith("tiger")) { stockShape = new TigerShape(); } else if (shapeType.toString().startsWith("poly")) { stockShape = new Polygon(); } else if (shapeType.toString().startsWith("ogc")) { stockShape = new OGCShape(); } else if (shapeType.toString().startsWith("nasa")) { stockShape = new NASAPoint(); } else if (shapeTypeStr != null) { // Use the shapeType as a class name and try to instantiate it dynamically try { Class<? 
extends Shape> shapeClass = Class.forName(shapeTypeStr).asSubclass(Shape.class); stockShape = shapeClass.newInstance(); } catch (ClassNotFoundException e) { } catch (InstantiationException e) { } catch (IllegalAccessException e) { } } if (stockShape == null) LOG.warn("unknown shape type: " + shapeTypeStr); return stockShape; }
From source file:edu.umn.cs.spatialHadoop.core.CSVOGC.java
License:Open Source License
@Override public void fromText(Text text) { byte[] bytes = text.getBytes(); int separatorsEncountered = 0; int i1 = 0;//from w ww. jav a 2 s. c o m // Locate the required column while (separatorsEncountered < column && i1 < text.getLength()) { if (bytes[i1++] == separator) separatorsEncountered++; } if (i1 == text.getLength()) { this.prefix = new byte[i1]; System.arraycopy(bytes, 0, prefix, 0, i1); super.geom = null; this.suffix = null; return; } int i2 = i1 + 1; while (i2 < text.getLength() && bytes[i2] != separator) i2++; // Copy prefix and suffix if (i1 == 0) { prefix = null; } else { prefix = new byte[i1]; System.arraycopy(bytes, 0, prefix, 0, i1); } if (i2 == text.getLength()) { suffix = null; } else { suffix = new byte[text.getLength() - i2]; System.arraycopy(bytes, i2, suffix, 0, text.getLength() - i2); } // Chop prefix and suffix and leave only the selected column text.set(bytes, i1, i2 - i1); super.fromText(text); }
From source file:edu.umn.cs.spatialHadoop.core.GridInfo.java
License:Open Source License
@Override public void fromText(Text text) { super.fromText(text); if (text.getLength() > 0) { // Remove the first comma text.set(text.getBytes(), 1, text.getLength() - 1); columns = (int) TextSerializerHelper.consumeInt(text, ','); rows = (int) TextSerializerHelper.consumeInt(text, '\0'); }// w ww . j a v a 2 s.c om }
From source file:edu.umn.cs.spatialHadoop.core.GridRecordWriter.java
License:Open Source License
/** * Close the given cell freeing all memory reserved by it. * Once a cell is closed, we should not write more data to it. * @param intermediateCellPath/*ww w .j a v a2s . com*/ * @param finalCellPath * @param intermediateCellStream * @param masterFile * @param cellMbr * @param recordCount * @param cellSize * @throws IOException */ protected void closeCellBackground(final Path intermediateCellPath, final Path finalCellPath, final OutputStream intermediateCellStream, final OutputStream masterFile, final CellInfo cellMbr, final long recordCount, final long cellSize) throws IOException { Thread closingThread = new Thread() { @Override public void run() { try { Path finalfinalCellPath = flushAllEntries(intermediateCellPath, intermediateCellStream, finalCellPath); // Write an entry to the master file // Write a line to the master file including file name and cellInfo if (masterFile != null) { Partition partition = new Partition(finalfinalCellPath.getName(), cellMbr); partition.recordCount = recordCount; partition.size = cellSize; Text line = partition.toText(new Text()); masterFile.write(line.getBytes(), 0, line.getLength()); masterFile.write(NEW_LINE); } } catch (IOException e) { throw new RuntimeException("Error closing thread", e); } } }; closingThreads.add(closingThread); // Remove previously terminated threads while (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.TERMINATED) { closingThreads.remove(0); } // Start first thread (if exists) if (!closingThreads.isEmpty() && closingThreads.get(0).getState() == Thread.State.NEW) closingThreads.get(0).start(); }
From source file:edu.umn.cs.spatialHadoop.core.JTSShape.java
License:Apache License
@Override public void fromText(Text text) { // Read and skip a long // TextSerializerHelper.consumeLong(text, '\t'); try {// w w w .j a va 2 s . co m // Check whether this text is a Well Known Text (WKT) or a hexed string boolean wkt = false; byte[] bytes = text.getBytes(); int length = text.getLength(); int i_shape = 0; while (!wkt && i_shape < ShapeNames.length) { byte[] shapeName = ShapeNames[i_shape]; if (length > shapeName.length) { int i = 0; while (i < shapeName.length && shapeName[i] == bytes[i]) i++; if (i == shapeName.length) { wkt = true; break; } } i_shape++; } // Look for the terminator of the shape text byte terminator = Separator[0]; int i1 = 0; if (bytes[i1] == '\'' || bytes[i1] == '\"') { terminator = bytes[i1++]; } int i2 = i1; while (i2 < length && bytes[i2] != terminator) i2++; String str = new String(bytes, i1, i2 - i1); geom = parseText(str); if (++i2 < length) { extra = new String(bytes, i2, length - i2); } else { extra = null; } } catch (RuntimeException e) { LOG.error("Error parsing: " + text); throw e; } catch (ParseException e) { LOG.error("Error parsing: " + text); e.printStackTrace(); } }
From source file:edu.umn.cs.spatialHadoop.core.OGCShape.java
License:Apache License
@Override public void fromText(Text text) { try {/*from w ww. j av a 2s .c om*/ // Check whether this text is a Well Known Text (WKT) or a hexed string boolean wkt = false; byte[] bytes = text.getBytes(); int length = text.getLength(); int i_shape = 0; while (!wkt && i_shape < ShapeNames.length) { byte[] shapeName = ShapeNames[i_shape]; if (length > shapeName.length) { int i = 0; while (i < shapeName.length && shapeName[i] == bytes[i]) i++; if (i == shapeName.length) { wkt = true; break; } } i_shape++; } // Look for the terminator of the shape text byte terminator = Separator[0]; int i1 = 0; if (bytes[i1] == '\'' || bytes[i1] == '\"') { terminator = bytes[i1++]; } int i2 = i1; while (i2 < length && bytes[i2] != terminator) i2++; String str = new String(bytes, i1, i2 - i1); geom = parseText(str); if (++i2 < length) { extra = new String(bytes, i2, length - i2); } else { extra = null; } } catch (RuntimeException e) { LOG.error("Error parsing: " + text); throw e; } }
From source file:edu.umn.cs.spatialHadoop.core.Partition.java
License:Open Source License
@Override public void fromText(Text text) { super.fromText(text); text.set(text.getBytes(), 1, text.getLength() - 1); // Skip comma this.recordCount = TextSerializerHelper.consumeLong(text, ','); this.size = TextSerializerHelper.consumeLong(text, ','); filename = text.toString();//from ww w.j av a2s . c o m }
From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java
License:Open Source License
/** * Deserializes and consumes a double from the given text. Consuming means all * characters read for deserialization are removed from the given text. * If separator is non-zero, a double is read and consumed up to the first * occurrence of this separator. The separator is also consumed. * @param text// w w w .j a v a2s. c o m * @param separator * @return */ public static double consumeDouble(Text text, char separator) { int i = 0; byte[] bytes = text.getBytes(); // Skip until the separator or end of text while (i < text.getLength() && ((bytes[i] >= '0' && bytes[i] <= '9') || bytes[i] == 'e' || bytes[i] == 'E' || bytes[i] == '-' || bytes[i] == '+' || bytes[i] == '.')) i++; double d = Double.parseDouble(new String(bytes, 0, i)); if (i < text.getLength() && bytes[i] == separator) i++; System.arraycopy(bytes, i, bytes, 0, text.getLength() - i); text.set(bytes, 0, text.getLength() - i); return d; }
From source file:edu.umn.cs.spatialHadoop.io.TextSerializerHelper.java
License:Open Source License
/**
 * Clears {@code tags} and refills it with the key/value pairs read from the
 * front of {@code text}. Nothing is read when the text is empty or does not
 * begin with the map-start separator. Parsing stops at the map-end separator
 * or at the end of the text; if it stops before the end, the text is
 * truncated to begin at the stop position (the map-end separator itself is
 * not removed), otherwise the text is left unchanged.
 *
 * @param text the text to read from; may be truncated as described above
 * @param tags the map that receives the parsed pairs; always cleared first
 */
public static void consumeMap(Text text, Map<String, String> tags) {
  tags.clear();

  // The text is only modified by the final set(), so the length is stable.
  final int length = text.getLength();
  if (length <= 0) {
    return;
  }
  final byte[] data = text.getBytes();
  if (data[0] != Separators[MapStart]) {
    return;
  }

  int pos = 1;
  while (pos < length && data[pos] != Separators[MapEnd]) {
    // Key: from pos up to the key/value separator
    int keyEnd = pos + 1;
    while (keyEnd < length && data[keyEnd] != Separators[KeyValueSeparator]) {
      keyEnd++;
    }
    final String key = new String(data, pos, keyEnd - pos);

    // Value: from just after the key/value separator up to the next field
    // separator or the map end
    final int valueStart = keyEnd + 1;
    int valueEnd = valueStart + 1;
    while (valueEnd < length && data[valueEnd] != Separators[FieldSeparator]
        && data[valueEnd] != Separators[MapEnd]) {
      valueEnd++;
    }
    final String value = new String(data, valueStart, valueEnd - valueStart);

    tags.put(key, value);

    // Continue after the value, stepping over a field separator if present
    pos = valueEnd;
    if (pos < length && data[pos] == Separators[FieldSeparator]) {
      pos++;
    }
  }

  if (pos < length) {
    text.set(data, pos, length - pos);
  }
}