List of usage examples for org.apache.hadoop.io.Text.getLength()
@Override public int getLength()
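Note: Text.getLength() returns the number of bytes in the Text object's UTF-8 buffer, not the number of characters, so it can differ from toString().length() for non-ASCII data. A minimal sketch of the distinction (the sample string is arbitrary):

import org.apache.hadoop.io.Text;

public class TextLengthDemo {
    public static void main(String[] args) {
        Text t = new Text("héllo");                  // 'é' encodes as two UTF-8 bytes
        System.out.println(t.getLength());           // 6: byte length of the UTF-8 encoding
        System.out.println(t.toString().length());   // 5: character count
    }
}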
From source file:org.hypertable.hadoop.mapred.HypertableRecordWriter.java
License:Open Source License
/** Write data to HT. */
public void write(Text key, Text value) throws IOException {
    try {
        key.append(tab, 0, tab.length);
        m_line.clear();
        m_line.append(key.getBytes(), 0, key.getLength());
        m_line.append(value.getBytes(), 0, value.getLength());
        // m_line now holds: [timestamp \t] row \t family[:qualifier] \t value
        int len = m_line.getLength();

        int tab_count = 0;
        int tab_pos = 0;
        int found = 0;
        while (found != -1) {
            found = m_line.find(tab_str, found + 1);
            if (found > 0) {
                tab_count++;
                if (tab_count == 1)
                    tab_pos = found;
            }
        }

        boolean has_timestamp;
        if (tab_count >= 3) {
            has_timestamp = true;
        } else if (tab_count == 2) {
            has_timestamp = false;
        } else {
            throw new Exception("incorrect output line format only " + tab_count + " tabs");
        }

        byte[] byte_array = m_line.getBytes();
        int row_offset, row_length;
        int family_offset = 0, family_length = 0;
        int qualifier_offset = 0, qualifier_length = 0;
        int value_offset = 0, value_length = 0;
        long timestamp = SerializedCellsFlag.AUTO_ASSIGN;

        int offset = 0;
        if (has_timestamp) {
            timestamp = Long.parseLong(m_line.decode(byte_array, 0, tab_pos));
            offset = tab_pos + 1;
        }

        row_offset = offset;
        tab_pos = m_line.find(tab_str, offset);
        row_length = tab_pos - row_offset;
        offset = tab_pos + 1;

        family_offset = offset;
        tab_pos = m_line.find(tab_str, offset);
        for (int i = family_offset; i < tab_pos; i++) {
            if (byte_array[i] == ':' && qualifier_offset == 0) {
                family_length = i - family_offset;
                qualifier_offset = i + 1;
            }
        }
        // no qualifier
        if (qualifier_offset == 0)
            family_length = tab_pos - family_offset;
        else
            qualifier_length = tab_pos - qualifier_offset;
        offset = tab_pos + 1;

        value_offset = offset;
        value_length = len - value_offset;

        if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                value_length, SerializedCellsFlag.FLAG_INSERT)) {
            mClient.mutator_set_cells_serialized(mMutator, mCellsWriter.buffer(), false);
            mCellsWriter.clear();
            if ((row_length + family_length + qualifier_length + value_length + 32) > mCellsWriter.capacity())
                mCellsWriter = new SerializedCellsWriter(
                        row_length + family_length + qualifier_length + value_length + 32);
            if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                    byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                    value_length, SerializedCellsFlag.FLAG_INSERT))
                throw new IOException("Unable to add cell to SerializedCellsWriter " + "(row='"
                        + new String(byte_array, row_offset, row_length, "UTF-8") + "'");
        }
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to write cell - " + e.toString());
    }
}
From source file:org.imageterrier.locfile.QLFSequenceFilesCollection.java
License:Mozilla Public License
protected int getMaxPathChars(List<URI> fl) {
    int max = 0;
    try {
        for (URI u : fl) {
            TextBytesSequenceFileUtility sf = new TextBytesSequenceFileUtility(u, true);
            // getLength() is the key's byte length in UTF-8, so "max" is the longest key in bytes
            for (Text t : sf.listKeys()) {
                if (t.getLength() > max)
                    max = t.getLength();
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    return (int) Math.max(1, Math.ceil(max));
}
From source file:org.loggo.search.cli.Search.java
License:Apache License
public void query() throws Exception {
    BatchScanner bs = conn.createBatchScanner(opts.table, Authorizations.EMPTY, 8);
    try {
        // Compute the user's date range, if any
        SimpleDateFormat sdf = new SimpleDateFormat(LogEntry.DATE_FORMAT);
        String startDate = "";
        if (opts.start != null) {
            startDate = sdf.format(new Date(opts.start));
        }
        String endDate = "9999";
        if (opts.end != null) {
            endDate = sdf.format(new Date(opts.end));
        }
        if (opts.start != null || opts.end != null) {
            // Set the date ranges for each shard
            List<Range> ranges = new ArrayList<>(Schema.SHARDS);
            for (int i = 0; i < Schema.SHARDS; i++) {
                Range r = new Range(String.format(ROW_FORMAT, i, startDate),
                        String.format(ROW_FORMAT, i, endDate));
                ranges.add(r);
            }
            bs.setRanges(ranges);
        } else {
            // full table scan
            bs.setRanges(Collections.singletonList(new Range()));
        }

        // Set the filter for applications and host
        int priority = 100;
        if (!opts.hosts.isEmpty() || !opts.applications.isEmpty()) {
            IteratorSetting is = new IteratorSetting(priority++, HostAndApplicationFilter.class);
            HostAndApplicationFilter.setApps(is, opts.applications);
            HostAndApplicationFilter.setHosts(is, opts.hosts);
            bs.addScanIterator(is);
        }

        // stack the iterators for multiple terms: each term must match to return results
        List<String> families = Arrays.asList(Schema.FAMILIES);
        if (!opts.terms.isEmpty()) {
            for (int i = 0; i < opts.terms.size(); i++) {
                String term = opts.terms.get(i);
                IteratorSetting is;
                if (opts.regexp) {
                    is = new IteratorSetting(priority++, RegExFilter.class);
                    RegExFilter.setRegexs(is, null, null, null, term, false);
                } else {
                    is = new IteratorSetting(priority++, "name" + i, GrepValueFilter.class);
                    GrepValueFilter.setTerm(is, term);
                    if (families.contains(term)) {
                        bs.fetchColumnFamily(new Text(term));
                    }
                }
                bs.addScanIterator(is);
            }
        }

        // Just get the count: don't bother returning whole records
        if (opts.count) {
            IteratorSetting is = new IteratorSetting(priority++, CountingIterator.class);
            bs.addScanIterator(is);
            long total = 0;
            for (Entry<Key, Value> entry : bs) {
                total += Long.parseLong(entry.getValue().toString());
            }
            printer.println(total);
            return;
        }

        // Get stats, not logs
        if (opts.duration != null) {
            final long duration = opts.duration;
            SimpleDateFormat fmt = new SimpleDateFormat(LogEntry.DATE_FORMAT);
            // Stats iterator pulls out counts by CF
            IteratorSetting is = new IteratorSetting(priority++, StatsIterator.class);
            StatsIterator.duration(is, opts.duration, TimeUnit.MILLISECONDS);
            bs.addScanIterator(is);
            // Group counts under the right "bucket" of time
            SortedMap<Long, Map<String, Long>> stats = new TreeMap<>();
            for (Entry<Key, Value> entry : bs) {
                Key key = entry.getKey();
                long ts = StatsIterator.getTs(key, fmt);
                // convert to start time for this bucket
                ts -= ts % duration;
                Map<String, Long> byCF = stats.get(ts);
                if (byCF == null) {
                    stats.put(ts, byCF = new TreeMap<>());
                }
                // Add values, by name given a string: "NAME:VALUE,NAME2:VALUE2"
                String value = entry.getValue().toString();
                if (!value.isEmpty()) {
                    String nameCounts[] = value.split(",");
                    for (String nameCount : nameCounts) {
                        String parts[] = nameCount.split(":");
                        Long current = byCF.get(parts[0]);
                        if (current == null) {
                            current = Long.decode(parts[1]);
                        } else {
                            current = Long.decode(parts[1]) + current.longValue();
                        }
                        byCF.put(parts[0], current);
                    }
                }
            }
            if (stats.isEmpty())
                return;
            // Use the range of the data, or a user specified range, if provided
            long start = stats.firstKey();
            long end = stats.lastKey();
            if (opts.start != null) {
                start = opts.start - (opts.start % duration);
            }
            if (opts.end != null) {
                end = opts.end - (opts.end % duration);
            }
            // Print a line for each bucket, even if there's no data
            for (long time = start; time <= end; time += duration) {
                Map<String, Long> byCF = stats.get(time);
                List<String> byCFList = new ArrayList<>();
                if (byCF != null) {
                    for (Entry<String, Long> entry : byCF.entrySet()) {
                        byCFList.add(String.format("%s: %d", entry.getKey(), entry.getValue()));
                    }
                }
                printer.println(
                        String.format("%s\t%s", fmt.format(new Date(time)), Joiner.on(", ").join(byCFList)));
            }
            return;
        }

        // Read the whole list for sorting. Unfortunately this means it has to fit into memory.
        ArrayList<Entry<Key, Value>> results = new ArrayList<Entry<Key, Value>>();
        for (Entry<Key, Value> entry : bs) {
            results.add(entry);
        }
        if (opts.sort || opts.reverse) {
            final int order = opts.reverse ? -1 : 1;
            Collections.sort(results, new Comparator<Entry<Key, Value>>() {
                @Override
                public int compare(Entry<Key, Value> o1, Entry<Key, Value> o2) {
                    Text row = o1.getKey().getRow();
                    Text row2 = o2.getKey().getRow();
                    return order * BytesWritable.Comparator.compareBytes(row.getBytes(), Schema.SHARD_LENGTH,
                            row.getLength() - Schema.SHARD_LENGTH, row2.getBytes(), Schema.SHARD_LENGTH,
                            row2.getLength() - Schema.SHARD_LENGTH);
                }
            });
        }
        for (Entry<Key, Value> entry : results) {
            String cq = entry.getKey().getColumnQualifier().toString();
            String parts[] = cq.split(Schema.APP_HOST_SEPARATOR);
            String row = entry.getKey().getRow().toString();
            String value = entry.getValue().toString();
            printer.println(String.format("%s\t%s\t%s\t%s", row.substring(Schema.SHARD_LENGTH), parts[0],
                    parts[1], value));
        }
    } finally {
        bs.close();
    }
}
From source file:org.mgrover.hive.translate.GenericUDFTranslate.java
License:Apache License
/**
 * Pre-process the from and to strings to populate {@link #replacementMap} and {@link #deletionSet}.
 *
 * @param from
 *          from string to be used for translation
 * @param to
 *          to string to be used for translation
 */
private void populateMappings(Text from, Text to) {
    replacementMap.clear();
    deletionSet.clear();

    ByteBuffer fromBytes = ByteBuffer.wrap(from.getBytes(), 0, from.getLength());
    ByteBuffer toBytes = ByteBuffer.wrap(to.getBytes(), 0, to.getLength());

    // Traverse through the from string, one code point at a time
    while (fromBytes.hasRemaining()) {
        // This will also move the iterator ahead by one code point
        int fromCodePoint = Text.bytesToCodePoint(fromBytes);
        // If the to string has more code points, make sure to traverse it too
        if (toBytes.hasRemaining()) {
            int toCodePoint = Text.bytesToCodePoint(toBytes);
            // If the code point from the from string already has a replacement or is to be deleted,
            // we don't need to do anything; just move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            replacementMap.put(fromCodePoint, toCodePoint);
        } else {
            // If the code point from the from string already has a replacement or is to be deleted,
            // we don't need to do anything; just move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            deletionSet.add(fromCodePoint);
        }
    }
}
From source file:org.mgrover.hive.translate.GenericUDFTranslate.java
License:Apache License
/**
 * Translates the input string based on {@link #replacementMap} and {@link #deletionSet} and
 * returns the translated string.
 *
 * @param input
 *          input string to perform the translation on
 * @return translated string
 */
private String processInput(Text input) {
    StringBuilder resultBuilder = new StringBuilder();
    // Obtain the byte buffer from the input string so we can traverse it code point by code point
    ByteBuffer inputBytes = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
    // Traverse the byte buffer containing the input string one code point at a time
    while (inputBytes.hasRemaining()) {
        int inputCodePoint = Text.bytesToCodePoint(inputBytes);
        // If the code point exists in the deletion set, emit nothing for it and continue
        // on to the next code point
        if (deletionSet.contains(inputCodePoint)) {
            continue;
        }
        Integer replacementCodePoint = replacementMap.get(inputCodePoint);
        // If a replacement exists for this code point, append the replacement to the output string.
        // If no such replacement exists, append the original input code point
        char[] charArray = Character
                .toChars((replacementCodePoint != null) ? replacementCodePoint : inputCodePoint);
        resultBuilder.append(charArray);
    }
    String resultString = resultBuilder.toString();
    return resultString;
}
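Both methods above walk the Text one Unicode code point at a time by wrapping only the valid region, ByteBuffer.wrap(getBytes(), 0, getLength()), and calling Text.bytesToCodePoint, which consumes one code point per call. A standalone sketch of that traversal (the sample string is arbitrary):

import java.nio.ByteBuffer;
import org.apache.hadoop.io.Text;

public class CodePointWalk {
    public static void main(String[] args) {
        Text input = new Text("a€b");    // '€' is a 3-byte UTF-8 sequence
        ByteBuffer buf = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
        while (buf.hasRemaining()) {
            int cp = Text.bytesToCodePoint(buf);   // advances the buffer by one code point
            System.out.printf("U+%04X%n", cp);     // prints U+0061, U+20AC, U+0062
        }
    }
}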
From source file:org.mrgeo.data.accumulo.output.image.AccumuloMrsImagePyramidOutputFormatProvider.java
License:Apache License
@Override
public void setupJob(final Job job) throws DataProviderException {
    try {
        // TODO: there is an assumption here that the output is going to accumulo directly - not bulk
        super.setupJob(job);

        job.getConfiguration().addResource(AccumuloConnector.getAccumuloPropertiesLocation());

        // zoom level - output zoom level
        zoomLevel = context.getZoomlevel();
        // zoomLevel = job.getConfiguration().getInt("zoomlevel", 0);
        if (zoomLevel != 0) {
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL,
                    Integer.toString(zoomLevel));
        }
        // job.getConfiguration().set("zoomLevel", Integer.toString(zoomLevel));

        if (doBulk) {
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
            job.getConfiguration().set(
                    MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
        } else {
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);
            job.getConfiguration().set(
                    MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);
        }

        Properties props = AccumuloConnector.getAccumuloProperties();
        if (props != null) {
            // this used to be the variable "name" in TiledOutputFormatContext, but was always "".
            String enc = AccumuloConnector.encodeAccumuloProperties("");
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_RESOURCE, enc);

            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE,
                    props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE));
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS,
                    props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS));

            if (props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE) == null) {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE, this.table);
            } else {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE));
            }

            // username and password
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER,
                    props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER));

            // make sure the password is set with Base64Encoding
            String pw = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD);
            String isEnc = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, "false");

            if (isEnc.equalsIgnoreCase("true")) {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD));
            } else {
                byte[] p = Base64.encodeBase64(
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD).getBytes());
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD, new String(p));
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64,
                        new String("true"));
            }

            if (job.getConfiguration().get(MrGeoConstants.MRGEO_PROTECTION_LEVEL) != null) {
                cv = new ColumnVisibility(job.getConfiguration().get(MrGeoConstants.MRGEO_PROTECTION_LEVEL));
            }
            if (cv == null) {
                if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ)) {
                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ,
                            props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
                    cv = new ColumnVisibility(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));
                }
            } else {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ,
                        new String(cv.getExpression()));
            }
        }

        if (doBulk) {
            LongRectangle outTileBounds = tileBounds.toLongRectangle();

            // setup the output for the job
            if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR)) {
                workDir = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR);
                if (workDir != null) {
                    workDir += File.separator;
                }
            } else {
                workDir = "";
            }
            workDir += AccumuloMrsImagePyramidFileOutputFormat.class.getSimpleName() + File.separator
                    + this.table + File.separator; // + System.currentTimeMillis() + File.separator;

            // delete the work dir if possible
            // Path wd = new Path(workDir);
            // FileSystem fs = HadoopFileUtils.getFileSystem(wd);
            // if (fs.exists(wd))
            // {
            //   fs.delete(wd, false);
            // }

            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR, workDir);

            // determine the starting points for the splits
            ArrayList<Pair<Long, Long>> splitPoints = new ArrayList<Pair<Long, Long>>();

            // think about the multiple levels and creating other splits!!!
            long step = bulkThreshold / outTileBounds.getWidth();
            long rem = bulkThreshold % outTileBounds.getWidth();
            if (rem > 0) {
                step++;
            }
            for (long y = outTileBounds.getMinY(); y <= outTileBounds.getMaxY(); y += step) {
                Pair<Long, Long> cur = new Pair<Long, Long>(outTileBounds.getMinX(), y);
                splitPoints.add(cur);
            }

            // we now have our list of split points
            // now build the splits file!!!
            FileSystem fs = null; // FileSystem.get(job.getConfiguration());
            PrintStream out = null;
            try {
                Path wd = new Path(workDir);
                fs = FileSystem.get(job.getConfiguration());
                if (fs.exists(wd)) {
                    fs.delete(wd, true);
                }

                out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "splits.txt"))));
                for (Pair<Long, Long> p : splitPoints) {
                    long split = TMSUtils.tileid(p.getFirst(), p.getSecond(), zoomLevel);
                    // TileIdWritable t = new TileIdWritable(split);
                    Text t = new Text(longToBytes(split));
                    out.println(new String(Base64.encodeBase64(TextUtil.getBytes(t))));
                    log.debug("Point: " + p.getFirst() + "\t" + p.getSecond() + "\t" + split + "\t"
                            + t.getLength());
                }
                job.setNumReduceTasks(splitPoints.size() + 1);
                out.close();

                job.setPartitionerClass(AccumuloMrGeoRangePartitioner.class);
                AccumuloMrGeoRangePartitioner.setSplitFile(job, workDir + "splits.txt");
            } catch (IOException ioe) {
                ioe.printStackTrace();
                throw new DataProviderException(
                        "Problem creating output splits.txt for bulk ingest directory.");
            }

            job.setOutputFormatClass(AccumuloMrsImagePyramidFileOutputFormat.class);
            AccumuloMrsImagePyramidFileOutputFormat.setOutputPath(job, new Path(workDir + "files"));
            // AccumuloMrsImagePyramidFileOutputFormat.setZoomLevel(zoomLevel);
        } else {
            log.info("Setting the output format of: "
                    + AccumuloMrsImagePyramidOutputFormat.class.getCanonicalName());
            job.setOutputFormatClass(AccumuloMrsImagePyramidOutputFormat.class);
            AccumuloMrsImagePyramidOutputFormat.setJob(job);

            log.info("Setting zoom level to " + zoomLevel);
            log.info("Visibility is " + cv.toString());
            log.info("Setting the number of reducers to " + MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
            job.setNumReduceTasks(MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
        }

        job.setOutputKeyClass(TileIdWritable.class);
        job.setOutputValueClass(RasterWritable.class);
    } catch (IOException ioe) {
        throw new DataProviderException("Error running job setup", ioe);
    }
}
From source file:org.mrgeo.data.accumulo.output.image.AccumuloMrsPyramidOutputFormatProvider.java
License:Apache License
@SuppressWarnings("squid:S2095") // hadoop FileSystem cannot be closed, or else subsequent uses will fail private void setupConfig(final Configuration conf, final Job job) throws DataProviderException { try {//w ww. j av a 2s .co m // zoom level - output zoom level zoomLevel = context.getZoomLevel(); // zoomLevel = conf.getInt("zoomlevel", 0); if (zoomLevel != 0) { conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL, Integer.toString(zoomLevel)); } //conf.set("zoomLevel", Integer.toString(zoomLevel)); if (doBulk || forceBulk) { conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE, MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK); conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel), MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK); } else { conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE, MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT); conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel), MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT); } Properties props = AccumuloConnector.getAccumuloProperties(); // this used to be the variable "name" in ImageOutputFormatContext, but was always "". String enc = AccumuloConnector.encodeAccumuloProperties(""); conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_RESOURCE, enc); // conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE, // props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE)); // conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS, // props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS)); if (props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE) == null) { conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE, this.table); } else { conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE, props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE)); } // // username and password // conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER, // props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER)); // // // make sure the password is set with Base64Encoding // String pw = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD); // String isEnc = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, "false"); // // if(isEnc.equalsIgnoreCase("true")){ // conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD, // props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD)); // } else { // byte[] p = Base64.encodeBase64(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD).getBytes()); // // conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD, // new String(p)); // conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, // new String("true")); // } if (conf.get(MrGeoConstants.MRGEO_PROTECTION_LEVEL) != null) { cv = new ColumnVisibility(conf.get(MrGeoConstants.MRGEO_PROTECTION_LEVEL)); } if (cv == null) { if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ)) { conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ, props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ)); cv = new ColumnVisibility(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ)); } } else { conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ, new String(cv.getExpression())); } if (doBulk || forceBulk) { LongRectangle outTileBounds = tileBounds.toLongRectangle(); // setup the output for the job if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR)) { workDir = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR); if (workDir != null) { workDir += File.separator; 
} } else { workDir = ""; } workDir += AccumuloMrsPyramidFileOutputFormat.class.getSimpleName() + File.separator + this.table + File.separator;// + // System.currentTimeMillis() + // File.separator; // delete the work dir if possible Path wd = new Path(workDir); FileSystem fs = FileSystem.get(conf); if (fs.exists(wd)) { fs.delete(wd, true); } conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR, workDir); if (job != null) { // determine the starting points for the splits ArrayList<Pair<Long, Long>> splitPoints = new ArrayList<Pair<Long, Long>>(); // think about the multiple levels and creating other splits!!! long step = bulkThreshold / outTileBounds.getWidth(); long rem = bulkThreshold % outTileBounds.getWidth(); if (rem > 0) { step++; } for (long y = outTileBounds.getMinY(); y <= outTileBounds.getMaxY(); y += step) { Pair<Long, Long> cur = new Pair<Long, Long>(outTileBounds.getMinX(), y); splitPoints.add(cur); } // we now have our list of split points // now build the splits file!!! try (BufferedOutputStream bos = new BufferedOutputStream( fs.create(new Path(workDir + "splits.txt")))) { try (PrintStream out = new PrintStream(bos)) { for (Pair<Long, Long> p : splitPoints) { long split = TMSUtils.tileid(p.getFirst(), p.getSecond(), zoomLevel); //TileIdWritable t = new TileIdWritable(split); Text t = new Text(longToBytes(split)); out.println(Base64Utils.encodeObject(t.toString())); log.debug("Point: " + p.getFirst() + "\t" + p.getSecond() + "\t" + split + "\t" + t.getLength()); } job.setNumReduceTasks(splitPoints.size() + 1); out.close(); job.setPartitionerClass(AccumuloMrGeoRangePartitioner.class); AccumuloMrGeoRangePartitioner.setSplitFile(job, workDir + "splits.txt"); } } catch (IOException ioe) { throw new DataProviderException( "Problem creating output splits.txt for bulk ingest directory.", ioe); } job.setOutputFormatClass(AccumuloMrsPyramidFileOutputFormat.class); } Path workFilesPath = new Path(workDir + "files"); if (job != null) { AccumuloMrsPyramidFileOutputFormat.setOutputPath(job, workFilesPath); //AccumuloMrsPyramidFileOutputFormat.setZoomLevel(zoomLevel); } else { Path outputDir = workFilesPath.getFileSystem(conf).makeQualified(workFilesPath); // conf.set(AccumuloMrsPyramidFileOutputFormat.OUTDIR, outputDir.toString()); conf.set("mapred.output.dir", outputDir.toString()); conf.set("mapreduce.output.fileoutputformat.outputdir", outputDir.toString()); } } else { if (job != null) { log.info("Setting the output format of: " + AccumuloMrsPyramidOutputFormat.class.getCanonicalName()); job.setOutputFormatClass(AccumuloMrsPyramidOutputFormat.class); AccumuloMrsPyramidOutputFormat.setJob(job); log.info("Setting zoom level to " + zoomLevel); log.info("Visibility is " + cv.toString()); log.info("Setting the number of reducers to " + MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS); job.setNumReduceTasks(MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS); } } if (job != null) { job.setOutputKeyClass(TileIdWritable.class); job.setOutputValueClass(RasterWritable.class); } } catch (IOException ioe) { throw new DataProviderException("Error running job setup", ioe); } }
From source file:org.mrgeo.data.accumulo.utils.AccumuloUtils.java
License:Apache License
/**
 * Convert a Text object holding a tileId back to a long.
 *
 * @param rowId Text object to convert.
 * @return the long value from the Text object.
 */
public static long toLong(Text rowId) {
    byte[] outB = new byte[8];
    for (int x = 0; x < outB.length; x++) {
        if (x >= rowId.getLength()) {
            outB[x] = 0x0;
        } else {
            outB[x] = rowId.getBytes()[x];
        }
    }
    return ByteBuffer.wrap(outB).getLong();
}
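For context, the Text row keys decoded above are produced elsewhere in MrGeo from a long tile id. A hedged round-trip sketch, assuming the row bytes hold the tile id as a big-endian 8-byte value (the longToBytes helper seen in the other examples is assumed to be equivalent to ByteBuffer.putLong):

import java.nio.ByteBuffer;
import org.apache.hadoop.io.Text;
import org.mrgeo.data.accumulo.utils.AccumuloUtils;

public class TileIdRoundTrip {
    public static void main(String[] args) {
        long tileId = 123456789L;
        byte[] rowBytes = ByteBuffer.allocate(8).putLong(tileId).array(); // assumed big-endian encoding
        Text rowId = new Text(rowBytes);                                  // rowId.getLength() == 8
        System.out.println(AccumuloUtils.toLong(rowId));                  // 123456789, under that assumption
    }
}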
From source file:org.mrgeo.vector.mrsvector.OSMTileIngester.java
License:Apache License
static byte[] getBytes(final Text text) {
    byte[] bytes = text.getBytes();
    if (text.getLength() == bytes.length) {
        return bytes;
    }
    return Arrays.copyOf(bytes, text.getLength());
}
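The copy above matters because Text reuses its backing array: after a Text is set to a shorter value, getBytes() still returns the larger array and only the first getLength() bytes are valid. A small sketch of the situation this guards against (the sample values are arbitrary):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.apache.hadoop.io.Text;

public class TrimTextBytes {
    public static void main(String[] args) {
        Text t = new Text("hello world");                  // backing array sized for 11 bytes
        t.set("hi".getBytes(StandardCharsets.UTF_8));      // array is reused; only 2 bytes are now valid
        byte[] raw = t.getBytes();                         // raw.length is still the larger capacity
        byte[] valid = Arrays.copyOf(raw, t.getLength());  // exactly the 2 valid bytes
        System.out.println(raw.length + " vs " + valid.length);
    }
}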
From source file:org.platform.modules.hadoop.format.output.CustomOutputFormat.java
License:Apache License
/**
 * Create the final out file, and output row by row. After one row is
 * appended, a configured row separator is appended.
 *
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }
    final int finalRowSeparator = rowSeparator;

    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);

    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().toLowerCase().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                // outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}