Example usage for org.apache.hadoop.io Text getLength

List of usage examples for org.apache.hadoop.io Text getLength

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text#getLength().

Prototype

@Override
public int getLength() 

Document

Returns the number of bytes in the byte array.
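
Note that getLength() reports the size of the UTF-8 encoding held by the Text instance, which can differ from the character count of the equivalent Java String. A minimal standalone sketch (the class name GetLengthDemo is made up for illustration and assumes only hadoop-common on the classpath):

import org.apache.hadoop.io.Text;

public class GetLengthDemo {
    public static void main(String[] args) {
        Text ascii = new Text("hadoop");
        Text accented = new Text("h\u00e9llo"); // U+00E9 encodes as two UTF-8 bytes

        System.out.println(ascii.getLength());            // 6 bytes, 6 characters
        System.out.println(accented.getLength());         // 6 bytes for 5 characters
        System.out.println(accented.toString().length()); // 5 characters
    }
}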

Usage

From source file:org.hypertable.hadoop.mapred.HypertableRecordWriter.java

License:Open Source License

/**
 * Write data to Hypertable.
 */
public void write(Text key, Text value) throws IOException {
    try {
        key.append(tab, 0, tab.length);

        m_line.clear();
        m_line.append(key.getBytes(), 0, key.getLength());
        m_line.append(value.getBytes(), 0, value.getLength());
        int len = m_line.getLength();

        int tab_count = 0;
        int tab_pos = 0;
        int found = 0;
        while (found != -1) {
            found = m_line.find(tab_str, found + 1);
            if (found > 0) {
                tab_count++;
                if (tab_count == 1)
                    tab_pos = found;
            }
        }

        boolean has_timestamp;
        if (tab_count >= 3) {
            has_timestamp = true;
        } else if (tab_count == 2) {
            has_timestamp = false;
        } else {
            throw new Exception("incorrect output line format only " + tab_count + " tabs");
        }

        byte[] byte_array = m_line.getBytes();
        int row_offset, row_length;
        int family_offset = 0, family_length = 0;
        int qualifier_offset = 0, qualifier_length = 0;
        int value_offset = 0, value_length = 0;
        long timestamp = SerializedCellsFlag.AUTO_ASSIGN;

        int offset = 0;
        if (has_timestamp) {
            timestamp = Long.parseLong(m_line.decode(byte_array, 0, tab_pos));
            offset = tab_pos + 1;
        }

        row_offset = offset;
        tab_pos = m_line.find(tab_str, offset);
        row_length = tab_pos - row_offset;

        offset = tab_pos + 1;
        family_offset = offset;

        tab_pos = m_line.find(tab_str, offset);
        for (int i = family_offset; i < tab_pos; i++) {
            if (byte_array[i] == ':' && qualifier_offset == 0) {
                family_length = i - family_offset;
                qualifier_offset = i + 1;
            }
        }
        // no qualifier
        if (qualifier_offset == 0)
            family_length = tab_pos - family_offset;
        else
            qualifier_length = tab_pos - qualifier_offset;

        offset = tab_pos + 1;
        value_offset = offset;
        value_length = len - value_offset;

        if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                value_length, SerializedCellsFlag.FLAG_INSERT)) {
            mClient.mutator_set_cells_serialized(mMutator, mCellsWriter.buffer(), false);
            mCellsWriter.clear();
            if ((row_length + family_length + qualifier_length + value_length + 32) > mCellsWriter.capacity())
                mCellsWriter = new SerializedCellsWriter(
                        row_length + family_length + qualifier_length + value_length + 32);
            if (!mCellsWriter.add(byte_array, row_offset, row_length, byte_array, family_offset, family_length,
                    byte_array, qualifier_offset, qualifier_length, timestamp, byte_array, value_offset,
                    value_length, SerializedCellsFlag.FLAG_INSERT))
                throw new IOException("Unable to add cell to SerializedCellsWriter " + "(row='"
                        + new String(byte_array, row_offset, row_length, "UTF-8") + "'");
        }
    } catch (Exception e) {
        log.error(e);
        throw new IOException("Unable to write cell - " + e.toString());
    }
}

From source file:org.imageterrier.locfile.QLFSequenceFilesCollection.java

License:Mozilla Public License

protected int getMaxPathChars(List<URI> fl) {
    int max = 0;

    try {
        for (URI u : fl) {
            TextBytesSequenceFileUtility sf = new TextBytesSequenceFileUtility(u, true);
            for (Text t : sf.listKeys()) {
                if (t.getLength() > max)
                    max = t.getLength();
            }
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    return (int) Math.max(1, Math.ceil(max));
}

From source file:org.loggo.search.cli.Search.java

License:Apache License

public void query() throws Exception {
    BatchScanner bs = conn.createBatchScanner(opts.table, Authorizations.EMPTY, 8);
    try {
        // Compute the user's date range, if any
        SimpleDateFormat sdf = new SimpleDateFormat(LogEntry.DATE_FORMAT);
        String startDate = "";
        if (opts.start != null) {
            startDate = sdf.format(new Date(opts.start));
        }
        String endDate = "9999";
        if (opts.end != null) {
            endDate = sdf.format(new Date(opts.end));
        }
        if (opts.start != null || opts.end != null) {
            // Set the date ranges for each shard
            List<Range> ranges = new ArrayList<>(Schema.SHARDS);
            for (int i = 0; i < Schema.SHARDS; i++) {
                Range r = new Range(String.format(ROW_FORMAT, i, startDate),
                        String.format(ROW_FORMAT, i, endDate));
                ranges.add(r);
            }
            bs.setRanges(ranges);
        } else {
            // full table scan
            bs.setRanges(Collections.singletonList(new Range()));
        }

        // Set the filter for applications and host
        int priority = 100;
        if (!opts.hosts.isEmpty() || !opts.applications.isEmpty()) {
            IteratorSetting is = new IteratorSetting(priority++, HostAndApplicationFilter.class);
            HostAndApplicationFilter.setApps(is, opts.applications);
            HostAndApplicationFilter.setHosts(is, opts.hosts);
            bs.addScanIterator(is);
        }
        // stack the iterators for multiple terms: each term must match to return results
        List<String> families = Arrays.asList(Schema.FAMILIES);
        if (!opts.terms.isEmpty()) {
            for (int i = 0; i < opts.terms.size(); i++) {
                String term = opts.terms.get(i);
                IteratorSetting is;
                if (opts.regexp) {
                    is = new IteratorSetting(priority++, RegExFilter.class);
                    RegExFilter.setRegexs(is, null, null, null, term, false);
                } else {
                    is = new IteratorSetting(priority++, "name" + i, GrepValueFilter.class);
                    GrepValueFilter.setTerm(is, term);
                    if (families.contains(term)) {
                        bs.fetchColumnFamily(new Text(term));
                    }
                }
                bs.addScanIterator(is);
            }
        }

        // Just get the count: don't bother returning whole records
        if (opts.count) {
            IteratorSetting is = new IteratorSetting(priority++, CountingIterator.class);
            bs.addScanIterator(is);
            long total = 0;
            for (Entry<Key, Value> entry : bs) {
                total += Long.parseLong(entry.getValue().toString());
            }
            printer.println(total);
            return;
        }

        // Get stats, not logs
        if (opts.duration != null) {
            final long duration = opts.duration;
            SimpleDateFormat fmt = new SimpleDateFormat(LogEntry.DATE_FORMAT);
            // Stats iterator pulls out counts by CF
            IteratorSetting is = new IteratorSetting(priority++, StatsIterator.class);
            StatsIterator.duration(is, opts.duration, TimeUnit.MILLISECONDS);
            bs.addScanIterator(is);
            // Group counts under the right "bucket" of time
            SortedMap<Long, Map<String, Long>> stats = new TreeMap<>();
            for (Entry<Key, Value> entry : bs) {
                Key key = entry.getKey();
                long ts = StatsIterator.getTs(key, fmt);
                // convert to start time for this bucket
                ts -= ts % duration;
                Map<String, Long> byCF = stats.get(ts);
                if (byCF == null) {
                    stats.put(ts, byCF = new TreeMap<>());
                }
                // Add values, by name given a string: "NAME:VALUE,NAME2:VALUE2"
                String value = entry.getValue().toString();
                if (!value.isEmpty()) {
                    String nameCounts[] = value.split(",");
                    for (String nameCount : nameCounts) {
                        String parts[] = nameCount.split(":");
                        Long current = byCF.get(parts[0]);
                        if (current == null) {
                            current = Long.decode(parts[1]);
                        } else {
                            current = Long.decode(parts[1]) + current.longValue();
                        }
                        byCF.put(parts[0], current);
                    }
                }
            }
            if (stats.isEmpty())
                return;
            // Use the range of the data, or a user specified range, if provided
            long start = stats.firstKey();
            long end = stats.lastKey();
            if (opts.start != null) {
                start = opts.start - (opts.start % duration);
            }
            if (opts.end != null) {
                end = opts.end - (opts.end % duration);
            }
            // Print a line for each bucket, even if there's no data
            for (long time = start; time <= end; time += duration) {
                Map<String, Long> byCF = stats.get(time);
                List<String> byCFList = new ArrayList<>();
                if (byCF != null) {
                    for (Entry<String, Long> entry : byCF.entrySet()) {
                        byCFList.add(String.format("%s: %d", entry.getKey(), entry.getValue()));
                    }
                }
                printer.println(
                        String.format("%s\t%s", fmt.format(new Date(time)), Joiner.on(", ").join(byCFList)));
            }
            return;
        }

        // Read the whole list for sorting. Unfortunately this means it has to fit into memory.
        ArrayList<Entry<Key, Value>> results = new ArrayList<Entry<Key, Value>>();
        for (Entry<Key, Value> entry : bs) {
            results.add(entry);
        }

        if (opts.sort || opts.reverse) {
            final int order = opts.reverse ? -1 : 1;
            Collections.sort(results, new Comparator<Entry<Key, Value>>() {
                @Override
                public int compare(Entry<Key, Value> o1, Entry<Key, Value> o2) {
                    Text row = o1.getKey().getRow();
                    Text row2 = o2.getKey().getRow();
                    return order * BytesWritable.Comparator.compareBytes(row.getBytes(), Schema.SHARD_LENGTH,
                            row.getLength() - Schema.SHARD_LENGTH, row2.getBytes(), Schema.SHARD_LENGTH,
                            row2.getLength() - Schema.SHARD_LENGTH);
                }
            });
        }
        for (Entry<Key, Value> entry : results) {
            String cq = entry.getKey().getColumnQualifier().toString();
            String parts[] = cq.split(Schema.APP_HOST_SEPARATOR);
            String row = entry.getKey().getRow().toString();
            String value = entry.getValue().toString();
            printer.println(String.format("%s\t%s\t%s\t%s", row.substring(Schema.SHARD_LENGTH), parts[0],
                    parts[1], value));
        }
    } finally {
        bs.close();
    }
}

From source file:org.mgrover.hive.translate.GenericUDFTranslate.java

License:Apache License

/**
 * Pre-processes the from and to strings to populate {@link #replacementMap} and {@link #deletionSet}.
 *
 * @param from
 *          from string to be used for translation
 * @param to
 *          to string to be used for translation
 */
private void populateMappings(Text from, Text to) {
    replacementMap.clear();
    deletionSet.clear();

    ByteBuffer fromBytes = ByteBuffer.wrap(from.getBytes(), 0, from.getLength());
    ByteBuffer toBytes = ByteBuffer.wrap(to.getBytes(), 0, to.getLength());

    // Traverse through the from string, one code point at a time
    while (fromBytes.hasRemaining()) {
        // This will also move the iterator ahead by one code point
        int fromCodePoint = Text.bytesToCodePoint(fromBytes);
        // If the to string has more code points, make sure to traverse it too
        if (toBytes.hasRemaining()) {
            int toCodePoint = Text.bytesToCodePoint(toBytes);
            // If the code point from from string already has a replacement or is to be deleted, we
            // don't need to do anything, just move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            replacementMap.put(fromCodePoint, toCodePoint);
        } else {
            // If the code point from from string already has a replacement or is to be deleted, we
            // don't need to do anything, just move on to the next code point
            if (replacementMap.containsKey(fromCodePoint) || deletionSet.contains(fromCodePoint)) {
                continue;
            }
            deletionSet.add(fromCodePoint);
        }
    }
}

From source file:org.mgrover.hive.translate.GenericUDFTranslate.java

License:Apache License

/**
 * Translates the input string based on {@link #replacementMap} and {@link #deletionSet} and
 * returns the translated string.
 * 
 * @param input
 *          input string to perform the translation on
 * @return translated string
 */
private String processInput(Text input) {
    StringBuilder resultBuilder = new StringBuilder();
    // Obtain the byte buffer from the input string so we can traverse it code point by code point
    ByteBuffer inputBytes = ByteBuffer.wrap(input.getBytes(), 0, input.getLength());
    // Traverse the byte buffer containing the input string one code point at a time
    while (inputBytes.hasRemaining()) {
        int inputCodePoint = Text.bytesToCodePoint(inputBytes);
        // If the code point exists in deletion set, no need to emit out anything for this code point.
        // Continue on to the next code point
        if (deletionSet.contains(inputCodePoint)) {
            continue;
        }

        Integer replacementCodePoint = replacementMap.get(inputCodePoint);
        // If a replacement exists for this code point, emit out the replacement and append it to the
        // output string. If no such replacement exists, emit out the original input code point
        char[] charArray = Character
                .toChars((replacementCodePoint != null) ? replacementCodePoint : inputCodePoint);
        resultBuilder.append(charArray);
    }
    String resultString = resultBuilder.toString();
    return resultString;
}

From source file:org.mrgeo.data.accumulo.output.image.AccumuloMrsImagePyramidOutputFormatProvider.java

License:Apache License

@Override
public void setupJob(final Job job) throws DataProviderException {
    try {
        //TODO: there is an assumption here that the output is going to accumulo directly - not bulk
        super.setupJob(job);

        job.getConfiguration().addResource(AccumuloConnector.getAccumuloPropertiesLocation());

        // zoom level - output zoom level
        zoomLevel = context.getZoomlevel();
        //      zoomLevel = job.getConfiguration().getInt("zoomlevel", 0);
        if (zoomLevel != 0) {
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL,
                    Integer.toString(zoomLevel));
        }

        //job.getConfiguration().set("zoomLevel", Integer.toString(zoomLevel));
        if (doBulk) {
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
            job.getConfiguration().set(
                    MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
        } else {
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);
            job.getConfiguration().set(
                    MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);

        }
        Properties props = AccumuloConnector.getAccumuloProperties();
        if (props != null) {

            // this used to be the variable "name" in TiledOutputFormatContext, but was always "".
            String enc = AccumuloConnector.encodeAccumuloProperties("");
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_RESOURCE, enc);

            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE,
                    props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE));
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS,
                    props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS));

            if (props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE) == null) {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE, this.table);
            } else {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE));
            }

            // username and password
            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER,
                    props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER));

            // make sure the password is set with Base64Encoding
            String pw = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD);
            String isEnc = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, "false");

            if (isEnc.equalsIgnoreCase("true")) {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD));
            } else {
                byte[] p = Base64.encodeBase64(
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD).getBytes());

                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD, new String(p));
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64,
                        new String("true"));
            }

            if (job.getConfiguration().get(MrGeoConstants.MRGEO_PROTECTION_LEVEL) != null) {
                cv = new ColumnVisibility(job.getConfiguration().get(MrGeoConstants.MRGEO_PROTECTION_LEVEL));
            }
            if (cv == null) {

                if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ)) {

                    job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ,
                            props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));

                    cv = new ColumnVisibility(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));

                }

            } else {
                job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ,
                        new String(cv.getExpression()));
            }

        }

        if (doBulk) {

            LongRectangle outTileBounds = tileBounds.toLongRectangle();

            // setup the output for the job
            if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR)) {
                workDir = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR);
                if (workDir != null) {
                    workDir += File.separator;
                }
            } else {
                workDir = "";
            }
            workDir += AccumuloMrsImagePyramidFileOutputFormat.class.getSimpleName() + File.separator
                    + this.table + File.separator;// +
            //            System.currentTimeMillis() +
            //            File.separator;

            // delete the work dir if possible
            //        Path wd = new Path(workDir);
            //        FileSystem fs = HadoopFileUtils.getFileSystem(wd);        
            //        if (fs.exists(wd))
            //        {
            //          fs.delete(wd, false);
            //        }

            job.getConfiguration().set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR, workDir);

            // determine the starting points for the splits
            ArrayList<Pair<Long, Long>> splitPoints = new ArrayList<Pair<Long, Long>>();

            // think about the multiple levels and creating other splits!!!

            long step = bulkThreshold / outTileBounds.getWidth();
            long rem = bulkThreshold % outTileBounds.getWidth();
            if (rem > 0) {
                step++;
            }
            for (long y = outTileBounds.getMinY(); y <= outTileBounds.getMaxY(); y += step) {
                Pair<Long, Long> cur = new Pair<Long, Long>(outTileBounds.getMinX(), y);
                splitPoints.add(cur);
            }

            // we now have our list of split points
            // now build the splits file!!!
            FileSystem fs = null;
            //FileSystem.get(job.getConfiguration());
            PrintStream out = null;

            try {
                Path wd = new Path(workDir);
                fs = FileSystem.get(job.getConfiguration());
                if (fs.exists(wd)) {
                    fs.delete(wd, true);
                }

                out = new PrintStream(new BufferedOutputStream(fs.create(new Path(workDir + "splits.txt"))));

                for (Pair<Long, Long> p : splitPoints) {
                    long split = TMSUtils.tileid(p.getFirst(), p.getSecond(), zoomLevel);
                    //TileIdWritable t = new TileIdWritable(split);
                    Text t = new Text(longToBytes(split));
                    out.println(new String(Base64.encodeBase64(TextUtil.getBytes(t))));
                    log.debug("Point: " + p.getFirst() + "\t" + p.getSecond() + "\t" + split + "\t"
                            + t.getLength());
                }

                job.setNumReduceTasks(splitPoints.size() + 1);
                out.close();

                job.setPartitionerClass(AccumuloMrGeoRangePartitioner.class);
                AccumuloMrGeoRangePartitioner.setSplitFile(job, workDir + "splits.txt");

            } catch (IOException ioe) {
                ioe.printStackTrace();
                throw new DataProviderException(
                        "Problem creating output splits.txt for bulk ingest directory.");
            }

            job.setOutputFormatClass(AccumuloMrsImagePyramidFileOutputFormat.class);

            AccumuloMrsImagePyramidFileOutputFormat.setOutputPath(job, new Path(workDir + "files"));
            //AccumuloMrsImagePyramidFileOutputFormat.setZoomLevel(zoomLevel);

        } else {

            log.info("Setting the output format of: "
                    + AccumuloMrsImagePyramidOutputFormat.class.getCanonicalName());

            job.setOutputFormatClass(AccumuloMrsImagePyramidOutputFormat.class);
            AccumuloMrsImagePyramidOutputFormat.setJob(job);

            log.info("Setting zoom level to " + zoomLevel);
            log.info("Visibility is " + cv.toString());
            log.info("Setting the number of reducers to " + MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
            job.setNumReduceTasks(MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
        }

        job.setOutputKeyClass(TileIdWritable.class);
        job.setOutputValueClass(RasterWritable.class);

    } catch (IOException ioe) {
        throw new DataProviderException("Error running job setup", ioe);
    }

}

From source file:org.mrgeo.data.accumulo.output.image.AccumuloMrsPyramidOutputFormatProvider.java

License:Apache License

@SuppressWarnings("squid:S2095") // hadoop FileSystem cannot be closed, or else subsequent uses will fail
private void setupConfig(final Configuration conf, final Job job) throws DataProviderException {
    try {
        // zoom level - output zoom level
        zoomLevel = context.getZoomLevel();
        //      zoomLevel = conf.getInt("zoomlevel", 0);
        if (zoomLevel != 0) {
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOMLEVEL, Integer.toString(zoomLevel));
        }

        //conf.set("zoomLevel", Integer.toString(zoomLevel));
        if (doBulk || forceBulk) {
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_BULK);
        } else {
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_JOBTYPE,
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PREFIX + Integer.toString(zoomLevel),
                    MrGeoAccumuloConstants.MRGEO_ACC_VALUE_JOB_DIRECT);

        }
        Properties props = AccumuloConnector.getAccumuloProperties();

        // this used to be the variable "name" in ImageOutputFormatContext, but was always "".
        String enc = AccumuloConnector.encodeAccumuloProperties("");
        conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_RESOURCE, enc);

        //        conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE,
        //                 props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_INSTANCE));
        //        conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS,
        //                 props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_ZOOKEEPERS));

        if (props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE) == null) {
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE, this.table);
        } else {
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE,
                    props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_OUTPUT_TABLE));
        }

        //        // username and password
        //        conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER,
        //                 props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_USER));
        //
        //        // make sure the password is set with Base64Encoding
        //        String pw = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD);
        //        String isEnc = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64, "false");
        //
        //        if(isEnc.equalsIgnoreCase("true")){
        //          conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD,
        //                   props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD));
        //        } else {
        //          byte[] p = Base64.encodeBase64(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD).getBytes());
        //
        //          conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PASSWORD,
        //                   new String(p));
        //          conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_PWENCODED64,
        //                   new String("true"));
        //        }

        if (conf.get(MrGeoConstants.MRGEO_PROTECTION_LEVEL) != null) {
            cv = new ColumnVisibility(conf.get(MrGeoConstants.MRGEO_PROTECTION_LEVEL));
        }
        if (cv == null) {

            if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ)) {

                conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ,
                        props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));

                cv = new ColumnVisibility(props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ));

            }

        } else {
            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_VIZ, new String(cv.getExpression()));
        }

        if (doBulk || forceBulk) {

            LongRectangle outTileBounds = tileBounds.toLongRectangle();

            // setup the output for the job
            if (props.containsKey(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR)) {
                workDir = props.getProperty(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR);
                if (workDir != null) {
                    workDir += File.separator;
                }
            } else {
                workDir = "";
            }
            workDir += AccumuloMrsPyramidFileOutputFormat.class.getSimpleName() + File.separator + this.table
                    + File.separator;// +
            //            System.currentTimeMillis() +
            //            File.separator;

            // delete the work dir if possible
            Path wd = new Path(workDir);
            FileSystem fs = FileSystem.get(conf);
            if (fs.exists(wd)) {
                fs.delete(wd, true);
            }

            conf.set(MrGeoAccumuloConstants.MRGEO_ACC_KEY_WORKDIR, workDir);

            if (job != null) {
                // determine the starting points for the splits
                ArrayList<Pair<Long, Long>> splitPoints = new ArrayList<Pair<Long, Long>>();

                // think about the multiple levels and creating other splits!!!

                long step = bulkThreshold / outTileBounds.getWidth();
                long rem = bulkThreshold % outTileBounds.getWidth();
                if (rem > 0) {
                    step++;
                }
                for (long y = outTileBounds.getMinY(); y <= outTileBounds.getMaxY(); y += step) {
                    Pair<Long, Long> cur = new Pair<Long, Long>(outTileBounds.getMinX(), y);
                    splitPoints.add(cur);
                }

                // we now have our list of split points
                // now build the splits file!!!
                try (BufferedOutputStream bos = new BufferedOutputStream(
                        fs.create(new Path(workDir + "splits.txt")))) {
                    try (PrintStream out = new PrintStream(bos)) {
                        for (Pair<Long, Long> p : splitPoints) {
                            long split = TMSUtils.tileid(p.getFirst(), p.getSecond(), zoomLevel);
                            //TileIdWritable t = new TileIdWritable(split);
                            Text t = new Text(longToBytes(split));
                            out.println(Base64Utils.encodeObject(t.toString()));
                            log.debug("Point: " + p.getFirst() + "\t" + p.getSecond() + "\t" + split + "\t"
                                    + t.getLength());
                        }

                        job.setNumReduceTasks(splitPoints.size() + 1);
                        out.close();

                        job.setPartitionerClass(AccumuloMrGeoRangePartitioner.class);
                        AccumuloMrGeoRangePartitioner.setSplitFile(job, workDir + "splits.txt");

                    }
                } catch (IOException ioe) {
                    throw new DataProviderException(
                            "Problem creating output splits.txt for bulk ingest directory.", ioe);
                }

                job.setOutputFormatClass(AccumuloMrsPyramidFileOutputFormat.class);
            }
            Path workFilesPath = new Path(workDir + "files");
            if (job != null) {
                AccumuloMrsPyramidFileOutputFormat.setOutputPath(job, workFilesPath);
                //AccumuloMrsPyramidFileOutputFormat.setZoomLevel(zoomLevel);
            } else {
                Path outputDir = workFilesPath.getFileSystem(conf).makeQualified(workFilesPath);
                //          conf.set(AccumuloMrsPyramidFileOutputFormat.OUTDIR, outputDir.toString());
                conf.set("mapred.output.dir", outputDir.toString());
                conf.set("mapreduce.output.fileoutputformat.outputdir", outputDir.toString());
            }

        } else {
            if (job != null) {
                log.info("Setting the output format of: "
                        + AccumuloMrsPyramidOutputFormat.class.getCanonicalName());

                job.setOutputFormatClass(AccumuloMrsPyramidOutputFormat.class);
                AccumuloMrsPyramidOutputFormat.setJob(job);

                log.info("Setting zoom level to " + zoomLevel);
                log.info("Visibility is " + cv.toString());
                log.info("Setting the number of reducers to "
                        + MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
                job.setNumReduceTasks(MrGeoAccumuloConstants.MRGEO_DEFAULT_NUM_REDUCERS);
            }
        }

        if (job != null) {
            job.setOutputKeyClass(TileIdWritable.class);
            job.setOutputValueClass(RasterWritable.class);
        }

    } catch (IOException ioe) {
        throw new DataProviderException("Error running job setup", ioe);
    }

}

From source file:org.mrgeo.data.accumulo.utils.AccumuloUtils.java

License:Apache License

/**
 * Converts the Text representation of a tileId back to a long.
 *
 * @param rowId Text object to convert.
 * @return the long value from the Text object.
 */
public static long toLong(Text rowId) {

    byte[] outB = new byte[8];
    for (int x = 0; x < outB.length; x++) {
        if (x >= rowId.getLength()) {
            outB[x] = 0x0;
        } else {
            outB[x] = rowId.getBytes()[x];
        }
    }

    return ByteBuffer.wrap(outB).getLong();
}
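
A quick usage sketch of the helper above (the class name ToLongDemo and the tileId value are made up for illustration; it assumes the MrGeo AccumuloUtils class from this listing is on the classpath):

import java.nio.ByteBuffer;

import org.apache.hadoop.io.Text;
import org.mrgeo.data.accumulo.utils.AccumuloUtils;

public class ToLongDemo {
    public static void main(String[] args) {
        long tileId = 123456789L;
        // Build an 8-byte, big-endian row key holding the tileId
        Text rowId = new Text(ByteBuffer.allocate(8).putLong(tileId).array());
        System.out.println(AccumuloUtils.toLong(rowId)); // 123456789
    }
}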

From source file:org.mrgeo.vector.mrsvector.OSMTileIngester.java

License:Apache License

static byte[] getBytes(final Text text) {
    byte[] bytes = text.getBytes();
    if (text.getLength() == bytes.length) {
        return bytes;
    }

    return Arrays.copyOf(bytes, text.getLength());
}
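
For context, a reused Text often keeps its old, larger backing array (set(byte[], int, int) does not shrink it), so getBytes() can be longer than getLength(); the copy above trims it. A minimal standalone sketch of that pitfall (the class name TextBytesPitfall is made up for illustration):

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextBytesPitfall {
    public static void main(String[] args) {
        Text t = new Text("a much longer value");           // 19 UTF-8 bytes
        byte[] shorter = "short".getBytes(StandardCharsets.UTF_8);
        t.set(shorter, 0, shorter.length);                  // backing array is reused, not shrunk

        System.out.println(t.getLength());                  // 5
        System.out.println(t.getBytes().length >= 19);      // true: stale bytes remain past getLength()
        // Always bound reads by getLength():
        System.out.println(new String(t.getBytes(), 0, t.getLength(), StandardCharsets.UTF_8)); // short
    }
}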

From source file:org.platform.modules.hadoop.format.output.CustomOutputFormat.java

License:Apache License

/**
 * Creates the final output file and writes it row by row. After each row is
 * appended, a configured row separator is appended.
 *
 * @param jc
 *          the job configuration file
 * @param outPath
 *          the final output file to be created
 * @param valueClass
 *          the value class used for create
 * @param isCompressed
 *          whether the content is compressed or not
 * @param tableProperties
 *          the tableProperties of this file's corresponding table
 * @param progress
 *          progress used for status report
 * @return the RecordWriter
 */
@Override
public RecordWriter getHiveRecordWriter(JobConf jc, Path outPath, Class<? extends Writable> valueClass,
        boolean isCompressed, Properties tableProperties, Progressable progress) throws IOException {
    int rowSeparator = 0;
    String rowSeparatorString = tableProperties.getProperty(serdeConstants.LINE_DELIM, "\n");
    try {
        rowSeparator = Byte.parseByte(rowSeparatorString);
    } catch (NumberFormatException e) {
        rowSeparator = rowSeparatorString.charAt(0);
    }

    final int finalRowSeparator = rowSeparator;
    FileSystem fs = outPath.getFileSystem(jc);
    final OutputStream outStream = Utilities.createCompressedStream(jc, fs.create(outPath), isCompressed);
    return new RecordWriter() {
        @SuppressWarnings("deprecation")
        public void write(Writable r) throws IOException {
            if (r instanceof Text) {
                Text tr = (Text) r;
                String strReplace = tr.toString().toLowerCase().replace(":", "::");
                Text txtReplace = new Text();
                txtReplace.set(strReplace);
                outStream.write(txtReplace.getBytes(), 0, txtReplace.getLength());
                //          outStream.write(tr.getBytes(), 0, tr.getLength());
                outStream.write(finalRowSeparator);
            } else {
                // DynamicSerDe always writes out BytesWritable
                BytesWritable bw = (BytesWritable) r;
                outStream.write(bw.get(), 0, bw.getSize());
                outStream.write(finalRowSeparator);
            }
        }

        public void close(boolean abort) throws IOException {
            outStream.close();
        }
    };
}