List of usage examples for org.apache.hadoop.io.Text#getLength()
@Override public int getLength()
From source file:org.apache.accumulo.examples.mapreduce.TableToFile.java
License:Apache License
/**
 * Configures and runs a map-only job that reads an Accumulo table and writes text files.
 *
 * <p>The column list from the options is a comma-separated list of {@code family[:qualifier]}
 * entries; entries with an empty family are skipped.
 *
 * @param args command-line arguments handed to {@code Opts.parseArgs}
 * @return 0 if the job reports success, 1 otherwise
 */
@Override
public int run(String[] args)
    throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException {
  Job job = Job.getInstance(getConf());
  job.setJobName(this.getClass().getSimpleName() + "_" + System.currentTimeMillis());
  job.setJarByClass(this.getClass());

  Opts opts = new Opts();
  opts.parseArgs(getClass().getName(), args);

  job.setInputFormatClass(AccumuloInputFormat.class);
  opts.setAccumuloConfigs(job);

  // Translate each "family[:qualifier]" spec into a (family, qualifier) pair.
  HashSet<Pair<Text, Text>> columnsToFetch = new HashSet<>();
  String[] columnSpecs = opts.columns.split(",");
  for (String spec : columnSpecs) {
    int colon = spec.indexOf(":");
    Text family;
    Text qualifier;
    if (colon < 0) {
      family = new Text(spec);
      qualifier = null;
    } else {
      family = new Text(spec.substring(0, colon));
      qualifier = new Text(spec.substring(colon + 1));
    }
    if (family.getLength() > 0) {
      columnsToFetch.add(new Pair<>(family, qualifier));
    }
  }
  if (!columnsToFetch.isEmpty()) {
    AccumuloInputFormat.fetchColumns(job, columnsToFetch);
  }

  // Map-only job: mapper output goes straight to the text output format.
  job.setMapperClass(TTFMapper.class);
  job.setMapOutputKeyClass(NullWritable.class);
  job.setMapOutputValueClass(Text.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, new Path(opts.output));

  job.waitForCompletion(true);
  if (job.isSuccessful()) {
    return 0;
  }
  return 1;
}
From source file:org.apache.accumulo.examples.simple.filedata.KeyUtil.java
License:Apache License
/** * Split a text object using a null byte separator into an array of strings. * /*w w w . j a v a 2 s .co m*/ * @param t * null-byte separated text object * @return an array of strings */ public static String[] splitNullSepText(Text t) { ArrayList<String> s = new ArrayList<String>(); byte[] b = t.getBytes(); int lastindex = 0; for (int i = 0; i < t.getLength(); i++) { if (b[i] == (byte) 0) { s.add(new String(b, lastindex, i - lastindex)); lastindex = i + 1; } } s.add(new String(b, lastindex, t.getLength() - lastindex)); return s.toArray(new String[s.size()]); }
From source file:org.apache.accumulo.examples.simple.mapreduce.RowHash.java
License:Apache License
/**
 * Configures and runs a map-only job that reads one column of an Accumulo table and writes
 * mutations back through {@code AccumuloOutputFormat}.
 *
 * <p>The column option has the form {@code family[:qualifier]}; no column is fetched explicitly
 * when the family part is empty.
 *
 * @param args command-line arguments handed to {@code Opts.parseArgs}
 * @return 0 if the job reports success, 1 otherwise
 */
@Override
public int run(String[] args) throws Exception {
  Job job = JobUtil.getJob(getConf());
  job.setJobName(this.getClass().getName());
  job.setJarByClass(this.getClass());

  Opts opts = new Opts();
  opts.parseArgs(RowHash.class.getName(), args);

  job.setInputFormatClass(AccumuloInputFormat.class);
  opts.setAccumuloConfigs(job);

  // Split "family[:qualifier]" at the first colon, if there is one.
  String columnSpec = opts.column;
  int colon = columnSpec.indexOf(":");
  Text family;
  Text qualifier;
  if (colon < 0) {
    family = new Text(columnSpec);
    qualifier = null;
  } else {
    family = new Text(columnSpec.substring(0, colon));
    qualifier = new Text(columnSpec.substring(colon + 1));
  }
  if (family.getLength() > 0) {
    Pair<Text, Text> column = new Pair<Text, Text>(family, qualifier);
    AccumuloInputFormat.fetchColumns(job, Collections.singleton(column));
  }

  // Map-only job: the mapper emits mutations directly.
  job.setMapperClass(HashDataMapper.class);
  job.setMapOutputKeyClass(Text.class);
  job.setMapOutputValueClass(Mutation.class);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(AccumuloOutputFormat.class);

  job.waitForCompletion(true);
  if (job.isSuccessful()) {
    return 0;
  }
  return 1;
}
From source file:org.apache.accumulo.examples.simple.mapreduce.TableToFile.java
License:Apache License
@Override public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, AccumuloSecurityException { Job job = JobUtil.getJob(getConf()); job.setJobName(this.getClass().getSimpleName() + "_" + System.currentTimeMillis()); job.setJarByClass(this.getClass()); Opts opts = new Opts(); opts.parseArgs(getClass().getName(), args); job.setInputFormatClass(AccumuloInputFormat.class); opts.setAccumuloConfigs(job);//w ww. j ava2 s. c om HashSet<Pair<Text, Text>> columnsToFetch = new HashSet<Pair<Text, Text>>(); for (String col : opts.columns.split(",")) { int idx = col.indexOf(":"); Text cf = new Text(idx < 0 ? col : col.substring(0, idx)); Text cq = idx < 0 ? null : new Text(col.substring(idx + 1)); if (cf.getLength() > 0) columnsToFetch.add(new Pair<Text, Text>(cf, cq)); } if (!columnsToFetch.isEmpty()) AccumuloInputFormat.fetchColumns(job, columnsToFetch); job.setMapperClass(TTFMapper.class); job.setMapOutputKeyClass(NullWritable.class); job.setMapOutputValueClass(Text.class); job.setNumReduceTasks(0); job.setOutputFormatClass(TextOutputFormat.class); TextOutputFormat.setOutputPath(job, new Path(opts.output)); job.waitForCompletion(true); return job.isSuccessful() ? 0 : 1; }
From source file:org.apache.accumulo.examples.wikisearch.iterator.FieldIndexIterator.java
License:Apache License
/**
 * Tries to move this iterator forward so that it is positioned at or after {@code jumpKey}.
 *
 * <p>Outline of the logic in this method:
 * <ul>
 * <li>Returns {@code false} right away when {@code parentEndRow} is set and the jump row sorts
 * after it.</li>
 * <li>Without a top key: returns {@code false} when {@code currentRow} is {@code null} or is
 * greater than or equal to the jump row; otherwise this iterator is treated as being behind the
 * jump row.</li>
 * <li>With a top key: the top key's row is compared with the jump row.</li>
 * <li>Ahead of the jump row: returns {@code true} if {@code canBeInNextRow()} permits it,
 * otherwise clears {@code topKey}/{@code topValue} and returns {@code false}.</li>
 * <li>Behind the jump row: replaces {@code range} with {@code buildRange(jumpKey.getRow())},
 * calls {@code jumpSeek(range)}, and returns {@code true} only if a top exists afterwards and
 * {@code currentRow} is not past {@code parentEndRow}; otherwise the top is cleared and
 * {@code false} is returned.</li>
 * <li>Same row: compares the uids that {@code keyParser} extracts from the top key and from
 * {@code jumpKey}. If this iterator's uid is smaller, it seeks to a range that starts at
 * {@code fValue + NULL_BYTE + } the part of the jump qualifier after the {@code NULL_BYTE}
 * located by {@code find}, and returns the result of {@code jumpSeek}. A jump qualifier without
 * a {@code NULL_BYTE} is logged as an error, the top is cleared and {@code false} is returned.
 * Otherwise nothing is moved and {@code hasTop()} is returned.</li>
 * </ul>
 *
 * @param jumpKey key to jump to; its row and column qualifier are read
 * @return {@code true} if this iterator has a usable top after the jump attempt, {@code false}
 *         otherwise (see the cases above)
 * @throws IOException declared by this method; presumably propagated from {@code jumpSeek} -
 *         NOTE(review): confirm against that helper
 */
public boolean jump(Key jumpKey) throws IOException { if (log.isDebugEnabled()) { String pEndRow = "empty"; if (parentEndRow != null) { pEndRow = parentEndRow.toString(); }//from w ww. j a va2s . c om log.debug("jump, current range: " + range + " parentEndRow is: " + pEndRow); } if (parentEndRow != null && jumpKey.getRow().compareTo(parentEndRow) > 0) { // can't go there. if (log.isDebugEnabled()) { log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow); } return false; } int comp; if (!this.hasTop()) { if (log.isDebugEnabled()) { log.debug("current row: " + this.currentRow); } /* * if I don't have a top, then I should be out of my range for my current row. Need to check parent range to see if I'm supposed to continue to next row * or not. Current row can be null because maybe I never found anything in this row. */ if (parentEndRow != null) { // if jumpKey row is greater than parentEndRow, stop if (jumpKey.getRow().compareTo(parentEndRow) > 0) { if (log.isDebugEnabled()) { log.debug("jumpKey row is greater than my parentEndRow, done"); } return false; } // if my current row is null, I must have hit the end of the tablet if (currentRow == null) { if (log.isDebugEnabled()) { log.debug("I have parentEndRow, but no current row, must have hit end of tablet, done"); } return false; } // if my current row is greater than jump row stop, a seek will be // called to get me going again. 
If my row is equal, but i don't // have a topkey, i'm done if (currentRow.compareTo(jumpKey.getRow()) >= 0) { if (log.isDebugEnabled()) { log.debug("I have parentEndRow, but topKey, and my currentRow is >= jumpRow, done"); } return false; } } else { // we're allowed to go to the end of the tablet // if my current row is null, I must have hit the end of the tablet if (currentRow == null) { if (log.isDebugEnabled()) { log.debug("no parentEndRow and current Row is null, must have hit end of tablet, done"); } return false; } if (currentRow.compareTo(jumpKey.getRow()) >= 0) { // i'm past or equal to the jump point and have no top, // jumping's not going to help if (log.isDebugEnabled()) { log.debug("no parentEndRow, no topKey, and currentRow is >= jumpRow, done"); } return false; } } // ok, jumpKey is ahead of me I'll mark it and allow the normal // flow to jump there and see if I have top. if (log.isDebugEnabled()) { log.debug("no topKey, but jumpRow is ahead and I'm allowed to go to it, marking"); } comp = -1; } else { // I have a topKey, I can do the normal comparisons if (log.isDebugEnabled()) { log.debug("have top, can do normal comparisons"); } comp = this.topKey.getRow().compareTo(jumpKey.getRow()); } // ------------------ // compare rows if (comp > 0) { // my row is ahead of jump key if (canBeInNextRow()) { if (log.isDebugEnabled()) { log.debug("I'm ahead of jump row & it's ok."); log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow: " + parentEndRow); } return true; } else { if (log.isDebugEnabled()) { log.debug("I'm ahead of jump row & can't be here, or at end of tablet."); } topKey = null; topValue = null; return false; } } else if (comp < 0) { // a row behind jump key, need to move forward if (log.isDebugEnabled()) { String myRow = ""; if (hasTop()) { myRow = topKey.getRow().toString(); } else if (currentRow != null) { myRow = currentRow.toString(); } log.debug("My row " + myRow + " is less than jump row: " + 
jumpKey.getRow() + " seeking"); } range = buildRange(jumpKey.getRow()); // this.seek(range, EMPTY_COL_FAMS, false); boolean success = jumpSeek(range); if (log.isDebugEnabled() && success) { log.debug("uid forced jump, found topKey: " + topKey); } if (!this.hasTop()) { log.debug("seeked with new row and had no top"); topKey = null; topValue = null; return false; } else if (parentEndRow != null && currentRow.compareTo(parentEndRow) > 0) { if (log.isDebugEnabled()) { log.debug("myRow: " + getTopKey().getRow() + " is past parentEndRow: " + parentEndRow); } topKey = null; topValue = null; return false; } if (log.isDebugEnabled()) { log.debug("jumped, valid top: " + getTopKey()); } return true; } else { // rows are equal, check the uid! keyParser.parse(topKey); String myUid = keyParser.getUid(); keyParser.parse(jumpKey); String jumpUid = keyParser.getUid(); int ucomp = myUid.compareTo(jumpUid); if (log.isDebugEnabled()) { log.debug("topKeyUid: " + myUid + " jumpUid: " + jumpUid + " myUid.compareTo(jumpUid)->" + ucomp); } if (ucomp < 0) { // need to move up log.debug("my uid is less than jumpUid, topUid: " + myUid + " jumpUid: " + jumpUid); Text cq = jumpKey.getColumnQualifier(); int index = cq.find(NULL_BYTE); if (0 <= index) { cq.set(cq.getBytes(), index + 1, cq.getLength() - index - 1); } else { log.error("Expected a NULL separator in the column qualifier"); this.topKey = null; this.topValue = null; return false; } // note my internal range stays the same, I just need to move forward Key startKey = new Key(topKey.getRow(), fName, new Text(fValue + NULL_BYTE + cq)); Key endKey = new Key(topKey.getRow(), fName, new Text(fValue + ONE_BYTE)); range = new Range(startKey, true, endKey, false); log.debug("Using range: " + range + " to seek"); // source.seek(range, EMPTY_COL_FAMS, false); boolean success = jumpSeek(range); if (log.isDebugEnabled() && success) { log.debug("uid forced jump, found topKey: " + topKey); } return success; } else { // else do nothing log.debug("my 
uid is greater than jumpUid, topKey: " + topKey + " jumpKey: " + jumpKey); log.debug("doing nothing"); } } return hasTop(); }
From source file:org.apache.accumulo.pig.AccumuloStorage.java
License:Apache License
/**
 * Converts a tuple into a single mutation to be written to a table.
 *
 * <p>Field 0 is the row, field 1 the column family and field 2 the column qualifier. When the
 * tuple has more than four fields, field 3 is the column visibility and field 4 the value;
 * otherwise field 3 is the value. An empty visibility is written as no visibility.
 *
 * @param tuple the tuple to convert
 * @return a singleton collection holding the mutation built from {@code tuple}
 */
public Collection<Mutation> getMutations(Tuple tuple) throws ExecException, IOException {
  Mutation mutation = new Mutation(StorageUtils.objToText(tuple.get(0)));
  Text family = StorageUtils.objToText(tuple.get(1));
  Text qualifier = StorageUtils.objToText(tuple.get(2));

  if (tuple.size() <= 4) {
    // Short form: no visibility field, the value sits at index 3.
    Value plainValue = new Value(StorageUtils.objToBytes(tuple.get(3)));
    mutation.put(family, qualifier, plainValue);
    return Collections.singleton(mutation);
  }

  // Long form: visibility at index 3, value at index 4.
  Text visibility = StorageUtils.objToText(tuple.get(3));
  Value labelledValue = new Value(StorageUtils.objToBytes(tuple.get(4)));
  if (visibility.getLength() != 0) {
    mutation.put(family, qualifier, new ColumnVisibility(visibility), labelledValue);
  } else {
    mutation.put(family, qualifier, labelledValue);
  }
  return Collections.singleton(mutation);
}
From source file:org.apache.accumulo.server.tabletserver.Tablet.java
License:Apache License
private SplitRowSpec findSplitRow(Collection<FileRef> files) { // never split the root tablet // check if we already decided that we can never split // check to see if we're big enough to split long splitThreshold = acuTableConf.getMemoryInBytes(Property.TABLE_SPLIT_THRESHOLD); if (extent.isRootTablet() || estimateTabletSize() <= splitThreshold) { return null; }//from ww w . j av a 2 s .c om // have seen a big row before, do not bother checking unless a minor compaction or map file import has occurred. if (sawBigRow) { if (timeOfLastMinCWhenBigFreakinRowWasSeen != lastMinorCompactionFinishTime || timeOfLastImportWhenBigFreakinRowWasSeen != lastMapFileImportTime) { // a minor compaction or map file import has occurred... check again sawBigRow = false; } else { // nothing changed, do not split return null; } } SortedMap<Double, Key> keys = null; try { // we should make .25 below configurable keys = FileUtil.findMidPoint(fs, tabletServer.getSystemConfiguration(), extent.getPrevEndRow(), extent.getEndRow(), files, .25); } catch (IOException e) { log.error("Failed to find midpoint " + e.getMessage()); return null; } // check to see if one row takes up most of the tablet, in which case we can not split try { Text lastRow; if (extent.getEndRow() == null) { Key lastKey = (Key) FileUtil.findLastKey(fs, tabletServer.getSystemConfiguration(), files); lastRow = lastKey.getRow(); } else { lastRow = extent.getEndRow(); } // check to see that the midPoint is not equal to the end key if (keys.get(.5).compareRow(lastRow) == 0) { if (keys.firstKey() < .5) { Key candidate = keys.get(keys.firstKey()); if (candidate.compareRow(lastRow) != 0) { // we should use this ratio in split size estimations if (log.isTraceEnabled()) log.trace(String.format( "Splitting at %6.2f instead of .5, row at .5 is same as end row%n", keys.firstKey())); return new SplitRowSpec(keys.firstKey(), candidate.getRow()); } } log.warn("Cannot split tablet " + extent + " it contains a big row : " + lastRow); 
sawBigRow = true; timeOfLastMinCWhenBigFreakinRowWasSeen = lastMinorCompactionFinishTime; timeOfLastImportWhenBigFreakinRowWasSeen = lastMapFileImportTime; return null; } Key mid = keys.get(.5); Text text = (mid == null) ? null : mid.getRow(); SortedMap<Double, Key> firstHalf = keys.headMap(.5); if (firstHalf.size() > 0) { Text beforeMid = firstHalf.get(firstHalf.lastKey()).getRow(); Text shorter = new Text(); int trunc = longestCommonLength(text, beforeMid); shorter.set(text.getBytes(), 0, Math.min(text.getLength(), trunc + 1)); text = shorter; } return new SplitRowSpec(.5, text); } catch (IOException e) { // don't split now, but check again later log.error("Failed to find lastkey " + e.getMessage()); return null; } }
From source file:org.apache.accumulo.server.tabletserver.Tablet.java
License:Apache License
/**
 * Counts how many leading bytes two texts have in common.
 *
 * @param text first text
 * @param beforeMid second text
 * @return the length of the shared byte prefix; never more than the shorter text's length
 */
private static int longestCommonLength(Text text, Text beforeMid) {
  byte[] left = text.getBytes();
  byte[] right = beforeMid.getBytes();
  // Only the first getLength() bytes of each backing array are valid content.
  int limit = Math.min(text.getLength(), beforeMid.getLength());
  for (int i = 0; i < limit; i++) {
    if (left[i] != right[i]) {
      return i;
    }
  }
  return limit;
}
From source file:org.apache.accumulo.server.test.performance.scan.CollectTabletStats.java
License:Apache License
private static void calcTabletStats(Connector conn, String table, String[] auths, int batchSize, KeyExtent ke, String[] columns) throws Exception { // long t1 = System.currentTimeMillis(); Scanner scanner = conn.createScanner(table, new Authorizations(auths)); scanner.setBatchSize(batchSize);//from w w w . j a va2 s . c om scanner.setRange(new Range(ke.getPrevEndRow(), false, ke.getEndRow(), true)); for (String c : columns) { scanner.fetchColumnFamily(new Text(c)); } Stat rowLen = new Stat(); Stat cfLen = new Stat(); Stat cqLen = new Stat(); Stat cvLen = new Stat(); Stat valLen = new Stat(); Stat colsPerRow = new Stat(); Text lastRow = null; int colsPerRowCount = 0; for (Entry<Key, Value> entry : scanner) { Key key = entry.getKey(); Text row = key.getRow(); if (lastRow == null) { lastRow = row; } if (!lastRow.equals(row)) { colsPerRow.addStat(colsPerRowCount); lastRow = row; colsPerRowCount = 0; } colsPerRowCount++; rowLen.addStat(row.getLength()); cfLen.addStat(key.getColumnFamilyData().length()); cqLen.addStat(key.getColumnQualifierData().length()); cvLen.addStat(key.getColumnVisibilityData().length()); valLen.addStat(entry.getValue().get().length); } synchronized (System.out) { System.out.println(""); System.out.println("\tTablet " + ke.getUUID() + " statistics : "); printStat("Row length", rowLen); printStat("Column family length", cfLen); printStat("Column qualifier length", cqLen); printStat("Column visibility length", cvLen); printStat("Value length", valLen); printStat("Columns per row", colsPerRow); System.out.println(""); } }
From source file:org.apache.accumulo.server.util.VerifyTabletAssignments.java
License:Apache License
/**
 * Asks one tablet server to scan a small range inside each extent it is supposed to host and
 * records the extents it fails on.
 *
 * <p>For every extent a narrow range is built: when the extent has an end row, the range runs
 * from a copy of the end row with its last byte decremented (inclusive) up to the end row
 * (exclusive); when it has none, the range starts at the previous end row with {@code 'a'}
 * appended (or at {@code "1234567890"} if there is no previous end row) and ends at that start
 * row with {@code '!'} appended. All ranges are sent as one multi-scan and every result batch
 * is handed to {@code checkFailures}.
 *
 * <p>The Thrift client is returned in a {@code finally} block so that it is not leaked when one
 * of the RPCs throws.
 *
 * @param context client context used to obtain the client and credentials
 * @param entry tablet server address mapped to the extents assigned to it
 * @param failures set passed to {@code checkFailures} for each result batch
 */
private static void checkTabletServer(ClientContext context, Entry<HostAndPort, List<KeyExtent>> entry,
    HashSet<KeyExtent> failures) throws ThriftSecurityException, TException, NoSuchScanIDException {
  TabletClientService.Iface client = ThriftUtil.getTServerClient(entry.getKey(), context);
  try {
    Map<TKeyExtent, List<TRange>> batch = new TreeMap<>();

    for (KeyExtent keyExtent : entry.getValue()) {
      Text row = keyExtent.getEndRow();
      Text row2 = null;

      if (row == null) {
        row = keyExtent.getPrevEndRow();

        if (row != null) {
          row = new Text(row);
          row.append(new byte[] { 'a' }, 0, 1);
        } else {
          row = new Text("1234567890");
        }

        row2 = new Text(row);
        row2.append(new byte[] { '!' }, 0, 1);
      } else {
        row = new Text(row);
        row2 = new Text(row);

        // NOTE(review): assumes a non-empty end row whose last byte is not 0 - confirm.
        row.getBytes()[row.getLength() - 1] = (byte) (row.getBytes()[row.getLength() - 1] - 1);
      }

      Range r = new Range(row, true, row2, false);
      batch.put(keyExtent.toThrift(), Collections.singletonList(r.toThrift()));
    }

    TInfo tinfo = Tracer.traceInfo();
    Map<String, Map<String, String>> emptyMapSMapSS = Collections.emptyMap();
    List<IterInfo> emptyListIterInfo = Collections.emptyList();
    List<TColumn> emptyListColumn = Collections.emptyList();
    InitialMultiScan is = client.startMultiScan(tinfo, context.rpcCreds(), batch, emptyListColumn,
        emptyListIterInfo, emptyMapSMapSS, Authorizations.EMPTY.getAuthorizationsBB(), false, null, 0L,
        null, null);

    if (is.result.more) {
      MultiScanResult result = client.continueMultiScan(tinfo, is.scanID);
      checkFailures(entry.getKey(), failures, result);

      while (result.more) {
        result = client.continueMultiScan(tinfo, is.scanID);
        checkFailures(entry.getKey(), failures, result);
      }
    }

    client.closeMultiScan(tinfo, is.scanID);
  } finally {
    // always hand the client back, even when a scan call failed
    ThriftUtil.returnClient((TServiceClient) client);
  }
}