List of usage examples for org.apache.hadoop.io.Text.getLength()
@Override public int getLength()
From source file:com.uber.hoodie.common.table.timeline.HoodieArchivedTimeline.java
License:Apache License
public HoodieArchivedTimeline(HoodieTableMetaClient metaClient) { // Read back the commits to make sure Path archiveLogPath = HoodieArchivedTimeline.getArchiveLogPath(metaClient.getArchivePath()); try (SequenceFile.Reader reader = new SequenceFile.Reader(metaClient.getHadoopConf(), SequenceFile.Reader.file(archiveLogPath))) { Text key = new Text(); Text val = new Text(); while (reader.next(key, val)) { // TODO - limit the number of commits loaded in memory. this could get very large. // This is okay because only tooling will load the archived commit timeline today readCommits.put(key.toString(), Arrays.copyOf(val.getBytes(), val.getLength())); }/*www .j a v a 2 s .c om*/ this.setInstants(readCommits.keySet().stream() .map(s -> new HoodieInstant(false, HoodieTimeline.COMMIT_ACTION, s)) .collect(Collectors.toList())); } catch (IOException e) { throw new HoodieIOException("Could not load archived commit timeline from path " + archiveLogPath, e); } // multiple casts will make this lambda serializable - // http://docs.oracle.com/javase/specs/jls/se8/html/jls-15.html#jls-15.16 this.details = (Function<HoodieInstant, Optional<byte[]>> & Serializable) this::getInstantDetails; this.metaClient = metaClient; }
From source file:com.yahoo.glimmer.indexing.generator.DocumentMapper.java
License:Open Source License
@Override public void map(LongWritable key, Text record, Context context) throws IOException, InterruptedException { doc.setContent(record.getBytes(), record.getLength()); if (doc == null || doc.getSubject() == null) { // Failed parsing context.getCounter(Counters.FAILED_PARSING).increment(1); LOG.error("Document failed parsing"); return;//w ww .j a va2 s. c o m } if (doc.getId() < 0) { throw new IllegalStateException("Negative docId:" + doc.getId() + " subject:" + doc.getSubject()); } // This is used to write the position of the last occurrence and testing // if the fakeDocOccurrrence for the term has already been written. Map<String, DocStat> termToDocStatMap = new HashMap<String, DocStat>(); // Iterate over all indices for (int indexId = 0; indexId < fields.length; indexId++) { String fieldName = fields[indexId]; if (fieldName.startsWith("NOINDEX")) { continue; } TermValue indexIdValue = new TermValue(Type.INDEX_ID, indexId); // Iterate in parallel over the words of the indices MutableString term = new MutableString(""); MutableString nonWord = new MutableString(""); WordReader termReader = doc.content(indexId); int position = 0; while (termReader.next(term, nonWord)) { // Read next property as well if (term != null && term.length() > 0) { String termString = term.toString(); // Report progress context.setStatus(fields[indexId] + "=" + term.substring(0, Math.min(term.length(), 50))); // Create an occurrence at the next position TermValue occurrenceValue = new TermValue(Type.OCCURRENCE, doc.getId(), position); context.write(new TermKey(termString, indexId, occurrenceValue), occurrenceValue); DocStat docStat = termToDocStatMap.get(termString); if (docStat == null) { if (doc.getIndexType() == RDFDocumentFactory.IndexType.VERTICAL) { // For the Alignment Index, we write the predicate // id(Which is equal to the index id for a VERTICAL // index) the first time we encounter a term. // The 'Alignment Index' is an index without counts // or positions. 
It's used for query optimization in // the query parser. The resulting 'alignment index' // is basically used as a map from term to // predicates that the term occurs in. context.write(new TermKey(termString, ALIGNMENT_INDEX, indexIdValue), indexIdValue); } docStat = new DocStat(); docStat.last = position; docStat.count = 1; termToDocStatMap.put(termString, docStat); } else { docStat.last = position; docStat.count++; } position++; context.getCounter(Counters.INDEXED_OCCURRENCES).increment(1); } else { LOG.info("Nextterm is null"); } } if (doc.getIndexType() == RDFDocumentFactory.IndexType.HORIZONTAL && position > 0) { TermValue docSizeValue = new TermValue(Type.DOC_SIZE, doc.getId(), position); context.write(new TermKey(TermKey.DOC_SIZE_TERM, indexId, docSizeValue), docSizeValue); } for (String termString : termToDocStatMap.keySet()) { DocStat docStat = termToDocStatMap.get(termString); TermValue occurrenceCountValue = new TermValue(Type.TERM_STATS, docStat.count, docStat.last); context.write(new TermKey(termString, indexId, occurrenceCountValue), occurrenceCountValue); } termToDocStatMap.clear(); } context.getCounter(Counters.NUMBER_OF_RECORDS).increment(1); }
From source file:com.yahoo.glimmer.indexing.preprocessor.ResourcesReducer.java
License:Open Source License
protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, Object>.Context context) throws IOException, InterruptedException { context.getCounter(Counters.KEYS).increment(1); int keyPredicateCount = 0; int keyObjectCount = 0; int keyContextCount = 0; int relationsCount = 0; int duplicateRelatations = 0; outputCount.output = OUTPUT.ALL;//from ww w. j a va 2 s .c o m outputCount.count = 0; context.write(key, outputCount); bySubjectRecord.clearRelations(); String lastValue = null; for (Text value : values) { context.getCounter(Counters.VALUES).increment(1); if (PREDICATE_TEXT.equals(value)) { keyPredicateCount++; } else if (OBJECT_TEXT.equals(value)) { keyObjectCount++; } else if (CONTEXT_TEXT.equals(value)) { keyContextCount++; } else if (SUBJECT_TEXT.equals(value)) { throw new IllegalArgumentException( "Reducer got a SUBJECT value!?. Should only be \"PREDICATE\", \"OBJECT\", \"CONTEXT\" or a relation String."); } else if (value.getLength() > 0) { String valueString = value.toString(); if (!valueString.equals(lastValue)) { bySubjectRecord.addRelation(valueString); relationsCount++; lastValue = valueString; } else { duplicateRelatations++; } } } if (relationsCount > 0) { if (duplicateRelatations > 0) { context.getCounter(Counters.DUPLICATE_RELATIONS).increment(duplicateRelatations); } // The docId's should match with OUTPUT.ALL hash values bySubjectRecord.setId(docId); bySubjectRecord.setSubject(key.toString()); if (bySubjectRecord.getRelationsCount() != relationsCount) { System.out.println("Too many relations. Only indexing " + bySubjectRecord.getRelationsCount() + " of " + relationsCount + ". 
Subject is:" + key.toString()); context.getCounter(Counters.TOO_MANY_RELATIONS).increment(1); } context.write(key, bySubjectRecord); bySubjectRecord.setPreviousId(docId); context.getCounter(Counters.KEY_SUBJECT).increment(relationsCount); } if (keyPredicateCount > 0) { outputCount.output = OUTPUT.PREDICATE; outputCount.count = keyPredicateCount; context.write(key, outputCount); context.getCounter(Counters.KEY_PREDICATE).increment(keyPredicateCount); } if (keyObjectCount > 0) { outputCount.output = OUTPUT.OBJECT; outputCount.count = keyObjectCount; context.write(key, outputCount); context.getCounter(Counters.KEY_OBJECT).increment(keyObjectCount); } if (keyContextCount > 0) { outputCount.output = OUTPUT.CONTEXT; outputCount.count = keyContextCount; context.write(key, outputCount); context.getCounter(Counters.KEY_CONTEXT).increment(keyContextCount); } docId++; }
From source file:core.client.impl.ConditionalWriterImpl.java
License:Apache License
private boolean isVisible(ByteSequence cv) { Text testVis = new Text(cv.toArray()); if (testVis.getLength() == 0) return true; Boolean b = (Boolean) cache.get(testVis); if (b != null) return b; try {// ww w . j a v a 2 s .co m Boolean bb = ve.evaluate(new ColumnVisibility(testVis)); cache.put(new Text(testVis), bb); return bb; } catch (VisibilityParseException e) { return false; } catch (BadArgumentException e) { return false; } }
From source file:core.data.ConditionalMutation.java
License:Apache License
/**
 * Creates a conditional mutation for the given row by delegating to the
 * {@code (byte[], int, int)} constructor.
 *
 * <p>The row's raw byte array is passed together with offset 0 and {@code row.getLength()}, so
 * only the first {@code getLength()} bytes of {@code row.getBytes()} are designated as the row.
 *
 * @param row the row this mutation applies to
 */
public ConditionalMutation(Text row) { this(row.getBytes(), 0, row.getLength()); }
From source file:cosmos.impl.IndexToMultimapRecord.java
License:Apache License
@Override public MultimapRecord apply(Entry<Key, Value> input) { Key k = input.getKey();//from w ww .j av a2 s . c om Text colqual = k.getColumnQualifier(); int index = colqual.find(Defaults.NULL_BYTE_STR); if (-1 == index) { throw new RuntimeException("Was provided unexpected Key: " + k); } int start = index + 1; try { String docId = Text.decode(colqual.getBytes(), start, colqual.getLength() - start); return sorts.contents(id, docId); } catch (TableNotFoundException e) { throw new RuntimeException(e); } catch (UnexpectedStateException e) { throw new RuntimeException(e); } catch (CharacterCodingException e) { throw new RuntimeException(e); } }
From source file:cosmos.mapred.MediawikiMapper.java
License:Apache License
/** * Called once for each key/value pair in the input split. Most applications should override this, but the default is the identity function. *///from ww w . java 2 s .c om @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { Object o; try { o = unmarshaller.unmarshal(new ByteArrayInputStream(value.getBytes(), 0, value.getLength())); } catch (JAXBException e) { throw new IOException("Couldn't unmarshall '" + value + "'", e); } PageType pageType = (PageType) o; Page page = pageTypeToPage(pageType); Value protobufValue = new Value(page.toByteArray()); Mutation m = new Mutation(Long.toString(page.getId())); m.put(empty, empty, protobufValue); context.write(tableName, m); }
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Prints, one per line and in lowercase hex, the code points decoded from the bytes of a sample
 * {@code Text} (U+0041, U+00DF, U+6771 and the surrogate pair D801 DC00).
 *
 * <p>Decoding stops when the buffer is exhausted or {@code Text.bytesToCodePoint} returns -1.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    Text sample = new Text("\u0041\u00DF\u6771\uD801\uDC00");
    // Wrap only the first getLength() bytes of the array returned by getBytes().
    ByteBuffer remaining = ByteBuffer.wrap(sample.getBytes(), 0, sample.getLength());

    while (remaining.hasRemaining()) {
        int codePoint = Text.bytesToCodePoint(remaining);
        if (codePoint == -1) {
            break;
        }
        System.out.println(Integer.toHexString(codePoint));
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
/**
 * Checks that {@code Text} reports its length and positions in bytes, not in Java chars, for a
 * string mixing one-, two-, three- and four-byte encoded characters.
 */
@Test
public void text() {
    String sample = "\u0041\u00DF\u6771\uD801\uDC00";
    Text encoded = new Text(sample);

    // 1 + 2 + 3 + 4 encoded bytes.
    assertThat(encoded.getLength(), is(10));

    // find() answers with byte offsets.
    assertThat(encoded.find("\u0041"), is(0));
    assertThat(encoded.find("\u00DF"), is(1));
    assertThat(encoded.find("\u6771"), is(3));
    assertThat(encoded.find("\uD801\uDC00"), is(6));

    // charAt() takes a byte offset and yields the code point found there.
    assertThat(encoded.charAt(0), is(0x0041));
    assertThat(encoded.charAt(1), is(0x00DF));
    assertThat(encoded.charAt(3), is(0x6771));
    assertThat(encoded.charAt(6), is(0x10400));
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Test public void test() throws IOException { // vv TextTest Text t = new Text("hadoop"); assertThat(t.getLength(), is(6)); assertThat(t.getBytes().length, is(6)); assertThat(t.charAt(2), is((int) 'd')); assertThat("Out of bounds", t.charAt(100), is(-1)); // ^^ TextTest }/* w w w. j av a 2s . c om*/