List of usage examples for com.google.common.collect.Interners.newStrongInterner()
public static <E> Interner<E> newStrongInterner()
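Interners.newStrongInterner() returns an Interner that keeps a strong reference to every value passed to intern(), so interned values are retained for as long as the interner itself is reachable (Interners.newWeakInterner() is the alternative when entries should remain garbage-collectable). Calling intern(sample) returns the canonical instance: the first value equal to sample that the interner has seen. A minimal sketch of the behavior (the class and variable names here are made up for illustration):

import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

public class StrongInternerExample {
    public static void main(String[] args) {
        Interner<String> interner = Interners.newStrongInterner();

        // Two equal but distinct String instances.
        String a = new String("us");
        String b = new String("us");
        System.out.println(a == b); // false: different objects

        // intern() returns the first equal value it has seen,
        // so both calls yield the same canonical instance.
        String ca = interner.intern(a);
        String cb = interner.intern(b);
        System.out.println(ca == cb); // true: duplicates now share one object
    }
}

The examples below use this pattern to deduplicate repeated values (strings, and in the last example TableDesc objects) while processing large inputs.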
From source file:com.lithium.flow.geo.Ip2LocationGeoReader.java
@Override
@Nonnull
public List<GeoBlock> readBlocks(@Nonnull File file) throws IOException {
    checkNotNull(file);
    long time = System.currentTimeMillis();

    List<GeoBlock> blocks = Lists.newArrayList();
    Interner<String> interner = Interners.newStrongInterner();
    CSVReader reader = new CSVReader(new FileReader(file));
    String[] line;
    while ((line = reader.readNext()) != null) {
        long start = Long.parseLong(line[0]);
        long end = Long.parseLong(line[1]);
        String countryCode = fixUnknown(interner.intern(line[2].toLowerCase()));
        String countryName = fixUnknown(interner.intern(WordUtils.capitalizeFully(line[3])));
        String region = interner.intern(WordUtils.capitalizeFully(line[4]));
        String city = interner.intern(WordUtils.capitalizeFully(line[5]));
        double latitude = Double.parseDouble(line[6]);
        double longitude = Double.parseDouble(line[7]);
        String postal = line.length <= 8 ? "unknown" : fixUnknown(interner.intern(line[8]));
        String timeZone = line.length <= 9 ? "unknown" : fixUnknown(interner.intern(line[9]));

        GeoDetail detail = new GeoDetail(city, region, postal, countryCode, countryName,
                latitude, longitude, timeZone);
        GeoBlock block = new GeoBlock(start, end, detail);
        blocks.add(block);
    }
    reader.close();

    time = System.currentTimeMillis() - time;
    log.info("read {} blocks in {}ms", blocks.size(), time);
    return blocks;
}
From source file:org.spf4j.zel.vm.ProgramBuilder.java
/**
 * Initializes the program.
 */
public ProgramBuilder(final MemoryBuilder staticMemBuilder) {
    this.staticMemBuilder = staticMemBuilder;
    instructions = new Instruction[DEFAULT_SIZE];
    instrNumber = 0;
    type = Program.Type.NONDETERMINISTIC;
    execType = null; //Program.ExecutionType.ASYNC;
    stringInterner = Interners.newStrongInterner();
    debugInfo = new ArrayList<>();
}
From source file:com.google.gitiles.blame.BlameCacheImpl.java
private static List<Region> loadRegions(BlameGenerator gen) throws IOException {
    Map<ObjectId, PooledCommit> commits = Maps.newHashMap();
    Interner<String> strings = Interners.newStrongInterner();
    int lineCount = gen.getResultContents().size();

    List<Region> regions = Lists.newArrayList();
    while (gen.next()) {
        String path = gen.getSourcePath();
        PersonIdent author = gen.getSourceAuthor();
        ObjectId commit = gen.getSourceCommit();
        checkState(path != null && author != null && commit != null);

        PooledCommit pc = commits.get(commit);
        if (pc == null) {
            pc = new PooledCommit(commit.copy(),
                    new PersonIdent(strings.intern(author.getName()),
                            strings.intern(author.getEmailAddress()),
                            author.getWhen(), author.getTimeZone()));
            commits.put(pc.commit, pc);
        }
        path = strings.intern(path);
        commit = pc.commit;
        author = pc.author;
        regions.add(new Region(path, commit, author, gen.getResultStart(), gen.getResultEnd()));
    }
    Collections.sort(regions);

    // Fill in any gaps left by bugs in JGit, since rendering code assumes the
    // full set of contiguous regions.
    List<Region> result = Lists.newArrayListWithExpectedSize(regions.size());
    Region last = null;
    for (Region r : regions) {
        if (last != null) {
            checkState(last.getEnd() <= r.getStart());
            if (last.getEnd() < r.getStart()) {
                result.add(new Region(null, null, null, last.getEnd(), r.getStart()));
            }
        }
        result.add(r);
        last = r;
    }
    if (last != null && last.getEnd() != lineCount) {
        result.add(new Region(null, null, null, last.getEnd(), lineCount));
    }
    return ImmutableList.copyOf(result);
}
From source file:de.learnlib.algorithms.dhc.mealy.MealyDHC.java
@Override
public void startLearning() {
    // initialize structure to store state output signatures
    Map<List<Word<O>>, Integer> signatures = new HashMap<>();

    // set up new hypothesis machine
    hypothesis = new CompactMealy<>(alphabet);

    // initialize exploration queue
    Queue<QueueElement<I, O>> queue = new ArrayDeque<>();

    // initialize storage for access sequences
    accessSequences = hypothesis.createDynamicStateMapping();

    // first element to be explored represents the initial state with no predecessor
    queue.add(new QueueElement<I, O>(null, null, null, null));

    Interner<Word<O>> deduplicator = Interners.newStrongInterner();

    while (!queue.isEmpty()) {
        // get element to be explored from queue
        QueueElement<I, O> elem = queue.poll();

        // determine access sequence for state
        Word<I> access = assembleAccessSequence(elem);

        // assemble queries
        ArrayList<DefaultQuery<I, Word<O>>> queries = new ArrayList<>(splitters.size());
        for (Word<I> suffix : splitters) {
            queries.add(new DefaultQuery<I, Word<O>>(access, suffix));
        }

        // retrieve answers
        oracle.processQueries(queries);

        // assemble output signature
        List<Word<O>> sig = new ArrayList<>(splitters.size());
        for (DefaultQuery<I, Word<O>> query : queries) {
            sig.add(deduplicator.intern(query.getOutput()));
        }

        Integer sibling = signatures.get(sig);
        if (sibling != null) {
            // this element does not possess a new output signature
            // create a transition from parent state to sibling
            hypothesis.addTransition(elem.parentState, elem.transIn, sibling, elem.transOut);
        } else {
            // this is actually an observably distinct state! Progress!
            // Create state and connect via transition to parent
            Integer state = elem.parentElement == null ? hypothesis.addInitialState()
                    : hypothesis.addState();
            if (elem.parentElement != null) {
                hypothesis.addTransition(elem.parentState, elem.transIn, state, elem.transOut);
            }
            signatures.put(sig, state);
            accessSequences.put(state, elem);
            scheduleSuccessors(elem, state, queue, sig);
        }
    }
}
From source file:com.google.debugging.sourcemap.SourceMapConsumerV1.java
/**
 * Parse the file mappings section of the source map file. This maps the
 * ids to the filename, line number and column number in the original
 * files.
 * @param parser The parser to get the data from.
 * @param maxID The maximum id found in the character mapping section.
 */
private void parseFileMappings(ParseState parser, int maxID)
        throws SourceMapParseException, JSONException {
    // ['d.js', 3, 78, 'foo']
    // Intern the strings to save memory.
    Interner<String> interner = Interners.newStrongInterner();
    ImmutableList.Builder<SourceFile> mappingsBuilder = ImmutableList.builder();

    // Setup all the arrays to keep track of the various details about the
    // source file.
    ArrayList<Byte> lineOffsets = Lists.newArrayList();
    ArrayList<Short> columns = Lists.newArrayList();
    ArrayList<String> identifiers = Lists.newArrayList();

    // The indexes and details about the current position in the file to do
    // diffs against.
    String currentFile = null;
    int lastLine = -1;
    int startLine = -1;
    int startMapId = -1;

    for (int mappingId = 0; mappingId <= maxID; ++mappingId) {
        String currentLine = parser.readLine();
        JSONArray mapArray = new JSONArray(currentLine);
        if (mapArray.length() < 3) {
            parser.fail("Invalid mapping array");
        }

        // Split up the file and directory names to reduce memory usage.
        String myFile = mapArray.getString(0);
        int line = mapArray.getInt(1);

        if (!myFile.equals(currentFile) || (line - lastLine) > Byte.MAX_VALUE
                || (line - lastLine) < Byte.MIN_VALUE) {
            if (currentFile != null) {
                FileName dirFile = splitFileName(interner, currentFile);
                SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir)
                        .setFileName(dirFile.name).setStartLine(startLine).setStartMapId(startMapId)
                        .setLineOffsets(lineOffsets).setColumns(columns).setIdentifiers(identifiers);
                mappingsBuilder.add(builder.build());
            }

            // Reset all the positions back to the start and clear out the arrays
            // to start afresh.
            currentFile = myFile;
            startLine = line;
            lastLine = line;
            startMapId = mappingId;
            columns.clear();
            lineOffsets.clear();
            identifiers.clear();
        }

        // We need to add on the columns and identifiers for all the lines, even
        // for the first line.
        lineOffsets.add((byte) (line - lastLine));
        columns.add((short) mapArray.getInt(2));
        identifiers.add(interner.intern(mapArray.optString(3, "")));
        lastLine = line;
    }

    if (currentFile != null) {
        FileName dirFile = splitFileName(interner, currentFile);
        SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir).setFileName(dirFile.name)
                .setStartLine(startLine).setStartMapId(startMapId).setLineOffsets(lineOffsets)
                .setColumns(columns).setIdentifiers(identifiers);
        mappingsBuilder.add(builder.build());
    }
    mappings = mappingsBuilder.build();
}
From source file:org.apache.hadoop.hive.ql.parse.TaskCompiler.java
@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks,
final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {
Context ctx = pCtx.getContext();
GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>();
List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();
boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();
if (pCtx.getFetchTask() != null) {
if (pCtx.getFetchTask().getTblDesc() == null) {
return;
}
pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
// If the serializer is ThriftJDBCBinarySerDe, it requires that NoOpFetchFormatter be used.
// Otherwise, either the ThriftFormatter or the DefaultFetchFormatter should be used.
if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
if (SessionState.get().isHiveServerQuery()) {
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
} else {
String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
if (formatterName == null || formatterName.isEmpty()) {
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
}
}
}
return;
}
optimizeOperatorPlan(pCtx, inputs, outputs);
/*
* In case of a select, use a fetch task instead of a move task.
* If the select is from analyze table column rewrite, don't create a fetch task. Instead create
* a column stats task later.
*/
if (pCtx.getQueryProperties().isQuery() && !isCStats) {
if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
}
LoadFileDesc loadFileDesc = loadFileWork.get(0);
String cols = loadFileDesc.getColumns();
String colTypes = loadFileDesc.getColumnTypes();
String resFileFormat;
TableDesc resultTab = pCtx.getFetchTableDesc();
if (resultTab == null) {
resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()
&& (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat,
ThriftJDBCBinarySerDe.class);
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// read formatted thrift objects from the output SequenceFile written by Tasks.
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
} else {
resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat,
LazySimpleSerDe.class);
}
} else {
if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB)
.equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
// Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
// read formatted thrift objects from the output SequenceFile written by Tasks.
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
}
}
FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
fetch.setHiveServerQuery(isHiveServerQuery);
fetch.setSource(pCtx.getFetchSource());
fetch.setSink(pCtx.getFetchSink());
if (isHiveServerQuery && null != resultTab
&& resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())
&& HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
fetch.setIsUsingThriftJDBCBinarySerDe(true);
} else {
fetch.setIsUsingThriftJDBCBinarySerDe(false);
}
pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf));
// For the FetchTask, the limit optimization requires we fetch all the rows
// in memory and count how many rows we get. It's not practical if the
// limit factor is too big
int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit
+ ". Doesn't qualify limit optimization.");
globalLimitCtx.disableOpt();
}
if (outerQueryLimit == 0) {
// Believe it or not, some tools do generate queries with limit 0 and then expect
// the query to run quickly. Let's meet their requirement.
LOG.info("Limit 0. No query execution needed.");
return;
}
} else if (!isCStats) {
for (LoadTableDesc ltd : loadTableWork) {
Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
mvTask.add(tsk);
// Check whether we are making any indexes stale, and auto-update them if configured
if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf);
try {
List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
tsk.addDependentTask(updateTask);
}
} catch (HiveException e) {
console.printInfo("WARNING: could not auto-update stale indexes, which are not in sync");
}
}
}
boolean oneLoadFile = true;
for (LoadFileDesc lfd : loadFileWork) {
if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
assert (oneLoadFile); // should not have more than 1 load file for CTAS
// Make the MoveTask's destination directory the table's destination.
Path location;
String loc = pCtx.getQueryProperties().isCTAS() ? pCtx.getCreateTable().getLocation()
: pCtx.getCreateViewDesc().getLocation();
if (loc == null) {
// get the default location
Path targetPath;
try {
String protoName = null;
if (pCtx.getQueryProperties().isCTAS()) {
protoName = pCtx.getCreateTable().getTableName();
} else if (pCtx.getQueryProperties().isMaterializedView()) {
protoName = pCtx.getCreateViewDesc().getViewName();
}
String[] names = Utilities.getDbTableName(protoName);
if (!db.databaseExists(names[0])) {
throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
}
Warehouse wh = new Warehouse(conf);
targetPath = wh.getDefaultTablePath(db.getDatabase(names[0]), names[1]);
} catch (HiveException e) {
throw new SemanticException(e);
} catch (MetaException e) {
throw new SemanticException(e);
}
location = targetPath;
} else {
location = new Path(loc);
}
lfd.setTargetDir(location);
oneLoadFile = false;
}
mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf));
}
}
generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);
// For each task, set the key descriptor for the reducer
for (Task<? extends Serializable> rootTask : rootTasks) {
GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
}
// If a task contains an operator which instructs BucketizedHiveInputFormat
// to be used, configure the task accordingly
for (Task<? extends Serializable> rootTask : rootTasks) {
setInputFormat(rootTask);
}
optimizeTaskPlan(rootTasks, pCtx, ctx);
/*
* If the query was the result of analyze table column compute statistics rewrite, create
* a column stats task instead of a fetch task to persist stats to the metastore.
*/
if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
getLeafTasks(rootTasks, leafTasks);
if (isCStats) {
genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0);
} else {
for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx
.getColumnStatsAutoGatherContexts()) {
if (!columnStatsAutoGatherContext.isInsertInto()) {
genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0);
} else {
int numBitVector;
try {
numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
} catch (Exception e) {
throw new SemanticException(e.getMessage());
}
genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit,
numBitVector);
}
}
}
}
decideExecMode(rootTasks, ctx, globalLimitCtx);
if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
// generate a DDL task and make it a dependent task of the leaf
CreateTableDesc crtTblDesc = pCtx.getCreateTable();
crtTblDesc.validate(conf);
Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc),
conf);
patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
} else if (pCtx.getQueryProperties().isMaterializedView()) {
// generate a DDL task and make it a dependent task of the leaf
CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc),
conf);
patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
}
if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
}
if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
List<ExecDriver> mrTasks = Utilities.getMRTasks(rootTasks);
for (ExecDriver tsk : mrTasks) {
tsk.setRetryCmdWhenFail(true);
}
List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks);
for (SparkTask sparkTask : sparkTasks) {
sparkTask.setRetryCmdWhenFail(true);
}
}
Interner<TableDesc> interner = Interners.newStrongInterner();
for (Task<? extends Serializable> rootTask : rootTasks) {
GenMapRedUtils.internTableDesc(rootTask, interner);
GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
}
}