List of usage examples for com.google.common.collect Interners newStrongInterner
public static <E> Interner<E> newStrongInterner()
From source file:com.lithium.flow.geo.Ip2LocationGeoReader.java
/**
 * Reads every row of a CSV file into a list of {@link GeoBlock}s.
 *
 * <p>Columns are read by position: 0 = range start, 1 = range end, 2 = country code,
 * 3 = country name, 4 = region, 5 = city, 6 = latitude, 7 = longitude, and the optional
 * 8 = postal code and 9 = time zone. When an optional column is absent the literal
 * {@code "unknown"} is used instead. Repeated strings are interned so equal values across
 * rows share a single instance.
 *
 * @param file the CSV file to read; must not be null
 * @return the blocks in file order
 * @throws IOException if the file cannot be opened or read
 */
@Override
@Nonnull
public List<GeoBlock> readBlocks(@Nonnull File file) throws IOException {
    checkNotNull(file);

    long time = System.currentTimeMillis();
    List<GeoBlock> blocks = Lists.newArrayList();
    Interner<String> interner = Interners.newStrongInterner();

    // try-with-resources: the reader (and the underlying file handle) is released even when
    // a row fails to parse, which the previous explicit close() after the loop did not do.
    try (CSVReader reader = new CSVReader(new FileReader(file))) {
        String[] line;
        while ((line = reader.readNext()) != null) {
            long start = Long.parseLong(line[0]);
            long end = Long.parseLong(line[1]);

            // Locale.ROOT keeps the lower-casing independent of the JVM default locale
            // (a Turkish default locale would otherwise map "I" to a dotless i).
            // NOTE(review): fixUnknown is defined elsewhere; presumably it normalizes
            // placeholder values -- confirm against its implementation.
            String countryCode = fixUnknown(interner.intern(line[2].toLowerCase(java.util.Locale.ROOT)));
            String countryName = fixUnknown(interner.intern(WordUtils.capitalizeFully(line[3])));
            String region = interner.intern(WordUtils.capitalizeFully(line[4]));
            String city = interner.intern(WordUtils.capitalizeFully(line[5]));
            double latitude = Double.parseDouble(line[6]);
            double longitude = Double.parseDouble(line[7]);

            // Postal code and time zone are optional trailing columns.
            String postal = line.length <= 8 ? "unknown" : fixUnknown(interner.intern(line[8]));
            String timeZone = line.length <= 9 ? "unknown" : fixUnknown(interner.intern(line[9]));

            GeoDetail detail = new GeoDetail(city, region, postal, countryCode, countryName,
                    latitude, longitude, timeZone);
            blocks.add(new GeoBlock(start, end, detail));
        }
    }

    time = System.currentTimeMillis() - time;
    log.info("read {} blocks in {}ms", blocks.size(), time);
    return blocks;
}
From source file:org.spf4j.zel.vm.ProgramBuilder.java
/**
 * Creates an empty program builder.
 *
 * <p>The builder starts with an instruction buffer of {@code DEFAULT_SIZE} slots and no
 * instructions recorded, a {@code NONDETERMINISTIC} program type, no execution type, a fresh
 * strong interner and an empty debug-info list.
 *
 * @param staticMemBuilder the static memory builder this program builder keeps a reference to
 */
public ProgramBuilder(final MemoryBuilder staticMemBuilder) {
    // Collaborators handed in or created up front.
    this.staticMemBuilder = staticMemBuilder;
    this.stringInterner = Interners.newStrongInterner();
    this.debugInfo = new ArrayList<>();

    // Instruction buffer starts empty.
    this.instructions = new Instruction[DEFAULT_SIZE];
    this.instrNumber = 0;

    // Program classification; the execution type is deliberately left unset here.
    this.type = Program.Type.NONDETERMINISTIC;
    this.execType = null;
}
From source file:com.google.gitiles.blame.BlameCacheImpl.java
private static List<Region> loadRegions(BlameGenerator gen) throws IOException { Map<ObjectId, PooledCommit> commits = Maps.newHashMap(); Interner<String> strings = Interners.newStrongInterner(); int lineCount = gen.getResultContents().size(); List<Region> regions = Lists.newArrayList(); while (gen.next()) { String path = gen.getSourcePath(); PersonIdent author = gen.getSourceAuthor(); ObjectId commit = gen.getSourceCommit(); checkState(path != null && author != null && commit != null); PooledCommit pc = commits.get(commit); if (pc == null) { pc = new PooledCommit(commit.copy(), new PersonIdent(strings.intern(author.getName()), strings.intern(author.getEmailAddress()), author.getWhen(), author.getTimeZone())); commits.put(pc.commit, pc);/* www . j a v a2 s . c om*/ } path = strings.intern(path); commit = pc.commit; author = pc.author; regions.add(new Region(path, commit, author, gen.getResultStart(), gen.getResultEnd())); } Collections.sort(regions); // Fill in any gaps left by bugs in JGit, since rendering code assumes the // full set of contiguous regions. List<Region> result = Lists.newArrayListWithExpectedSize(regions.size()); Region last = null; for (Region r : regions) { if (last != null) { checkState(last.getEnd() <= r.getStart()); if (last.getEnd() < r.getStart()) { result.add(new Region(null, null, null, last.getEnd(), r.getStart())); } } result.add(r); last = r; } if (last != null && last.getEnd() != lineCount) { result.add(new Region(null, null, null, last.getEnd(), lineCount)); } return ImmutableList.copyOf(result); }
From source file:de.learnlib.algorithms.dhc.mealy.MealyDHC.java
@Override public void startLearning() { // initialize structure to store state output signatures Map<List<Word<O>>, Integer> signatures = new HashMap<>(); // set up new hypothesis machine hypothesis = new CompactMealy<>(alphabet); // initialize exploration queue Queue<QueueElement<I, O>> queue = new ArrayDeque<>(); // initialize storage for access sequences accessSequences = hypothesis.createDynamicStateMapping(); // first element to be explored represents the initial state with no predecessor queue.add(new QueueElement<I, O>(null, null, null, null)); Interner<Word<O>> deduplicator = Interners.newStrongInterner(); while (!queue.isEmpty()) { // get element to be explored from queue QueueElement<I, O> elem = queue.poll(); // determine access sequence for state Word<I> access = assembleAccessSequence(elem); // assemble queries ArrayList<DefaultQuery<I, Word<O>>> queries = new ArrayList<>(splitters.size()); for (Word<I> suffix : splitters) { queries.add(new DefaultQuery<I, Word<O>>(access, suffix)); }//from ww w . j a va 2s . co m // retrieve answers oracle.processQueries(queries); // assemble output signature List<Word<O>> sig = new ArrayList<>(splitters.size()); for (DefaultQuery<I, Word<O>> query : queries) { sig.add(deduplicator.intern(query.getOutput())); } Integer sibling = signatures.get(sig); if (sibling != null) { // this element does not possess a new output signature // create a transition from parent state to sibling hypothesis.addTransition(elem.parentState, elem.transIn, sibling, elem.transOut); } else { // this is actually an observably distinct state! Progress! // Create state and connect via transition to parent Integer state = elem.parentElement == null ? hypothesis.addInitialState() : hypothesis.addState(); if (elem.parentElement != null) { hypothesis.addTransition(elem.parentState, elem.transIn, state, elem.transOut); } signatures.put(sig, state); accessSequences.put(state, elem); scheduleSuccessors(elem, state, queue, sig); } } }
From source file:com.google.debugging.sourcemap.SourceMapConsumerV1.java
/** * Parse the file mappings section of the source map file. This maps the * ids to the filename, line number and column number in the original * files./*from w w w . j a v a 2 s . com*/ * @param parser The parser to get the data from. * @param maxID The maximum id found in the character mapping section. */ private void parseFileMappings(ParseState parser, int maxID) throws SourceMapParseException, JSONException { // ['d.js', 3, 78, 'foo'] // Intern the strings to save memory. Interner<String> interner = Interners.newStrongInterner(); ImmutableList.Builder<SourceFile> mappingsBuilder = ImmutableList.builder(); // Setup all the arrays to keep track of the various details about the // source file. ArrayList<Byte> lineOffsets = Lists.newArrayList(); ArrayList<Short> columns = Lists.newArrayList(); ArrayList<String> identifiers = Lists.newArrayList(); // The indexes and details about the current position in the file to do // diffs against. String currentFile = null; int lastLine = -1; int startLine = -1; int startMapId = -1; for (int mappingId = 0; mappingId <= maxID; ++mappingId) { String currentLine = parser.readLine(); JSONArray mapArray = new JSONArray(currentLine); if (mapArray.length() < 3) { parser.fail("Invalid mapping array"); } // Split up the file and directory names to reduce memory usage. String myFile = mapArray.getString(0); int line = mapArray.getInt(1); if (!myFile.equals(currentFile) || (line - lastLine) > Byte.MAX_VALUE || (line - lastLine) < Byte.MIN_VALUE) { if (currentFile != null) { FileName dirFile = splitFileName(interner, currentFile); SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir) .setFileName(dirFile.name).setStartLine(startLine).setStartMapId(startMapId) .setLineOffsets(lineOffsets).setColumns(columns).setIdentifiers(identifiers); mappingsBuilder.add(builder.build()); } // Reset all the positions back to the start and clear out the arrays // to start afresh. 
currentFile = myFile; startLine = line; lastLine = line; startMapId = mappingId; columns.clear(); lineOffsets.clear(); identifiers.clear(); } // We need to add on the columns and identifiers for all the lines, even // for the first line. lineOffsets.add((byte) (line - lastLine)); columns.add((short) mapArray.getInt(2)); identifiers.add(interner.intern(mapArray.optString(3, ""))); lastLine = line; } if (currentFile != null) { FileName dirFile = splitFileName(interner, currentFile); SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir).setFileName(dirFile.name) .setStartLine(startLine).setStartMapId(startMapId).setLineOffsets(lineOffsets) .setColumns(columns).setIdentifiers(identifiers); mappingsBuilder.add(builder.build()); } mappings = mappingsBuilder.build(); }
From source file:org.apache.hadoop.hive.ql.parse.TaskCompiler.java
@SuppressWarnings({ "nls", "unchecked" }) public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks, final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException { Context ctx = pCtx.getContext(); GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx(); List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>(); List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork(); List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork(); boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite(); int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit(); if (pCtx.getFetchTask() != null) { if (pCtx.getFetchTask().getTblDesc() == null) { return; }/*from ww w . ja v a 2 s. c o m*/ pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery()); TableDesc resultTab = pCtx.getFetchTask().getTblDesc(); // If the serializer is ThriftJDBCBinarySerDe, then it requires that NoOpFetchFormatter be used. But when it isn't, // then either the ThriftFormatter or the DefaultFetchFormatter should be used. if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) { if (SessionState.get().isHiveServerQuery()) { conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName()); } else { String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER); if (formatterName == null || formatterName.isEmpty()) { conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName()); } } } return; } optimizeOperatorPlan(pCtx, inputs, outputs); /* * In case of a select, use a fetch task instead of a move task. * If the select is from analyze table column rewrite, don't create a fetch task. Instead create * a column stats task later. 
*/ if (pCtx.getQueryProperties().isQuery() && !isCStats) { if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) { throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg()); } LoadFileDesc loadFileDesc = loadFileWork.get(0); String cols = loadFileDesc.getColumns(); String colTypes = loadFileDesc.getColumnTypes(); String resFileFormat; TableDesc resultTab = pCtx.getFetchTableDesc(); if (resultTab == null) { resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT); if (SessionState.get().getIsUsingThriftJDBCBinarySerDe() && (resFileFormat.equalsIgnoreCase("SequenceFile"))) { resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, ThriftJDBCBinarySerDe.class); // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll // read formatted thrift objects from the output SequenceFile written by Tasks. conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName()); } else { resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat, LazySimpleSerDe.class); } } else { if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB) .equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) { // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll // read formatted thrift objects from the output SequenceFile written by Tasks. 
conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName()); } } FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit); boolean isHiveServerQuery = SessionState.get().isHiveServerQuery(); fetch.setHiveServerQuery(isHiveServerQuery); fetch.setSource(pCtx.getFetchSource()); fetch.setSink(pCtx.getFetchSink()); if (isHiveServerQuery && null != resultTab && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName()) && HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) { fetch.setIsUsingThriftJDBCBinarySerDe(true); } else { fetch.setIsUsingThriftJDBCBinarySerDe(false); } pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf)); // For the FetchTask, the limit optimization requires we fetch all the rows // in memory and count how many rows we get. It's not practical if the // limit factor is too big int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH); if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) { LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit + ". Doesn't qualify limit optimization."); globalLimitCtx.disableOpt(); } if (outerQueryLimit == 0) { // Believe it or not, some tools do generate queries with limit 0 and than expect // query to run quickly. Lets meet their requirement. LOG.info("Limit 0. No query execution needed."); return; } } else if (!isCStats) { for (LoadTableDesc ltd : loadTableWork) { Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf); mvTask.add(tsk); // Check to see if we are stale'ing any indexes and auto-update them if we want if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) { IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf); try { List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks(); for (Task<? 
extends Serializable> updateTask : indexUpdateTasks) { tsk.addDependentTask(updateTask); } } catch (HiveException e) { console.printInfo("WARNING: could not auto-update stale indexes, which are not in sync"); } } } boolean oneLoadFile = true; for (LoadFileDesc lfd : loadFileWork) { if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) { assert (oneLoadFile); // should not have more than 1 load file for // CTAS // make the movetask's destination directory the table's destination. Path location; String loc = pCtx.getQueryProperties().isCTAS() ? pCtx.getCreateTable().getLocation() : pCtx.getCreateViewDesc().getLocation(); if (loc == null) { // get the default location Path targetPath; try { String protoName = null; if (pCtx.getQueryProperties().isCTAS()) { protoName = pCtx.getCreateTable().getTableName(); } else if (pCtx.getQueryProperties().isMaterializedView()) { protoName = pCtx.getCreateViewDesc().getViewName(); } String[] names = Utilities.getDbTableName(protoName); if (!db.databaseExists(names[0])) { throw new SemanticException("ERROR: The database " + names[0] + " does not exist."); } Warehouse wh = new Warehouse(conf); targetPath = wh.getDefaultTablePath(db.getDatabase(names[0]), names[1]); } catch (HiveException e) { throw new SemanticException(e); } catch (MetaException e) { throw new SemanticException(e); } location = targetPath; } else { location = new Path(loc); } lfd.setTargetDir(location); oneLoadFile = false; } mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf)); } } generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs); // For each task, set the key descriptor for the reducer for (Task<? extends Serializable> rootTask : rootTasks) { GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask); } // If a task contains an operator which instructs bucketizedhiveinputformat // to be used, please do so for (Task<? 
extends Serializable> rootTask : rootTasks) { setInputFormat(rootTask); } optimizeTaskPlan(rootTasks, pCtx, ctx); /* * If the query was the result of analyze table column compute statistics rewrite, create * a column stats task instead of a fetch task to persist stats to the metastore. */ if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) { Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>(); getLeafTasks(rootTasks, leafTasks); if (isCStats) { genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0); } else { for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx .getColumnStatsAutoGatherContexts()) { if (!columnStatsAutoGatherContext.isInsertInto()) { genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0); } else { int numBitVector; try { numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf); } catch (Exception e) { throw new SemanticException(e.getMessage()); } genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(), columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, numBitVector); } } } } decideExecMode(rootTasks, ctx, globalLimitCtx); if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) { // generate a DDL task and make it a dependent task of the leaf CreateTableDesc crtTblDesc = pCtx.getCreateTable(); crtTblDesc.validate(conf); Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc), conf); patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask); } else if (pCtx.getQueryProperties().isMaterializedView()) { // generate a DDL task and make it a dependent task of the leaf CreateViewDesc viewDesc = pCtx.getCreateViewDesc(); Task<? 
extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc), conf); patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask); } if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) { LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit()); pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit()); } if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) { LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit()); globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit()); List<ExecDriver> mrTasks = Utilities.getMRTasks(rootTasks); for (ExecDriver tsk : mrTasks) { tsk.setRetryCmdWhenFail(true); } List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks); for (SparkTask sparkTask : sparkTasks) { sparkTask.setRetryCmdWhenFail(true); } } Interner<TableDesc> interner = Interners.newStrongInterner(); for (Task<? extends Serializable> rootTask : rootTasks) { GenMapRedUtils.internTableDesc(rootTask, interner); GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf()); } }