Example usage for com.google.common.collect Interners newStrongInterner

Introduction

On this page you can find example usages of com.google.common.collect.Interners.newStrongInterner().

Prototype

public static <E> Interner<E> newStrongInterner()

Source Link

Document

Returns a new thread-safe interner which retains a strong reference to each instance it has interned, thus preventing these instances from being garbage-collected.

Usage

From source file:com.lithium.flow.geo.Ip2LocationGeoReader.java

/**
 * Reads every row of the given CSV file into a list of {@link GeoBlock}s.
 *
 * <p>Columns are read by position: 0 = range start, 1 = range end, 2 = country code,
 * 3 = country name, 4 = region, 5 = city, 6 = latitude, 7 = longitude, and optionally
 * 8 = postal code and 9 = time zone. Rows that are too short to contain the postal code
 * or time zone column get the literal {@code "unknown"} for that field.
 *
 * <p>String fields are passed through a per-call interner so that values repeated across
 * rows share a single {@code String} instance.
 *
 * @param file the CSV file to read; must not be null
 * @return the blocks in file order
 * @throws IOException if the file cannot be opened or read
 */
@Override
@Nonnull
public List<GeoBlock> readBlocks(@Nonnull File file) throws IOException {
    checkNotNull(file);

    long time = System.currentTimeMillis();
    List<GeoBlock> blocks = Lists.newArrayList();
    Interner<String> interner = Interners.newStrongInterner();

    // try-with-resources guarantees the reader (and the file handle behind it) is released
    // even when a row fails to parse or the underlying read throws.
    try (CSVReader reader = new CSVReader(new FileReader(file))) {
        String[] line;
        while ((line = reader.readNext()) != null) {
            long start = Long.parseLong(line[0]);
            long end = Long.parseLong(line[1]);
            // NOTE(review): fixUnknown is defined elsewhere; presumably it normalizes
            // placeholder values -- confirm against its implementation.
            String countryCode = fixUnknown(interner.intern(line[2].toLowerCase()));
            String countryName = fixUnknown(interner.intern(WordUtils.capitalizeFully(line[3])));
            String region = interner.intern(WordUtils.capitalizeFully(line[4]));
            String city = interner.intern(WordUtils.capitalizeFully(line[5]));
            double latitude = Double.parseDouble(line[6]);
            double longitude = Double.parseDouble(line[7]);
            // Postal code and time zone columns are optional.
            String postal = line.length <= 8 ? "unknown" : fixUnknown(interner.intern(line[8]));
            String timeZone = line.length <= 9 ? "unknown" : fixUnknown(interner.intern(line[9]));

            GeoDetail detail = new GeoDetail(city, region, postal, countryCode, countryName, latitude, longitude,
                    timeZone);
            GeoBlock block = new GeoBlock(start, end, detail);
            blocks.add(block);
        }
    }

    time = System.currentTimeMillis() - time;
    log.info("read {} blocks in {}ms", blocks.size(), time);
    return blocks;
}

From source file:org.spf4j.zel.vm.ProgramBuilder.java

/**
 * Creates a builder for an empty program.
 *
 * <p>The instruction buffer starts with {@code DEFAULT_SIZE} slots and no instructions,
 * the program type defaults to {@code NONDETERMINISTIC}, and no execution type is set.
 *
 * @param staticMemBuilder the static memory builder this program builder keeps a reference to
 */
public ProgramBuilder(final MemoryBuilder staticMemBuilder) {
    this.staticMemBuilder = staticMemBuilder;

    // Per-builder helpers.
    this.stringInterner = Interners.newStrongInterner();
    this.debugInfo = new ArrayList<>();

    // Empty instruction buffer.
    this.instructions = new Instruction[DEFAULT_SIZE];
    this.instrNumber = 0;

    // Defaults; the execution type is deliberately left unset here.
    this.type = Program.Type.NONDETERMINISTIC;
    this.execType = null;
}

From source file:com.google.gitiles.blame.BlameCacheImpl.java

/**
 * Drains the blame generator into a sorted list of regions, pooling repeated values.
 *
 * <p>Each distinct source commit is represented by one {@code PooledCommit}, and path, author
 * name and author e-mail strings are interned, so regions that refer to the same commit or
 * path share instances. After sorting, any range not covered by a region (between two
 * regions, or between the last region and the line count of the result contents) is filled
 * with a region whose path, commit and author are {@code null}.
 *
 * @param gen the blame generator to consume
 * @return an immutable list of regions
 * @throws IOException declared for the calls made on the blame generator
 */
private static List<Region> loadRegions(BlameGenerator gen) throws IOException {
    final int totalLines = gen.getResultContents().size();
    final Map<ObjectId, PooledCommit> commitPool = Maps.newHashMap();
    final Interner<String> stringPool = Interners.newStrongInterner();

    final List<Region> blamed = Lists.newArrayList();
    while (gen.next()) {
        final String sourcePath = gen.getSourcePath();
        final PersonIdent sourceAuthor = gen.getSourceAuthor();
        final ObjectId sourceCommit = gen.getSourceCommit();
        checkState(sourcePath != null && sourceAuthor != null && sourceCommit != null);

        PooledCommit pooled = commitPool.get(sourceCommit);
        if (pooled == null) {
            // First time this commit is seen: copy its id and rebuild the author with
            // interned name and e-mail strings.
            final ObjectId commitCopy = sourceCommit.copy();
            final PersonIdent pooledAuthor = new PersonIdent(
                    stringPool.intern(sourceAuthor.getName()),
                    stringPool.intern(sourceAuthor.getEmailAddress()),
                    sourceAuthor.getWhen(),
                    sourceAuthor.getTimeZone());
            pooled = new PooledCommit(commitCopy, pooledAuthor);
            commitPool.put(pooled.commit, pooled);
        }

        final String pooledPath = stringPool.intern(sourcePath);
        blamed.add(new Region(pooledPath, pooled.commit, pooled.author,
                gen.getResultStart(), gen.getResultEnd()));
    }
    Collections.sort(blamed);

    // Fill in any gaps left by bugs in JGit, since rendering code assumes the
    // full set of contiguous regions.
    final List<Region> contiguous = Lists.newArrayListWithExpectedSize(blamed.size());
    Region previous = null;
    for (Region current : blamed) {
        if (previous != null) {
            // Sorted regions must never overlap.
            checkState(previous.getEnd() <= current.getStart());
            if (current.getStart() > previous.getEnd()) {
                contiguous.add(new Region(null, null, null, previous.getEnd(), current.getStart()));
            }
        }
        contiguous.add(current);
        previous = current;
    }

    // Trailing gap between the last region and the end of the result.
    if (previous != null && previous.getEnd() != totalLines) {
        contiguous.add(new Region(null, null, null, previous.getEnd(), totalLines));
    }

    return ImmutableList.copyOf(contiguous);
}

From source file:de.learnlib.algorithms.dhc.mealy.MealyDHC.java

/**
 * Builds a fresh hypothesis by working through a FIFO queue of exploration elements.
 *
 * <p>For every element, the access sequence is combined with each splitter into a query, the
 * queries are answered by the oracle, and the ordered list of outputs forms the element's
 * signature. An element whose signature was already seen is wired to the state that owns that
 * signature; otherwise a new state is created, recorded, and handed to
 * {@code scheduleSuccessors}.
 */
@Override
public void startLearning() {
    // Output signatures seen so far, each mapped to the hypothesis state that owns it.
    Map<List<Word<O>>, Integer> knownSignatures = new HashMap<>();
    // Interns output words so equal outputs share one instance across signatures.
    Interner<Word<O>> outputPool = Interners.newStrongInterner();

    // Start from an empty hypothesis machine and a fresh state-to-element mapping.
    hypothesis = new CompactMealy<>(alphabet);
    accessSequences = hypothesis.createDynamicStateMapping();

    // The first element to explore stands for the initial state and has no predecessor.
    Queue<QueueElement<I, O>> pending = new ArrayDeque<>();
    pending.add(new QueueElement<I, O>(null, null, null, null));

    while (!pending.isEmpty()) {
        QueueElement<I, O> current = pending.poll();
        Word<I> accessSequence = assembleAccessSequence(current);

        // One query per splitter, all sharing the access sequence as prefix.
        ArrayList<DefaultQuery<I, Word<O>>> batch = new ArrayList<>(splitters.size());
        for (Word<I> splitter : splitters) {
            batch.add(new DefaultQuery<I, Word<O>>(accessSequence, splitter));
        }
        oracle.processQueries(batch);

        // The answers, in splitter order, make up the output signature.
        List<Word<O>> signature = new ArrayList<>(splitters.size());
        for (DefaultQuery<I, Word<O>> answered : batch) {
            signature.add(outputPool.intern(answered.getOutput()));
        }

        Integer knownState = knownSignatures.get(signature);
        if (knownState != null) {
            // Not a new signature: route the parent's transition to the existing state.
            hypothesis.addTransition(current.parentState, current.transIn, knownState, current.transOut);
            continue;
        }

        // Observably distinct state: create it and, unless it is the root, connect it to
        // its parent.
        Integer freshState;
        if (current.parentElement == null) {
            freshState = hypothesis.addInitialState();
        } else {
            freshState = hypothesis.addState();
            hypothesis.addTransition(current.parentState, current.transIn, freshState, current.transOut);
        }
        knownSignatures.put(signature, freshState);
        accessSequences.put(freshState, current);

        scheduleSuccessors(current, freshState, pending, signature);
    }
}

From source file:com.google.debugging.sourcemap.SourceMapConsumerV1.java

/**
 * Parse the file mappings section of the source map file.  This maps the
 * ids to the filename, line number and column number in the original
 * files./*from   w w  w .  j a v a 2 s  . com*/
 * @param parser The parser to get the data from.
 * @param maxID The maximum id found in the character mapping section.
 */
private void parseFileMappings(ParseState parser, int maxID) throws SourceMapParseException, JSONException {
    // ['d.js', 3, 78, 'foo']
    // Intern the strings to save memory.
    Interner<String> interner = Interners.newStrongInterner();
    ImmutableList.Builder<SourceFile> mappingsBuilder = ImmutableList.builder();

    // Setup all the arrays to keep track of the various details about the
    // source file.
    ArrayList<Byte> lineOffsets = Lists.newArrayList();
    ArrayList<Short> columns = Lists.newArrayList();
    ArrayList<String> identifiers = Lists.newArrayList();

    // The indexes and details about the current position in the file to do
    // diffs against.
    String currentFile = null;
    int lastLine = -1;
    int startLine = -1;
    int startMapId = -1;
    for (int mappingId = 0; mappingId <= maxID; ++mappingId) {
        String currentLine = parser.readLine();
        JSONArray mapArray = new JSONArray(currentLine);
        if (mapArray.length() < 3) {
            parser.fail("Invalid mapping array");
        }

        // Split up the file and directory names to reduce memory usage.
        String myFile = mapArray.getString(0);
        int line = mapArray.getInt(1);
        if (!myFile.equals(currentFile) || (line - lastLine) > Byte.MAX_VALUE
                || (line - lastLine) < Byte.MIN_VALUE) {
            if (currentFile != null) {
                FileName dirFile = splitFileName(interner, currentFile);
                SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir)
                        .setFileName(dirFile.name).setStartLine(startLine).setStartMapId(startMapId)
                        .setLineOffsets(lineOffsets).setColumns(columns).setIdentifiers(identifiers);
                mappingsBuilder.add(builder.build());
            }
            // Reset all the positions back to the start and clear out the arrays
            // to start afresh.
            currentFile = myFile;
            startLine = line;
            lastLine = line;
            startMapId = mappingId;
            columns.clear();
            lineOffsets.clear();
            identifiers.clear();
        }
        // We need to add on the columns and identifiers for all the lines, even
        // for the first line.
        lineOffsets.add((byte) (line - lastLine));
        columns.add((short) mapArray.getInt(2));
        identifiers.add(interner.intern(mapArray.optString(3, "")));
        lastLine = line;
    }
    if (currentFile != null) {
        FileName dirFile = splitFileName(interner, currentFile);
        SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir).setFileName(dirFile.name)
                .setStartLine(startLine).setStartMapId(startMapId).setLineOffsets(lineOffsets)
                .setColumns(columns).setIdentifiers(identifiers);
        mappingsBuilder.add(builder.build());
    }
    mappings = mappingsBuilder.build();
}

From source file:org.apache.hadoop.hive.ql.parse.TaskCompiler.java

/**
 * Turns the parse context into an executable task tree rooted at {@code rootTasks}.
 *
 * <p>Steps, in order:
 * <ol>
 *   <li>If the parse context already carries a fetch task, only configure it (and the
 *       list-sink output formatter) and return.</li>
 *   <li>Run {@code optimizeOperatorPlan}.</li>
 *   <li>For a plain query (not an analyze-column rewrite) create a fetch task; a limit of 0
 *       returns immediately. Otherwise, unless this is an analyze-column rewrite, create move
 *       tasks for the load-table and load-file work, resolving the target directory for
 *       CTAS / materialized views.</li>
 *   <li>Generate the task tree, set key/value descriptors and input formats, and run
 *       {@code optimizeTaskPlan}.</li>
 *   <li>Add column-stats tasks where required, decide the execution mode, and attach the
 *       DDL task for CTAS / materialized views.</li>
 *   <li>Apply the global-limit "least rows" settings, then intern table descriptors and
 *       derive the final explain attributes for every root task.</li>
 * </ol>
 *
 * @param pCtx the parse context to compile
 * @param rootTasks list that receives / holds the root tasks of the generated plan
 * @param inputs read entities of the query
 * @param outputs write entities of the query
 * @throws SemanticException if the load work is inconsistent with a query, the target
 *         database does not exist, or an underlying metastore / stats call fails
 */
@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks,
        final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {

    Context ctx = pCtx.getContext();
    GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
    List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>();

    List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
    List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();

    boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();

    // A fetch task that already exists only needs its formatter configured; no plan is built.
    if (pCtx.getFetchTask() != null) {
        if (pCtx.getFetchTask().getTblDesc() == null) {
            return;
        }
        pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
        TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
        // If the serializer is ThriftJDBCBinarySerDe, then it requires that NoOpFetchFormatter be used. But when it isn't,
        // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
        if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
            if (SessionState.get().isHiveServerQuery()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
            } else {
                String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
                if (formatterName == null || formatterName.isEmpty()) {
                    conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
                }
            }
        }

        return;
    }

    optimizeOperatorPlan(pCtx, inputs, outputs);

    /*
     * In case of a select, use a fetch task instead of a move task.
     * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
     * a column stats task later.
     */
    if (pCtx.getQueryProperties().isQuery() && !isCStats) {
        // A query must have no load-table work and exactly one load-file descriptor.
        if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
            throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
        }

        LoadFileDesc loadFileDesc = loadFileWork.get(0);

        String cols = loadFileDesc.getColumns();
        String colTypes = loadFileDesc.getColumnTypes();

        String resFileFormat;
        TableDesc resultTab = pCtx.getFetchTableDesc();
        if (resultTab == null) {
            resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
            if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()
                    && (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat,
                        ThriftJDBCBinarySerDe.class);
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            } else {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat,
                        LazySimpleSerDe.class);
            }
        } else {
            if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB)
                    .equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            }
        }

        FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
        boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
        fetch.setHiveServerQuery(isHiveServerQuery);
        fetch.setSource(pCtx.getFetchSource());
        fetch.setSink(pCtx.getFetchSink());
        if (isHiveServerQuery && null != resultTab
                && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())
                && HiveConf.getBoolVar(conf,
                        HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
            fetch.setIsUsingThriftJDBCBinarySerDe(true);
        } else {
            fetch.setIsUsingThriftJDBCBinarySerDe(false);
        }

        pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf));

        // For the FetchTask, the limit optimization requires we fetch all the rows
        // in memory and count how many rows we get. It's not practical if the
        // limit factor is too big
        int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
            LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit
                    + ". Doesn't qualify limit optimization.");
            globalLimitCtx.disableOpt();

        }
        if (outerQueryLimit == 0) {
            // Believe it or not, some tools do generate queries with limit 0 and then expect
            // the query to run quickly. Let's meet their requirement.
            LOG.info("Limit 0. No query execution needed.");
            return;
        }
    } else if (!isCStats) {
        for (LoadTableDesc ltd : loadTableWork) {
            Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
            mvTask.add(tsk);
            // Check to see if we are stale'ing any indexes and auto-update them if we want
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
                IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf);
                try {
                    List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
                    for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
                        tsk.addDependentTask(updateTask);
                    }
                } catch (HiveException e) {
                    // Best effort: a failed index auto-update is reported but does not fail compilation.
                    console.printInfo("WARNING: could not auto-update stale indexes, which are not in sync");
                }
            }
        }

        boolean oneLoadFile = true;
        for (LoadFileDesc lfd : loadFileWork) {
            if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
                assert (oneLoadFile); // should not have more than 1 load file for
                // CTAS
                // make the movetask's destination directory the table's destination.
                Path location;
                String loc = pCtx.getQueryProperties().isCTAS() ? pCtx.getCreateTable().getLocation()
                        : pCtx.getCreateViewDesc().getLocation();
                if (loc == null) {
                    // get the default location
                    Path targetPath;
                    try {
                        String protoName = null;
                        if (pCtx.getQueryProperties().isCTAS()) {
                            protoName = pCtx.getCreateTable().getTableName();
                        } else if (pCtx.getQueryProperties().isMaterializedView()) {
                            protoName = pCtx.getCreateViewDesc().getViewName();
                        }
                        String[] names = Utilities.getDbTableName(protoName);
                        if (!db.databaseExists(names[0])) {
                            throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
                        }
                        Warehouse wh = new Warehouse(conf);
                        targetPath = wh.getDefaultTablePath(db.getDatabase(names[0]), names[1]);
                    } catch (HiveException e) {
                        throw new SemanticException(e);
                    } catch (MetaException e) {
                        throw new SemanticException(e);
                    }

                    location = targetPath;
                } else {
                    location = new Path(loc);
                }
                lfd.setTargetDir(location);

                oneLoadFile = false;
            }
            mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf));
        }
    }

    generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);

    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
    }

    // If a task contains an operator which instructs bucketizedhiveinputformat
    // to be used, please do so
    for (Task<? extends Serializable> rootTask : rootTasks) {
        setInputFormat(rootTask);
    }

    optimizeTaskPlan(rootTasks, pCtx, ctx);

    /*
     * If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     */
    if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
        Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
        getLeafTasks(rootTasks, leafTasks);
        if (isCStats) {
            genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0);
        } else {
            for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx
                    .getColumnStatsAutoGatherContexts()) {
                if (!columnStatsAutoGatherContext.isInsertInto()) {
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
                            columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0);
                } else {
                    int numBitVector;
                    try {
                        numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
                    } catch (Exception e) {
                        // Keep the original message but also chain the cause so the
                        // underlying stack trace is not lost.
                        throw new SemanticException(e.getMessage(), e);
                    }
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
                            columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit,
                            numBitVector);
                }
            }
        }
    }

    decideExecMode(rootTasks, ctx, globalLimitCtx);

    if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateTableDesc crtTblDesc = pCtx.getCreateTable();

        crtTblDesc.validate(conf);

        Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc),
                conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
        Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc),
                conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
    }

    if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
        LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
        pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }

    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
        LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
        globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
        List<ExecDriver> mrTasks = Utilities.getMRTasks(rootTasks);
        for (ExecDriver tsk : mrTasks) {
            tsk.setRetryCmdWhenFail(true);
        }
        List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks);
        for (SparkTask sparkTask : sparkTasks) {
            sparkTask.setRetryCmdWhenFail(true);
        }
    }

    // One interner shared across all root tasks, so table descriptors are pooled across
    // the whole plan.
    Interner<TableDesc> interner = Interners.newStrongInterner();
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.internTableDesc(rootTask, interner);
        GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
    }
}