Example usage for com.google.common.collect Interners newStrongInterner

Introduction

On this page you can find example usage for com.google.common.collect.Interners#newStrongInterner().

Prototype

public static <E> Interner<E> newStrongInterner() 

Document

Returns a new thread-safe interner which retains a strong reference to each instance it has interned, thus preventing these instances from being garbage-collected.
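
In practice a strong interner is used to deduplicate values that occur many times, trading retained memory for a single canonical instance per distinct value. Below is a minimal sketch of that pattern, assuming only Guava on the classpath; the class name and sample input are hypothetical and only for illustration.

import com.google.common.collect.Interner;
import com.google.common.collect.Interners;

import java.util.ArrayList;
import java.util.List;

public class StrongInternerSketch {
    public static void main(String[] args) {
        // The interner keeps a strong reference to every canonical instance,
        // so interned values live as long as the interner itself is reachable.
        Interner<String> interner = Interners.newStrongInterner();

        // Hypothetical input: many rows repeating a handful of country codes.
        String[] rows = { "us", "de", "us", "fr", "de", "us" };

        List<String> countryCodes = new ArrayList<>();
        for (String row : rows) {
            // intern() returns the canonical instance for each distinct value,
            // so equal strings share one object instead of one copy per row.
            countryCodes.add(interner.intern(row));
        }

        // Interning an equal string again yields the same instance.
        System.out.println(interner.intern("us") == interner.intern(new String("us"))); // true
    }
}

The usage examples below follow the same pattern: values parsed from CSV rows, blame output, source-map entries, or table descriptors are passed through intern() before being stored.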

Usage

From source file:com.lithium.flow.geo.Ip2LocationGeoReader.java

@Override
@Nonnull
public List<GeoBlock> readBlocks(@Nonnull File file) throws IOException {
    checkNotNull(file);

    long time = System.currentTimeMillis();
    List<GeoBlock> blocks = Lists.newArrayList();
    Interner<String> interner = Interners.newStrongInterner();
    CSVReader reader = new CSVReader(new FileReader(file));

    String[] line;
    while ((line = reader.readNext()) != null) {
        long start = Long.parseLong(line[0]);
        long end = Long.parseLong(line[1]);
        String countryCode = fixUnknown(interner.intern(line[2].toLowerCase()));
        String countryName = fixUnknown(interner.intern(WordUtils.capitalizeFully(line[3])));
        String region = interner.intern(WordUtils.capitalizeFully(line[4]));
        String city = interner.intern(WordUtils.capitalizeFully(line[5]));
        double latitude = Double.parseDouble(line[6]);
        double longitude = Double.parseDouble(line[7]);
        String postal = line.length <= 8 ? "unknown" : fixUnknown(interner.intern(line[8]));
        String timeZone = line.length <= 9 ? "unknown" : fixUnknown(interner.intern(line[9]));

        GeoDetail detail = new GeoDetail(city, region, postal, countryCode, countryName, latitude, longitude,
                timeZone);
        GeoBlock block = new GeoBlock(start, end, detail);
        blocks.add(block);
    }
    reader.close();

    time = System.currentTimeMillis() - time;
    log.info("read {} blocks in {}ms", blocks.size(), time);
    return blocks;
}

From source file:org.spf4j.zel.vm.ProgramBuilder.java

/**
 * initializes the program
 */
public ProgramBuilder(final MemoryBuilder staticMemBuilder) {
    this.staticMemBuilder = staticMemBuilder;
    instructions = new Instruction[DEFAULT_SIZE];
    instrNumber = 0;
    type = Program.Type.NONDETERMINISTIC;
    execType = null; //Program.ExecutionType.ASYNC;
    stringInterner = Interners.newStrongInterner();
    debugInfo = new ArrayList<>();
}

From source file:com.google.gitiles.blame.BlameCacheImpl.java

private static List<Region> loadRegions(BlameGenerator gen) throws IOException {
    Map<ObjectId, PooledCommit> commits = Maps.newHashMap();
    Interner<String> strings = Interners.newStrongInterner();
    int lineCount = gen.getResultContents().size();

    List<Region> regions = Lists.newArrayList();
    while (gen.next()) {
        String path = gen.getSourcePath();
        PersonIdent author = gen.getSourceAuthor();
        ObjectId commit = gen.getSourceCommit();
        checkState(path != null && author != null && commit != null);

        PooledCommit pc = commits.get(commit);
        if (pc == null) {
            pc = new PooledCommit(commit.copy(), new PersonIdent(strings.intern(author.getName()),
                    strings.intern(author.getEmailAddress()), author.getWhen(), author.getTimeZone()));
            commits.put(pc.commit, pc);
        }
        path = strings.intern(path);
        commit = pc.commit;
        author = pc.author;
        regions.add(new Region(path, commit, author, gen.getResultStart(), gen.getResultEnd()));
    }
    Collections.sort(regions);

    // Fill in any gaps left by bugs in JGit, since rendering code assumes the
    // full set of contiguous regions.
    List<Region> result = Lists.newArrayListWithExpectedSize(regions.size());
    Region last = null;
    for (Region r : regions) {
        if (last != null) {
            checkState(last.getEnd() <= r.getStart());
            if (last.getEnd() < r.getStart()) {
                result.add(new Region(null, null, null, last.getEnd(), r.getStart()));
            }
        }
        result.add(r);
        last = r;
    }
    if (last != null && last.getEnd() != lineCount) {
        result.add(new Region(null, null, null, last.getEnd(), lineCount));
    }

    return ImmutableList.copyOf(result);
}

From source file:de.learnlib.algorithms.dhc.mealy.MealyDHC.java

@Override
public void startLearning() {
    // initialize structure to store state output signatures
    Map<List<Word<O>>, Integer> signatures = new HashMap<>();

    // set up new hypothesis machine
    hypothesis = new CompactMealy<>(alphabet);

    // initialize exploration queue
    Queue<QueueElement<I, O>> queue = new ArrayDeque<>();

    // initialize storage for access sequences
    accessSequences = hypothesis.createDynamicStateMapping();

    // first element to be explored represents the initial state with no predecessor
    queue.add(new QueueElement<I, O>(null, null, null, null));

    Interner<Word<O>> deduplicator = Interners.newStrongInterner();

    while (!queue.isEmpty()) {
        // get element to be explored from queue
        QueueElement<I, O> elem = queue.poll();

        // determine access sequence for state
        Word<I> access = assembleAccessSequence(elem);

        // assemble queries
        ArrayList<DefaultQuery<I, Word<O>>> queries = new ArrayList<>(splitters.size());
        for (Word<I> suffix : splitters) {
            queries.add(new DefaultQuery<I, Word<O>>(access, suffix));
        }

        // retrieve answers
        oracle.processQueries(queries);

        // assemble output signature
        List<Word<O>> sig = new ArrayList<>(splitters.size());
        for (DefaultQuery<I, Word<O>> query : queries) {
            sig.add(deduplicator.intern(query.getOutput()));
        }

        Integer sibling = signatures.get(sig);

        if (sibling != null) {
            // this element does not possess a new output signature
            // create a transition from parent state to sibling
            hypothesis.addTransition(elem.parentState, elem.transIn, sibling, elem.transOut);
        } else {
            // this is actually an observably distinct state! Progress!
            // Create state and connect via transition to parent
            Integer state = elem.parentElement == null ? hypothesis.addInitialState() : hypothesis.addState();
            if (elem.parentElement != null) {
                hypothesis.addTransition(elem.parentState, elem.transIn, state, elem.transOut);
            }
            signatures.put(sig, state);
            accessSequences.put(state, elem);

            scheduleSuccessors(elem, state, queue, sig);
        }
    }
}

From source file:com.google.debugging.sourcemap.SourceMapConsumerV1.java

/**
 * Parse the file mappings section of the source map file.  This maps the
 * ids to the filename, line number and column number in the original
 * files.
 * @param parser The parser to get the data from.
 * @param maxID The maximum id found in the character mapping section.
 */
private void parseFileMappings(ParseState parser, int maxID) throws SourceMapParseException, JSONException {
    // ['d.js', 3, 78, 'foo']
    // Intern the strings to save memory.
    Interner<String> interner = Interners.newStrongInterner();
    ImmutableList.Builder<SourceFile> mappingsBuilder = ImmutableList.builder();

    // Setup all the arrays to keep track of the various details about the
    // source file.
    ArrayList<Byte> lineOffsets = Lists.newArrayList();
    ArrayList<Short> columns = Lists.newArrayList();
    ArrayList<String> identifiers = Lists.newArrayList();

    // The indexes and details about the current position in the file to do
    // diffs against.
    String currentFile = null;
    int lastLine = -1;
    int startLine = -1;
    int startMapId = -1;
    for (int mappingId = 0; mappingId <= maxID; ++mappingId) {
        String currentLine = parser.readLine();
        JSONArray mapArray = new JSONArray(currentLine);
        if (mapArray.length() < 3) {
            parser.fail("Invalid mapping array");
        }

        // Split up the file and directory names to reduce memory usage.
        String myFile = mapArray.getString(0);
        int line = mapArray.getInt(1);
        if (!myFile.equals(currentFile) || (line - lastLine) > Byte.MAX_VALUE
                || (line - lastLine) < Byte.MIN_VALUE) {
            if (currentFile != null) {
                FileName dirFile = splitFileName(interner, currentFile);
                SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir)
                        .setFileName(dirFile.name).setStartLine(startLine).setStartMapId(startMapId)
                        .setLineOffsets(lineOffsets).setColumns(columns).setIdentifiers(identifiers);
                mappingsBuilder.add(builder.build());
            }
            // Reset all the positions back to the start and clear out the arrays
            // to start afresh.
            currentFile = myFile;
            startLine = line;
            lastLine = line;
            startMapId = mappingId;
            columns.clear();
            lineOffsets.clear();
            identifiers.clear();
        }
        // We need to add on the columns and identifiers for all the lines, even
        // for the first line.
        lineOffsets.add((byte) (line - lastLine));
        columns.add((short) mapArray.getInt(2));
        identifiers.add(interner.intern(mapArray.optString(3, "")));
        lastLine = line;
    }
    if (currentFile != null) {
        FileName dirFile = splitFileName(interner, currentFile);
        SourceFile.Builder builder = SourceFile.newBuilder().setDir(dirFile.dir).setFileName(dirFile.name)
                .setStartLine(startLine).setStartMapId(startMapId).setLineOffsets(lineOffsets)
                .setColumns(columns).setIdentifiers(identifiers);
        mappingsBuilder.add(builder.build());
    }
    mappings = mappingsBuilder.build();
}

From source file:org.apache.hadoop.hive.ql.parse.TaskCompiler.java

@SuppressWarnings({ "nls", "unchecked" })
public void compile(final ParseContext pCtx, final List<Task<? extends Serializable>> rootTasks,
        final HashSet<ReadEntity> inputs, final HashSet<WriteEntity> outputs) throws SemanticException {

    Context ctx = pCtx.getContext();
    GlobalLimitCtx globalLimitCtx = pCtx.getGlobalLimitCtx();
    List<Task<MoveWork>> mvTask = new ArrayList<Task<MoveWork>>();

    List<LoadTableDesc> loadTableWork = pCtx.getLoadTableWork();
    List<LoadFileDesc> loadFileWork = pCtx.getLoadFileWork();

    boolean isCStats = pCtx.getQueryProperties().isAnalyzeRewrite();
    int outerQueryLimit = pCtx.getQueryProperties().getOuterQueryLimit();

    if (pCtx.getFetchTask() != null) {
        if (pCtx.getFetchTask().getTblDesc() == null) {
            return;
        }
        pCtx.getFetchTask().getWork().setHiveServerQuery(SessionState.get().isHiveServerQuery());
        TableDesc resultTab = pCtx.getFetchTask().getTblDesc();
        // If the serializer is ThriftJDBCBinarySerDe, then it requires that NoOpFetchFormatter be used. But when it isn't,
        // then either the ThriftFormatter or the DefaultFetchFormatter should be used.
        if (!resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
            if (SessionState.get().isHiveServerQuery()) {
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, ThriftFormatter.class.getName());
            } else {
                String formatterName = conf.get(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER);
                if (formatterName == null || formatterName.isEmpty()) {
                    conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, DefaultFetchFormatter.class.getName());
                }
            }
        }

        return;
    }

    optimizeOperatorPlan(pCtx, inputs, outputs);

    /*
     * In case of a select, use a fetch task instead of a move task.
     * If the select is from analyze table column rewrite, don't create a fetch task. Instead create
     * a column stats task later.
     */
    if (pCtx.getQueryProperties().isQuery() && !isCStats) {
        if ((!loadTableWork.isEmpty()) || (loadFileWork.size() != 1)) {
            throw new SemanticException(ErrorMsg.INVALID_LOAD_TABLE_FILE_WORK.getMsg());
        }

        LoadFileDesc loadFileDesc = loadFileWork.get(0);

        String cols = loadFileDesc.getColumns();
        String colTypes = loadFileDesc.getColumnTypes();

        String resFileFormat;
        TableDesc resultTab = pCtx.getFetchTableDesc();
        if (resultTab == null) {
            resFileFormat = HiveConf.getVar(conf, HiveConf.ConfVars.HIVEQUERYRESULTFILEFORMAT);
            if (SessionState.get().getIsUsingThriftJDBCBinarySerDe()
                    && (resFileFormat.equalsIgnoreCase("SequenceFile"))) {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat,
                        ThriftJDBCBinarySerDe.class);
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            } else {
                resultTab = PlanUtils.getDefaultQueryOutputTableDesc(cols, colTypes, resFileFormat,
                        LazySimpleSerDe.class);
            }
        } else {
            if (resultTab.getProperties().getProperty(serdeConstants.SERIALIZATION_LIB)
                    .equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())) {
                // Set the fetch formatter to be a no-op for the ListSinkOperator, since we'll
                // read formatted thrift objects from the output SequenceFile written by Tasks.
                conf.set(SerDeUtils.LIST_SINK_OUTPUT_FORMATTER, NoOpFetchFormatter.class.getName());
            }
        }

        FetchWork fetch = new FetchWork(loadFileDesc.getSourcePath(), resultTab, outerQueryLimit);
        boolean isHiveServerQuery = SessionState.get().isHiveServerQuery();
        fetch.setHiveServerQuery(isHiveServerQuery);
        fetch.setSource(pCtx.getFetchSource());
        fetch.setSink(pCtx.getFetchSink());
        if (isHiveServerQuery && null != resultTab
                && resultTab.getSerdeClassName().equalsIgnoreCase(ThriftJDBCBinarySerDe.class.getName())
                && HiveConf.getBoolVar(conf,
                        HiveConf.ConfVars.HIVE_SERVER2_THRIFT_RESULTSET_SERIALIZE_IN_TASKS)) {
            fetch.setIsUsingThriftJDBCBinarySerDe(true);
        } else {
            fetch.setIsUsingThriftJDBCBinarySerDe(false);
        }

        pCtx.setFetchTask((FetchTask) TaskFactory.get(fetch, conf));

        // For the FetchTask, the limit optimization requires we fetch all the rows
        // in memory and count how many rows we get. It's not practical if the
        // limit factor is too big
        int fetchLimit = HiveConf.getIntVar(conf, HiveConf.ConfVars.HIVELIMITOPTMAXFETCH);
        if (globalLimitCtx.isEnable() && globalLimitCtx.getGlobalLimit() > fetchLimit) {
            LOG.info("For FetchTask, LIMIT " + globalLimitCtx.getGlobalLimit() + " > " + fetchLimit
                    + ". Doesn't qualify limit optimization.");
            globalLimitCtx.disableOpt();

        }
        if (outerQueryLimit == 0) {
            // Believe it or not, some tools do generate queries with limit 0 and then expect
            // the query to run quickly. Let's meet their requirement.
            LOG.info("Limit 0. No query execution needed.");
            return;
        }
    } else if (!isCStats) {
        for (LoadTableDesc ltd : loadTableWork) {
            Task<MoveWork> tsk = TaskFactory.get(new MoveWork(null, null, ltd, null, false), conf);
            mvTask.add(tsk);
            // Check to see if we are stale'ing any indexes and auto-update them if we want
            if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVEINDEXAUTOUPDATE)) {
                IndexUpdater indexUpdater = new IndexUpdater(loadTableWork, inputs, conf);
                try {
                    List<Task<? extends Serializable>> indexUpdateTasks = indexUpdater.generateUpdateTasks();
                    for (Task<? extends Serializable> updateTask : indexUpdateTasks) {
                        tsk.addDependentTask(updateTask);
                    }
                } catch (HiveException e) {
                    console.printInfo("WARNING: could not auto-update stale indexes, which are not in sync");
                }
            }
        }

        boolean oneLoadFile = true;
        for (LoadFileDesc lfd : loadFileWork) {
            if (pCtx.getQueryProperties().isCTAS() || pCtx.getQueryProperties().isMaterializedView()) {
                assert (oneLoadFile); // should not have more than 1 load file for
                // CTAS
                // make the movetask's destination directory the table's destination.
                Path location;
                String loc = pCtx.getQueryProperties().isCTAS() ? pCtx.getCreateTable().getLocation()
                        : pCtx.getCreateViewDesc().getLocation();
                if (loc == null) {
                    // get the default location
                    Path targetPath;
                    try {
                        String protoName = null;
                        if (pCtx.getQueryProperties().isCTAS()) {
                            protoName = pCtx.getCreateTable().getTableName();
                        } else if (pCtx.getQueryProperties().isMaterializedView()) {
                            protoName = pCtx.getCreateViewDesc().getViewName();
                        }
                        String[] names = Utilities.getDbTableName(protoName);
                        if (!db.databaseExists(names[0])) {
                            throw new SemanticException("ERROR: The database " + names[0] + " does not exist.");
                        }
                        Warehouse wh = new Warehouse(conf);
                        targetPath = wh.getDefaultTablePath(db.getDatabase(names[0]), names[1]);
                    } catch (HiveException e) {
                        throw new SemanticException(e);
                    } catch (MetaException e) {
                        throw new SemanticException(e);
                    }

                    location = targetPath;
                } else {
                    location = new Path(loc);
                }
                lfd.setTargetDir(location);

                oneLoadFile = false;
            }
            mvTask.add(TaskFactory.get(new MoveWork(null, null, null, lfd, false), conf));
        }
    }

    generateTaskTree(rootTasks, pCtx, mvTask, inputs, outputs);

    // For each task, set the key descriptor for the reducer
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.setKeyAndValueDescForTaskTree(rootTask);
    }

    // If a task contains an operator which instructs bucketizedhiveinputformat
    // to be used, please do so
    for (Task<? extends Serializable> rootTask : rootTasks) {
        setInputFormat(rootTask);
    }

    optimizeTaskPlan(rootTasks, pCtx, ctx);

    /*
     * If the query was the result of analyze table column compute statistics rewrite, create
     * a column stats task instead of a fetch task to persist stats to the metastore.
     */
    if (isCStats || !pCtx.getColumnStatsAutoGatherContexts().isEmpty()) {
        Set<Task<? extends Serializable>> leafTasks = new LinkedHashSet<Task<? extends Serializable>>();
        getLeafTasks(rootTasks, leafTasks);
        if (isCStats) {
            genColumnStatsTask(pCtx.getAnalyzeRewrite(), loadFileWork, leafTasks, outerQueryLimit, 0);
        } else {
            for (ColumnStatsAutoGatherContext columnStatsAutoGatherContext : pCtx
                    .getColumnStatsAutoGatherContexts()) {
                if (!columnStatsAutoGatherContext.isInsertInto()) {
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
                            columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit, 0);
                } else {
                    int numBitVector;
                    try {
                        numBitVector = HiveStatsUtils.getNumBitVectorsForNDVEstimation(conf);
                    } catch (Exception e) {
                        throw new SemanticException(e.getMessage());
                    }
                    genColumnStatsTask(columnStatsAutoGatherContext.getAnalyzeRewrite(),
                            columnStatsAutoGatherContext.getLoadFileWork(), leafTasks, outerQueryLimit,
                            numBitVector);
                }
            }
        }
    }

    decideExecMode(rootTasks, ctx, globalLimitCtx);

    if (pCtx.getQueryProperties().isCTAS() && !pCtx.getCreateTable().isMaterialization()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateTableDesc crtTblDesc = pCtx.getCreateTable();

        crtTblDesc.validate(conf);

        Task<? extends Serializable> crtTblTask = TaskFactory.get(new DDLWork(inputs, outputs, crtTblDesc),
                conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtTblTask);
    } else if (pCtx.getQueryProperties().isMaterializedView()) {
        // generate a DDL task and make it a dependent task of the leaf
        CreateViewDesc viewDesc = pCtx.getCreateViewDesc();
        Task<? extends Serializable> crtViewTask = TaskFactory.get(new DDLWork(inputs, outputs, viewDesc),
                conf);
        patchUpAfterCTASorMaterializedView(rootTasks, outputs, crtViewTask);
    }

    if (globalLimitCtx.isEnable() && pCtx.getFetchTask() != null) {
        LOG.info("set least row check for FetchTask: " + globalLimitCtx.getGlobalLimit());
        pCtx.getFetchTask().getWork().setLeastNumRows(globalLimitCtx.getGlobalLimit());
    }

    if (globalLimitCtx.isEnable() && globalLimitCtx.getLastReduceLimitDesc() != null) {
        LOG.info("set least row check for LimitDesc: " + globalLimitCtx.getGlobalLimit());
        globalLimitCtx.getLastReduceLimitDesc().setLeastRows(globalLimitCtx.getGlobalLimit());
        List<ExecDriver> mrTasks = Utilities.getMRTasks(rootTasks);
        for (ExecDriver tsk : mrTasks) {
            tsk.setRetryCmdWhenFail(true);
        }
        List<SparkTask> sparkTasks = Utilities.getSparkTasks(rootTasks);
        for (SparkTask sparkTask : sparkTasks) {
            sparkTask.setRetryCmdWhenFail(true);
        }
    }

    Interner<TableDesc> interner = Interners.newStrongInterner();
    for (Task<? extends Serializable> rootTask : rootTasks) {
        GenMapRedUtils.internTableDesc(rootTask, interner);
        GenMapRedUtils.deriveFinalExplainAttributes(rootTask, pCtx.getConf());
    }
}