org.sonar.server.computation.filemove.FileMoveDetectionStep.java Source code

Java tutorial

Introduction

Here is the source code for org.sonar.server.computation.filemove.FileMoveDetectionStep.java

Source

/*
 * SonarQube
 * Copyright (C) 2009-2016 SonarSource SA
 * mailto:contact AT sonarsource DOT com
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 3 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */
package org.sonar.server.computation.filemove;

import com.google.common.base.Predicate;
import com.google.common.base.Splitter;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.Immutable;
import org.sonar.api.resources.Qualifiers;
import org.sonar.api.utils.log.Logger;
import org.sonar.api.utils.log.Loggers;
import org.sonar.core.hash.SourceHashComputer;
import org.sonar.core.hash.SourceLinesHashesComputer;
import org.sonar.core.util.CloseableIterator;
import org.sonar.db.DbClient;
import org.sonar.db.DbSession;
import org.sonar.db.component.ComponentTreeQuery;
import org.sonar.db.component.SnapshotDto;
import org.sonar.db.source.FileSourceDto;
import org.sonar.server.computation.analysis.AnalysisMetadataHolder;
import org.sonar.server.computation.component.Component;
import org.sonar.server.computation.component.CrawlerDepthLimit;
import org.sonar.server.computation.component.DepthTraversalTypeAwareCrawler;
import org.sonar.server.computation.component.TreeRootHolder;
import org.sonar.server.computation.component.TypeAwareVisitorAdapter;
import org.sonar.server.computation.filemove.FileSimilarity.File;
import org.sonar.server.computation.snapshot.Snapshot;
import org.sonar.server.computation.source.SourceLinesRepository;
import org.sonar.server.computation.step.ComputationStep;

import static com.google.common.base.Splitter.on;
import static com.google.common.collect.FluentIterable.from;
import static java.util.Arrays.asList;
import static java.util.Collections.singletonList;
import static org.sonar.server.computation.component.ComponentVisitor.Order.POST_ORDER;

public class FileMoveDetectionStep implements ComputationStep {
    // Threshold compared against the highest score of the score matrix in execute(): when the best
    // score is below this value, no file move is registered.
    protected static final int MIN_REQUIRED_SCORE = 85;
    private static final Logger LOG = Loggers.get(FileMoveDetectionStep.class);
    // Qualifiers passed to the component query that loads the files of the previous snapshot.
    private static final List<String> FILE_QUALIFIERS = asList(Qualifiers.FILE, Qualifiers.UNIT_TEST_FILE);
    // Sort fields passed to the same component query.
    private static final List<String> SORT_FIELDS = singletonList("name");
    // Splits the line hashes string of a DB file source into one entry per '\n'-separated segment.
    private static final Splitter LINES_HASHES_SPLITTER = on('\n');

    // Provides the base project snapshot (null on first analysis, see execute()).
    private final AnalysisMetadataHolder analysisMetadataHolder;
    // Provides the root of the component tree of the current report.
    private final TreeRootHolder rootHolder;
    // Access to the component and file source DAOs.
    private final DbClient dbClient;
    // Reads the source lines of the files of the current report.
    private final SourceLinesRepository sourceLinesRepository;
    // Computes the similarity score between a DB file and a report file.
    private final FileSimilarity fileSimilarity;
    // Receives the original file of each report file detected as moved.
    private final MutableMovedFilesRepository movedFilesRepository;

    /**
     * Creates the step; every collaborator is stored as-is in the field of the same name.
     */
    public FileMoveDetectionStep(
            AnalysisMetadataHolder analysisMetadataHolder,
            TreeRootHolder rootHolder,
            DbClient dbClient,
            SourceLinesRepository sourceLinesRepository,
            FileSimilarity fileSimilarity,
            MutableMovedFilesRepository movedFilesRepository) {
        // inputs describing the previous analysis and the current report
        this.analysisMetadataHolder = analysisMetadataHolder;
        this.rootHolder = rootHolder;

        // data access
        this.dbClient = dbClient;
        this.sourceLinesRepository = sourceLinesRepository;

        // scoring and output
        this.fileSimilarity = fileSimilarity;
        this.movedFilesRepository = movedFilesRepository;
    }

    /**
     * Returns the fixed, human-readable description of this computation step.
     */
    @Override
    public String getDescription() {
        return "Detect file moves";
    }

    /**
     * Detects files moved between the previous analysis and the current report and registers them
     * in the moved files repository.
     * <p>
     * Files whose key only exists in the report ("added") are scored against files whose key only
     * exists in the previous snapshot ("removed"). The step returns without registering anything when:
     * <ul>
     *   <li>there is no base project snapshot (first analysis),</li>
     *   <li>the previous snapshot or the report contains no file,</li>
     *   <li>no file was added or no file was removed,</li>
     *   <li>the best score of the matrix is below {@link #MIN_REQUIRED_SCORE}.</li>
     * </ul>
     */
    @Override
    public void execute() {
        // do nothing if no files in db (first analysis)
        Snapshot baseProjectSnapshot = analysisMetadataHolder.getBaseProjectSnapshot();
        if (baseProjectSnapshot == null) {
            LOG.debug("First analysis. Do nothing.");
            return;
        }

        Map<String, DbComponent> dbFilesByKey = getDbFilesByKey(baseProjectSnapshot);
        if (dbFilesByKey.isEmpty()) {
            LOG.debug("Previous snapshot has no file. Do nothing.");
            return;
        }

        Map<String, Component> reportFilesByKey = getReportFilesByKey(this.rootHolder.getRoot());
        if (reportFilesByKey.isEmpty()) {
            LOG.debug("No files in report. Do nothing.");
            return;
        }

        // keys present only in the report are "added", keys present only in the db are "removed";
        // copies are taken because Sets.difference returns a view
        Set<String> addedFileKeys = ImmutableSet
                .copyOf(Sets.difference(reportFilesByKey.keySet(), dbFilesByKey.keySet()));
        Set<String> removedFileKeys = ImmutableSet
                .copyOf(Sets.difference(dbFilesByKey.keySet(), reportFilesByKey.keySet()));

        // can't find matches if at least one of the added or removed files groups is empty => abort
        if (addedFileKeys.isEmpty() || removedFileKeys.isEmpty()) {
            LOG.debug("Either no files added or no files removed. Do nothing.");
            return;
        }

        // retrieve file data from report
        Map<String, File> reportFileSourcesByKey = getReportFileSourcesByKey(reportFilesByKey, addedFileKeys);

        // compute score matrix
        ScoreMatrix scoreMatrix = computeScoreMatrix(dbFilesByKey, removedFileKeys, reportFileSourcesByKey);
        printIfDebug(scoreMatrix);

        // not a single match with a score of at least MIN_REQUIRED_SCORE => abort
        if (scoreMatrix.getMaxScore() < MIN_REQUIRED_SCORE) {
            // the Logger API substitutes "{}" placeholders; "%s" would be logged verbatim
            LOG.debug("max score in matrix is less than min required score ({}). Do nothing.", MIN_REQUIRED_SCORE);
            return;
        }

        MatchesByScore matchesByScore = MatchesByScore.create(scoreMatrix);

        ElectedMatches electedMatches = electMatches(removedFileKeys, reportFileSourcesByKey, matchesByScore);

        registerMatches(dbFilesByKey, reportFilesByKey, electedMatches);
    }

    /**
     * Records every elected match in the moved files repository and logs it at INFO level.
     *
     * @param dbFilesByKey     files of the previous snapshot, indexed by key
     * @param reportFilesByKey files of the current report, indexed by key
     * @param electedMatches   the matches to register
     */
    private void registerMatches(Map<String, DbComponent> dbFilesByKey, Map<String, Component> reportFilesByKey,
            ElectedMatches electedMatches) {
        for (Match match : electedMatches) {
            Component movedFile = reportFilesByKey.get(match.getReportKey());
            DbComponent previousFile = dbFilesByKey.get(match.getDbKey());
            movedFilesRepository.setOriginalFile(movedFile, toOriginalFile(previousFile));
            LOG.info("File move found: " + match);
        }
    }

    /**
     * Loads the file components of the given base snapshot from the database.
     *
     * @param baseProjectSnapshot snapshot of the previous analysis; its id is used both as the id and
     *                            the root id of the query's base snapshot
     * @return the loaded components indexed by key (built with {@code uniqueIndex}, which rejects
     *         duplicate keys)
     */
    private Map<String, DbComponent> getDbFilesByKey(Snapshot baseProjectSnapshot) {
        try (DbSession session = dbClient.openSession(false)) {
            // FIXME no need to use such a complex query, joining on SNAPSHOTS and retrieving all column of table PROJECTS, replace with dedicated mapper method
            SnapshotDto baseSnapshot = new SnapshotDto()
                    .setId(baseProjectSnapshot.getId())
                    .setRootId(baseProjectSnapshot.getId());
            // single page large enough to hold every file
            ComponentTreeQuery allFilesQuery = ComponentTreeQuery.builder()
                    .setBaseSnapshot(baseSnapshot)
                    .setQualifiers(FILE_QUALIFIERS)
                    .setSortFields(SORT_FIELDS)
                    .setPageSize(Integer.MAX_VALUE)
                    .setPage(1)
                    .build();
            return from(dbClient.componentDao().selectAllChildren(session, allFilesQuery))
                    .transform(dto -> new DbComponent(dto.getId(), dto.key(), dto.uuid(), dto.path()))
                    .uniqueIndex(DbComponent::getKey);
        }
    }

    /**
     * Collects every file component of the report tree, indexed by component key.
     *
     * @param root root of the component tree to crawl
     * @return an immutable map from file key to file component
     */
    private static Map<String, Component> getReportFilesByKey(Component root) {
        final ImmutableMap.Builder<String, Component> filesByKey = ImmutableMap.builder();
        // visitor limited to FILE depth, registering each visited file under its key
        TypeAwareVisitorAdapter fileCollector = new TypeAwareVisitorAdapter(CrawlerDepthLimit.FILE, POST_ORDER) {
            @Override
            public void visitFile(Component file) {
                filesByKey.put(file.getKey(), file);
            }
        };
        new DepthTraversalTypeAwareCrawler(fileCollector).visit(root);
        return filesByKey.build();
    }

    /**
     * Builds the similarity input of every added file of the report.
     *
     * @param reportFilesByKey files of the report, indexed by key
     * @param addedFileKeys    keys of the report files to read
     * @return an immutable map from file key to its path, source hash and line hashes
     */
    private Map<String, File> getReportFileSourcesByKey(Map<String, Component> reportFilesByKey,
            Set<String> addedFileKeys) {
        ImmutableMap.Builder<String, File> sourcesByKey = ImmutableMap.builder();
        for (String addedKey : addedFileKeys) {
            sourcesByKey.put(addedKey, readReportFile(reportFilesByKey.get(addedKey)));
        }
        return sourcesByKey.build();
    }

    /**
     * Reads the source lines of a report file and computes its source hash and line hashes.
     */
    private File readReportFile(Component component) {
        // FIXME computation of sourceHash and lineHashes might be done multiple times for some files: here, in ComputeFileSourceData, in
        // SourceHashRepository
        SourceLinesHashesComputer lineHashes = new SourceLinesHashesComputer();
        SourceHashComputer sourceHash = new SourceHashComputer();
        try (CloseableIterator<String> lines = sourceLinesRepository.readLines(component)) {
            while (lines.hasNext()) {
                String line = lines.next();
                lineHashes.addLine(line);
                // second argument tells the computer whether another line follows
                sourceHash.addLine(line, lines.hasNext());
            }
        }
        return new File(component.getReportAttributes().getPath(), sourceHash.getHash(),
                lineHashes.getLineHashes());
    }

    /**
     * Scores every removed DB file against every added report file.
     * <p>
     * The matrix has one row per key of {@code dbFileKeys} and one column per entry of
     * {@code reportFileSourcesByKey}, both in iteration order. A DB file without stored source keeps a
     * row of zeros.
     *
     * @param dtosByKey              DB files indexed by key
     * @param dbFileKeys             keys of the removed DB files (rows)
     * @param reportFileSourcesByKey added report files indexed by key (columns)
     * @return the score matrix together with the highest score found
     */
    private ScoreMatrix computeScoreMatrix(Map<String, DbComponent> dtosByKey, Set<String> dbFileKeys,
            Map<String, File> reportFileSourcesByKey) {
        int[][] scoreMatrix = new int[dbFileKeys.size()][reportFileSourcesByKey.size()];
        int maxScore = 0;

        try (DbSession dbSession = dbClient.openSession(false)) {
            int dbFileIndex = 0;
            for (String removedFileKey : dbFileKeys) {
                File fileInDb = getFile(dbSession, dtosByKey.get(removedFileKey));
                if (fileInDb != null) {
                    int reportFileIndex = 0;
                    for (File unmatchedFile : reportFileSourcesByKey.values()) {
                        int score = fileSimilarity.score(fileInDb, unmatchedFile);
                        scoreMatrix[dbFileIndex][reportFileIndex] = score;
                        if (score > maxScore) {
                            maxScore = score;
                        }
                        reportFileIndex++;
                    }
                }
                // the row index must advance for every key, including files without source, otherwise
                // the scores of all following files would be stored in the row of another file
                dbFileIndex++;
            }
        }

        return new ScoreMatrix(dbFileKeys, reportFileSourcesByKey, scoreMatrix, maxScore);
    }

    /**
     * Loads the similarity input (path, source hash, line hashes) of a DB file.
     *
     * @param dbSession   session used to query the file source
     * @param dbComponent the DB file to load
     * @return the file, or {@code null} when no file source is stored for the component's uuid
     */
    @CheckForNull
    private File getFile(DbSession dbSession, DbComponent dbComponent) {
        FileSourceDto fileSourceDto = dbClient.fileSourceDao().selectSourceByFileUuid(dbSession,
                dbComponent.getUuid());
        if (fileSourceDto == null) {
            return null;
        }
        // NOTE(review): getLineHashes() is assumed to be nullable (source stored without hashes) - confirm
        // against FileSourceDto. Splitter.splitToList rejects null, so fall back to an empty string.
        String lineHashes = fileSourceDto.getLineHashes();
        return new File(dbComponent.getPath(), fileSourceDto.getSrcHash(),
                LINES_HASHES_SPLITTER.splitToList(lineHashes == null ? "" : lineHashes));
    }

    /**
     * Logs the score matrix as ';'-separated CSV, only when DEBUG logging is enabled so the CSV is
     * not built needlessly.
     */
    private static void printIfDebug(ScoreMatrix scoreMatrix) {
        if (!LOG.isDebugEnabled()) {
            return;
        }
        LOG.debug("ScoreMatrix:\n" + scoreMatrix.toCsv(';'));
    }

    /**
     * Elects, among all candidate matches, those that can be registered as file moves.
     * <p>
     * Score groups are processed in the iteration order of {@code matchesByScore}. Within a group,
     * matches involving a file that has already been elected are discarded first. Of the remaining
     * candidates, a single one is elected directly; when several remain, only the unambiguous ones
     * are elected, i.e. those whose DB file and report file each appear in exactly one remaining
     * candidate of the group.
     *
     * @param dbFileKeys             keys of the removed DB files
     * @param reportFileSourcesByKey added report files indexed by key
     * @param matchesByScore         candidate matches grouped by score
     * @return the elected matches
     */
    private static ElectedMatches electMatches(Set<String> dbFileKeys, Map<String, File> reportFileSourcesByKey,
            MatchesByScore matchesByScore) {
        ElectedMatches electedMatches = new ElectedMatches(matchesByScore, dbFileKeys, reportFileSourcesByKey);
        Multimap<String, Match> matchesPerFileForScore = ArrayListMultimap.create();
        for (List<Match> matches : matchesByScore) {
            // no match for this score value, ignore
            if (matches == null) {
                continue;
            }

            // only candidates whose files are both still unmatched take part in the election;
            // this is an immutable snapshot, so electing inside the loops below is safe
            List<Match> matchesToValidate = electedMatches.filter(matches);
            if (matchesToValidate.isEmpty()) {
                continue;
            }
            if (matchesToValidate.size() == 1) {
                electedMatches.add(matchesToValidate.get(0));
            } else {
                // count, for this score only, how many remaining candidates involve each file
                matchesPerFileForScore.clear();
                for (Match match : matchesToValidate) {
                    matchesPerFileForScore.put(match.getDbKey(), match);
                    matchesPerFileForScore.put(match.getReportKey(), match);
                }
                // validate non ambiguous matches (ie. the match is the only match of both the db file and the report file)
                for (Match match : matchesToValidate) {
                    int dbFileMatchesCount = matchesPerFileForScore.get(match.getDbKey()).size();
                    int reportFileMatchesCount = matchesPerFileForScore.get(match.getReportKey()).size();
                    if (dbFileMatchesCount == 1 && reportFileMatchesCount == 1) {
                        electedMatches.add(match);
                    }
                }
            }
        }
        return electedMatches;
    }

    /**
     * Converts a DB file into the original-file representation of the moved files repository,
     * carrying over its id, uuid and key.
     */
    private static MovedFilesRepository.OriginalFile toOriginalFile(DbComponent dbComponent) {
        long id = dbComponent.getId();
        String uuid = dbComponent.getUuid();
        String key = dbComponent.getKey();
        return new MovedFilesRepository.OriginalFile(id, uuid, key);
    }

    /**
     * Immutable holder of the four columns of a DB component used by this step: id, key, uuid and path.
     */
    @Immutable
    private static final class DbComponent {
        private final long id;
        private final String key;
        private final String uuid;
        private final String path;

        // Stores the given values as-is; no validation is performed.
        private DbComponent(long id, String key, String uuid, String path) {
            this.id = id;
            this.key = key;
            this.uuid = uuid;
            this.path = path;
        }

        /** Returns the id given at construction. */
        public long getId() {
            return id;
        }

        /** Returns the component key given at construction. */
        public String getKey() {
            return key;
        }

        /** Returns the component uuid given at construction. */
        public String getUuid() {
            return uuid;
        }

        /** Returns the component path given at construction. */
        public String getPath() {
            return path;
        }
    }

    /**
     * Accumulates the elected matches and remembers the key of every file (DB or report) that is part
     * of one, so that later candidates involving such a file can be filtered out.
     */
    private static class ElectedMatches implements Iterable<Match> {
        private final List<Match> matches;
        private final Set<String> matchedFileKeys;
        // accepts a match only when neither of its two files has been elected yet
        private final Predicate<Match> notAlreadyMatched = this::isNotAlreadyMatched;

        /**
         * Pre-sizes the internal collections from the number of candidate matches and the total
         * number of DB and report files.
         */
        public ElectedMatches(MatchesByScore matchesByScore, Set<String> dbFileKeys,
                Map<String, File> reportFileSourcesByKey) {
            this.matches = new ArrayList<>(matchesByScore.getSize());
            int fileCount = dbFileKeys.size() + reportFileSourcesByKey.size();
            this.matchedFileKeys = new HashSet<>(fileCount);
        }

        /** Elects the match and marks both of its files as matched. */
        public void add(Match match) {
            matches.add(match);
            matchedFileKeys.add(match.getDbKey());
            matchedFileKeys.add(match.getReportKey());
        }

        /** Returns the given matches whose DB file and report file are both still unmatched. */
        public List<Match> filter(Iterable<Match> matches) {
            return from(matches).filter(notAlreadyMatched).toList();
        }

        /** Iterates over the elected matches in election order. */
        @Override
        public Iterator<Match> iterator() {
            return matches.iterator();
        }

        private boolean isNotAlreadyMatched(@Nonnull Match input) {
            return !matchedFileKeys.contains(input.getDbKey())
                    && !matchedFileKeys.contains(input.getReportKey());
        }
    }
}