diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-09-27 00:59:32 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2020-09-27 01:31:59 -0400 |
commit | 6e968dccde4c8b7a9a756656ee736139f2905b0a (patch) | |
tree | 5dbb908459bf9da7671eb256468b407c4c492140 | |
parent | 8a8b6d98e6d02c2fcfcbb7d150f2649f4e9a68db (diff) | |
download | grokmirror-6e968dccde4c8b7a9a756656ee736139f2905b0a.tar.gz |
Add trial obstrepo_merge_strategy feature
Try a different strategy for merging related repositories. Sometimes we
don't want to be super exact when finding siblings, because a looser match
can result in much greater savings.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | grokmirror/__init__.py | 15 | ||||
-rwxr-xr-x | grokmirror/fsck.py | 9 |
2 files changed, 20 insertions, 4 deletions
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py index 744ee28..2774994 100644 --- a/grokmirror/__init__.py +++ b/grokmirror/__init__.py @@ -558,11 +558,20 @@ def find_siblings(fullpath, my_roots, known_roots, exact=False): siblings = set() for gitpath, gitroots in known_roots.items(): # Of course we're going to match ourselves - if fullpath == gitpath or not my_roots or not gitroots: + if fullpath == gitpath or not my_roots or not gitroots or not len(gitroots.intersection(my_roots)): continue - if exact and (gitroots.issubset(my_roots) or my_roots.issubset(gitroots)): + if gitroots.issubset(my_roots) or my_roots.issubset(gitroots): siblings.add(gitpath) - elif not exact and len(gitroots.intersection(my_roots)): + continue + if exact: + continue + sumdiff = len(gitroots.difference(my_roots)) + len(my_roots.difference(gitroots)) + # If we only differ by a single root, consider us siblings + if sumdiff <= 2: + siblings.add(gitpath) + continue + # If we have more roots in common than we have different, consider us siblings + if len(gitroots.intersection(my_roots)) - sumdiff > 0: siblings.add(gitpath) return siblings diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py index 87f3114..487c440 100755 --- a/grokmirror/fsck.py +++ b/grokmirror/fsck.py @@ -857,7 +857,10 @@ def fsck_mirror(config, force=False, repack_only=False, conn_only=False, my_roots = grokmirror.get_repo_roots(obstrepo) if obstrepo in amap and len(amap[obstrepo]): # Is it redundant with any other objstore repos? 
- siblings = grokmirror.find_siblings(obstrepo, my_roots, obst_roots, exact=True) + exact_merge = True + if config['fsck'].get('obstrepo_merge_strategy', 'exact') == 'loose': + exact_merge = False + siblings = grokmirror.find_siblings(obstrepo, my_roots, obst_roots, exact=exact_merge) if len(siblings): siblings.add(obstrepo) mdest = None @@ -909,6 +912,10 @@ def fsck_mirror(config, force=False, repack_only=False, conn_only=False, # Force full repack of merged obstrepos status[mdest]['nextcheck'] = todayiso + # Recalculate my roots + my_roots = grokmirror.get_repo_roots(obstrepo, force=True) + obst_roots[obstrepo] = my_roots + # Not an else, because the previous step may have migrated things if obstrepo not in amap or not len(amap[obstrepo]): obst_changes = True |