aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-09-27 00:59:32 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-09-27 01:31:59 -0400
commit 6e968dccde4c8b7a9a756656ee736139f2905b0a (patch)
tree 5dbb908459bf9da7671eb256468b407c4c492140
parent 8a8b6d98e6d02c2fcfcbb7d150f2649f4e9a68db (diff)
download grokmirror-6e968dccde4c8b7a9a756656ee736139f2905b0a.tar.gz
Add trial obstrepo_merge_strategy feature
Try a different strategy for merging related repositories. Sometimes we don't want to be super exact when finding siblings, because matching more loosely can result in much greater savings.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- grokmirror/__init__.py 15
-rwxr-xr-x grokmirror/fsck.py 9
2 files changed, 20 insertions, 4 deletions
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py
index 744ee28..2774994 100644
--- a/grokmirror/__init__.py
+++ b/grokmirror/__init__.py
@@ -558,11 +558,20 @@ def find_siblings(fullpath, my_roots, known_roots, exact=False):
siblings = set()
for gitpath, gitroots in known_roots.items():
# Of course we're going to match ourselves
- if fullpath == gitpath or not my_roots or not gitroots:
+ if fullpath == gitpath or not my_roots or not gitroots or not len(gitroots.intersection(my_roots)):
continue
- if exact and (gitroots.issubset(my_roots) or my_roots.issubset(gitroots)):
+ if gitroots.issubset(my_roots) or my_roots.issubset(gitroots):
siblings.add(gitpath)
- elif not exact and len(gitroots.intersection(my_roots)):
+ continue
+ if exact:
+ continue
+ sumdiff = len(gitroots.difference(my_roots)) + len(my_roots.difference(gitroots))
+ # If each side has only a single root that the other lacks, consider us siblings
+ if sumdiff <= 2:
+ siblings.add(gitpath)
+ continue
+ # If we have more roots in common than we have different, consider us siblings
+ if len(gitroots.intersection(my_roots)) - sumdiff > 0:
siblings.add(gitpath)
return siblings
diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py
index 87f3114..487c440 100755
--- a/grokmirror/fsck.py
+++ b/grokmirror/fsck.py
@@ -857,7 +857,10 @@ def fsck_mirror(config, force=False, repack_only=False, conn_only=False,
my_roots = grokmirror.get_repo_roots(obstrepo)
if obstrepo in amap and len(amap[obstrepo]):
# Is it redundant with any other objstore repos?
- siblings = grokmirror.find_siblings(obstrepo, my_roots, obst_roots, exact=True)
+ exact_merge = True
+ if config['fsck'].get('obstrepo_merge_strategy', 'exact') == 'loose':
+ exact_merge = False
+ siblings = grokmirror.find_siblings(obstrepo, my_roots, obst_roots, exact=exact_merge)
if len(siblings):
siblings.add(obstrepo)
mdest = None
@@ -909,6 +912,10 @@ def fsck_mirror(config, force=False, repack_only=False, conn_only=False,
# Force full repack of merged obstrepos
status[mdest]['nextcheck'] = todayiso
+ # Recalculate my roots
+ my_roots = grokmirror.get_repo_roots(obstrepo, force=True)
+ obst_roots[obstrepo] = my_roots
+
# Not an else, because the previous step may have migrated things
if obstrepo not in amap or not len(amap[obstrepo]):
obst_changes = True