aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-09-30 09:11:21 -0400
committer Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2020-09-30 09:11:21 -0400
commit 2fcdd2df10bb6b8bff695fa5cc1b87d108590847 (patch)
tree 69f15bb6c1ad3a1c2b1236d304411aa4d7ae298f
parent 8a72c8865619b68a8c200e43c9f1e476cf846b58 (diff)
download grokmirror-2fcdd2df10bb6b8bff695fa5cc1b87d108590847.tar.gz
Use faster dir searching algorithm
Switch back to the os.walk algorithm because it gives us more control over the dirs we find and lets us avoid needlessly recursing into the git dirs.

Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- grokmirror/__init__.py 65
-rwxr-xr-x grokmirror/fsck.py 2
2 files changed, 36 insertions, 31 deletions
diff --git a/grokmirror/__init__.py b/grokmirror/__init__.py
index 0a25b84..999e9c0 100644
--- a/grokmirror/__init__.py
+++ b/grokmirror/__init__.py
@@ -738,7 +738,7 @@ def is_obstrepo(fullpath, obstdir):
return fullpath.find(obstdir) == 0
-def find_all_gitdirs(toplevel, ignore=None, normalize=False, exclude_objstore=True, flat=False):
+def find_all_gitdirs(toplevel, ignore=None, normalize=False, exclude_objstore=True):
global _alt_repo_map
if _alt_repo_map is None:
_alt_repo_map = dict()
@@ -752,39 +752,44 @@ def find_all_gitdirs(toplevel, ignore=None, normalize=False, exclude_objstore=Tr
logger.info(' search: finding all repos in %s', toplevel)
logger.debug('Ignore list: %s', ' '.join(ignore))
gitdirs = set()
- tp = pathlib.Path(toplevel)
- if flat:
- globpatt = '*.git'
- else:
- globpatt = '**/*.git'
- for subp in tp.glob(globpatt):
- # Should we ignore this dir?
- ignored = False
- for ignoreglob in ignore:
- if subp.match(ignoreglob):
- ignored = True
- break
- if ignored:
- continue
- fullpath = subp.resolve().as_posix()
- if not is_bare_git_repo(fullpath):
+ for root, dirs, files in os.walk(toplevel, topdown=True):
+ if not len(dirs):
continue
- if exclude_objstore and os.path.exists(os.path.join(fullpath, 'grokmirror.objstore')):
- continue
- if normalize:
- fullpath = os.path.realpath(fullpath)
- logger.debug('Found %s', fullpath)
- if fullpath not in gitdirs:
+ torm = set()
+ for name in dirs:
+ fullpath = os.path.join(root, name)
+ # Should we ignore this dir?
+ ignored = False
+ for ignoredir in ignore:
+ if fnmatch.fnmatch(fullpath, ignoredir):
+ torm.add(name)
+ ignored = True
+ break
+ if ignored:
+ continue
+ if not is_bare_git_repo(fullpath):
+ continue
+ if exclude_objstore and os.path.exists(os.path.join(fullpath, 'grokmirror.objstore')):
+ continue
+ if normalize:
+ fullpath = os.path.realpath(fullpath)
+
+ logger.debug('Found %s', os.path.join(root, name))
gitdirs.add(fullpath)
+ torm.add(name)
- if build_amap:
- altrepo = get_altrepo(fullpath)
- if not altrepo:
- continue
- if altrepo not in _alt_repo_map:
- _alt_repo_map[altrepo] = set()
- _alt_repo_map[altrepo].add(fullpath)
+ if build_amap:
+ altrepo = get_altrepo(fullpath)
+ if not altrepo:
+ continue
+ if altrepo not in _alt_repo_map:
+ _alt_repo_map[altrepo] = set()
+ _alt_repo_map[altrepo].add(fullpath)
+
+ for name in torm:
+ # don't recurse into the found *.git dirs
+ dirs.remove(name)
return gitdirs
diff --git a/grokmirror/fsck.py b/grokmirror/fsck.py
index 411e8e7..c4703cf 100755
--- a/grokmirror/fsck.py
+++ b/grokmirror/fsck.py
@@ -983,7 +983,7 @@ def fsck_mirror(config, force=False, repack_only=False, conn_only=False,
grokmirror.manifest_lock(manifile)
manifest = grokmirror.read_manifest(manifile)
- obstrepos = grokmirror.find_all_gitdirs(obstdir, normalize=True, exclude_objstore=False, flat=True)
+ obstrepos = grokmirror.find_all_gitdirs(obstdir, normalize=True, exclude_objstore=False)
analyzed = 0
queued = 0