aboutsummaryrefslogtreecommitdiffstats
path: root/grokmirror.conf
blob: afb21b91bf62158b123cce5feea1ebf3c6757961 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
# Grokmirror 2.x and above use a single config file for each set
# of mirrored repos, instead of a separate repos.conf and fsck.conf
# with multiple sections.
#
# You can use ${varname} interpolation within the same section
# or ${sectname:varname} from any other section.
[core]
#
# Where are our mirrored repositories kept?
toplevel = /var/lib/git/mirror
#
# Where should we keep our manifest file?
manifest = ${toplevel}/manifest.js.gz
#
# Where should we put our log? Make sure it is logrotated,
# otherwise it will grow indefinitely.
log = ${toplevel}/log
#
# Log verbosity: "info" for normal operation, or "debug" for all the
# debug data (lots!).
loglevel = info
#
# Grokmirror version 2.x and above can automatically recognize related repositories
# by analyzing root commits. If it finds two or more related repositories, it can set
# up a unified "object storage" repo and fetch all refs from each related repository.
# For example, you can have two forks of linux.git:
# foo/bar/linux.git:
#   refs/heads/master
#   refs/heads/devbranch
#   refs/tags/v5.0-rc3
#   ...
# baz/quux/linux.git:
#   refs/heads/master
#   refs/heads/devbranch
#   refs/tags/v5.0-rc3
#   ...
# Grokmirror will set up an object storage repository and fetch all refs from
# both repositories:
# objstore/[random-guid-name].git
#    refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/master
#    refs/virtual/[sha1-of-foo/bar/linux.git:12]/heads/devbranch
#    refs/virtual/[sha1-of-foo/bar/linux.git:12]/tags/v5.0-rc3
#    ...
#    refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/master
#    refs/virtual/[sha1-of-baz/quux/linux.git:12]/heads/devbranch
#    refs/virtual/[sha1-of-baz/quux/linux.git:12]/tags/v5.0-rc3
#    ...
#
# This will dramatically reduce disk usage, as the original repositories will be
# repacked to almost nothing. Grokmirror will repack the object storage repository
# with --delta-islands to help optimize packs for efficient clones.
objstore = ${toplevel}/objstore
#
# Due to the nature of git alternates, if two repositories share all their objects
# with an "object storage" repo, any object from repoA can be retrieved from repoB
# via most web UIs if someone knows the object hash.
# E.g. this is how this trick works on GitHub:
# https://github.com/torvalds/linux/blob/b4061a10fc29010a610ff2b5b20160d7335e69bf/drivers/hid/hid-samsung.c#L113-L118
#
# If you have private repositories that should absolutely not reveal any objects,
# add them here using shell-style globbing. They will still be set up for alternates
# if we find common roots with public repositories, but we won't fetch any objects
# from these repos into refs/virtual/*.
#
# Leave blank (or commented out, as below) if you don't have any private
# repos (or don't offer a web UI).
#private = */private/*

# Used by grok-manifest (and others for "pretty"). These options can be
# overridden using matching command-line switches to grok-manifest.
[manifest]
# Enable to save pretty-printed js (larger and slower, but easier to debug)
pretty = no
# List of repositories to ignore. Accepts shell globbing and can take
# multiple entries: put each additional entry on its own line, indented
# with whitespace so that it continues the value.
ignore = /testing/*
         /private/*
# Enable to fetch objects into objstore repos after commit. This can be useful if
# someone tries to push the same objects to a sibling repository, but may significantly
# slow down post-commit hook operation, negating any speed gains. If set to no, the
# objects will be fetched during regular grok-fsck runs.
fetch_objstore = no
# Only include repositories that have git-daemon-export-ok.
check_export_ok = no

# Used by grok-pull, mostly
[remote]
# Base URL (scheme and host) of the mirror you're pulling from.
site = https://git.kernel.org
#
# Where the grok manifest is published. The following protocols
# are supported at this time:
# http:// or https:// using If-Modified-Since http header
# file:// (when manifest file is on NFS, for example)
# NB: You can no longer specify username:password as part of the URL with
#     grokmirror 2.x and above. You can use a netrc file for this purpose.
manifest = ${site}/manifest.js.gz
#
# As an alternative to setting a manifest URL, you can define a manifest_command.
# It has three possible outcomes:
#   exit code 0   + full remote manifest on stdout (must be valid json)
#   exit code 1   + error message on stdout
#   exit code 127 + nothing on stdout if remote manifest hasn't changed
# See contrib/gitolite/* for example commands to use with gitolite.
#manifest_command = /usr/local/bin/grok-get-gl-manifest.sh

# Used by grok-pull
[pull]
#
# Write out projects.list that can be used by gitweb or cgit.
# Leave blank if you don't want a projects.list.
projectslist = ${core:toplevel}/projects.list
#
# When generating projects.list, start at this subpath instead
# of at the toplevel. Useful when mirroring kernel or when generating
# multiple gitweb/cgit configurations for the same tree.
projectslist_trimtop =
#
# When generating projects.list, also create entries for symlinks.
# Otherwise we assume they are just legacy and keep them out of
# web interfaces.
projectslist_symlinks = no
#
# A simple hook to execute whenever a repository is modified.
# It passes the full path to the git repository modified as the only
# argument.
post_update_hook =
#
# Should we purge repositories that are not present in the remote
# manifest? If set to "no" this can be overridden via the -p flag to
# grok-pull (useful if you have a very large collection of repos
# and don't want to walk the entire tree on each manifest run).
# See also: purgeprotect.
purge = yes
#
# This prevents catastrophic mirror purges when our upstream gives us a
# manifest that is dramatically smaller than ours. The default is to
# refuse the purge if the remote manifest has over 5% fewer repositories
# than what we have, or in other words, if we have 100 repos and the
# remote manifest has shrunk to 95 repos or fewer, we refuse to purge,
# suspecting that something has gone wrong. You can set purgeprotect to
# a higher percentage, or override it entirely with --force-purge
# commandline flag.
purgeprotect = 5
#
# If owner is not specified in the manifest, who should be listed
# as the default owner in tools like gitweb or cgit?
default_owner = Grokmirror User
#
# To speed up updates, grok-pull will use multiple threads. Please be
# considerate to the mirror you're pulling from and don't set this very
# high. You may also run into per-ip multiple session limits, so leave
# this number at a nice low setting.
pull_threads = 5
#
# If git fetch fails, we will retry up to this many times before
# giving up and marking that repository as failed.
retries = 3
#
# Use shell-globbing to list the repositories you would like to mirror.
# If you want to mirror everything, just say "*". To list multiple
# entries, put each additional entry on its own line, indented with
# whitespace so that it continues the value. Examples:
#
# mirror everything:
#include = *
#
# mirror just the main kernel sources:
#include = /pub/scm/linux/kernel/git/torvalds/linux.git
#          /pub/scm/linux/kernel/git/stable/linux.git
#          /pub/scm/linux/kernel/git/next/linux-next.git
include = *
#
# This is processed after the include. Use it to exclude some specific
# entries from the all-inclusive globbing above. E.g., to exclude all
# linux-2.4 git sources:
#exclude = */linux-2.4*
exclude =
#
# List repositories that should always reject forced pushes.
#ffonly = */torvalds/linux.git
#
# If you enable the following option and run grok-pull with -o,
# grok-pull will run continuously and will periodically recheck the
# remote manifest for new updates. See contrib for an example systemd
# service you can set up to continuously update your local mirror.  The
# value is in seconds.
#refresh = 900
#
# If you enable refresh, you can also enable the socket listener that
# allows for rapid push notifications from your primary mirror. The
# socket expects repository names matching what is in the local
# manifest, followed by a newline. E.g.:
# /pub/scm/linux/kernel/git/torvalds/linux.git\n
#
# Anything not matching a repository in the local manifest will be ignored.
# See contrib for example pubsub listener.
#socket = ${core:toplevel}/.updater.socket

# Used by grok-fsck
[fsck]
#
# How often should we check each repository, in days.  Any newly added
# repository will have its first check at a random point between 0 and
# $frequency days, and then every $frequency days after that, to ensure
# that not all repositories are checked on the same day.  Don't set to
# less than 7 unless you only mirror a few repositories (or really like
# to thrash your disks).
frequency = 30
#
# Where to keep the status file
statusfile = ${core:toplevel}/fsck.status.js
#
# Some errors are relatively benign and can be safely ignored. Add
# matching substrings to this field to ignore them, one substring per
# line. Every line after the first must be indented, otherwise it is
# not read as a continuation of this value.
ignore_errors = notice:
                warning: disabling bitmap writing, as some objects are not being packed
                ignoring extra bitmap file
                missingTaggerEntry
                missingSpaceBeforeDate
#
# If the fsck process finds errors that match any of these strings
# during its run, it will ask grok-pull to reclone this repository when
# it runs next. Only useful for minion mirrors, not for mirror masters.
# One substring per line; continuation lines must be indented.
reclone_on_errors = fatal: bad tree object
                    fatal: Failed to traverse parents
                    missing commit
                    missing blob
                    missing tree
                    broken link
#
# Should we repack the repositories? You almost always want this on,
# unless you are doing something really odd.
repack = yes
#
# We set proper flags for repacking depending on whether the repo is
# using alternates or not, and whether this is a full repack or not. We
# will also always build bitmaps (when it makes sense), to make cloning
# faster.  You can add other flags (e.g. --threads and --window-memory)
# via the following parameter:
extra_repack_flags =
#
# These flags are added *in addition* to extra_repack_flags
extra_repack_flags_full = --window=250 --depth=50
#
# If git version is new enough to support generating commit graphs, we
# will always generate them, though if your git version is older than
# 2.24.0, the graphs won't be automatically used unless core.commitgraph
# is set to true. You can turn off graph generation by setting the
# commitgraph option to "no".  Graph generation will be skipped for
# child repos that use alternates.
commitgraph = yes
#
# Run git-prune to remove obsolete loose objects. Grokmirror will make
# sure this is a safe operation when it comes to objstore repos, so you
# should leave this enabled.
prune = yes
#
# Grokmirror is extremely careful about not pruning the repositories
# that are used by others via git alternates. However, it cannot prevent
# some other git process (not grokmirror-managed) from inadvertently
# running "git prune/gc". For example, this may happen if an admin
# mistypes a command in the wrong directory.  Setting precious=yes will
# add extensions.preciousObjects=true to the git configuration file in
# such repositories, which will help prevent repository corruption
# between grok-fsck runs.
#
# When set to "yes", grokmirror will temporarily turn this feature off
# when running scheduled repacks in order to be able to delete redundant
# packs and loose objects that have already been packed. This is usually
# a safe operation when done by grok-fsck itself. However, if you set
# this to "always", grokmirror will leave this enabled even during
# grok-fsck runs, for maximum paranoia. Be warned that this will result
# in ever-growing git repositories, so it only makes sense in very rare
# situations, such as for backup purposes.
precious = yes
#
# If you have a lot of forks using the same objstore repo, you may end
# up with thousands of refs being negotiated during each remote update.
# This tends to result in higher load and bigger negotiation transfers.
# Setting the "baselines" option allows you to designate a set of repos
# that are likely to have most of the relevant objects and ignore the
# rest of the objstore refs. This is done using the
# core.alternateRefsPrefixes feature (see git-config).
baselines = */kernel/git/next/linux-next.git
#
# Objstore repos are repacked with delta island support (see man
# git-config), but if you have one repo that is a lot more likely to be
# cloned than all the other ones, you can designate it as "islandCore",
# which will give it priority when creating packs.
islandcores = */kernel/git/torvalds/linux.git
#
# If there are any critical errors, the report will be sent to root. You
# can uncomment and change the settings below to configure report
# delivery to suit your needs:
#report_to = root
#report_from = root
#report_subject = git fsck errors on my beautiful replica
#report_mailhost = localhost