aboutsummaryrefslogtreecommitdiffstats
path: root/builtin/clone.c
diff options
context:
space:
mode:
Diffstat (limited to 'builtin/clone.c')
-rw-r--r--builtin/clone.c180
1 files changed, 141 insertions, 39 deletions
diff --git a/builtin/clone.c b/builtin/clone.c
index 315befa133..221edbcdee 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -71,6 +71,7 @@ static char *remote_name = NULL;
static char *option_branch = NULL;
static struct string_list option_not = STRING_LIST_INIT_NODUP;
static const char *real_git_dir;
+static const char *ref_format;
static char *option_upload_pack = "git-upload-pack";
static int option_verbosity;
static int option_progress = -1;
@@ -115,7 +116,7 @@ static struct option builtin_clone_options[] = {
OPT_HIDDEN_BOOL(0, "naked", &option_bare,
N_("create a bare repository")),
OPT_BOOL(0, "mirror", &option_mirror,
- N_("create a mirror repository (implies bare)")),
+ N_("create a mirror repository (implies --bare)")),
OPT_BOOL('l', "local", &option_local,
N_("to clone from a local repository")),
OPT_BOOL(0, "no-hardlinks", &option_no_hardlinks,
@@ -156,6 +157,8 @@ static struct option builtin_clone_options[] = {
N_("any cloned submodules will be shallow")),
OPT_STRING(0, "separate-git-dir", &real_git_dir, N_("gitdir"),
N_("separate git dir from working tree")),
+ OPT_STRING(0, "ref-format", &ref_format, N_("format"),
+ N_("specify the reference format to use")),
OPT_STRING_LIST('c', "config", &option_config, N_("key=value"),
N_("set config inside the new repository")),
OPT_STRING_LIST(0, "server-option", &server_options,
@@ -326,7 +329,20 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
int src_len, dest_len;
struct dir_iterator *iter;
int iter_status;
- struct strbuf realpath = STRBUF_INIT;
+
+ /*
+ * Refuse copying directories by default which aren't owned by us. The
+ * code that performs either the copying or hardlinking is not prepared
+ * to handle various edge cases where an adversary may for example
+ * racily swap out files for symlinks. This can cause us to
+ * inadvertently use the wrong source file.
+ *
+ * Furthermore, even if we were prepared to handle such races safely,
+ * creating hardlinks across user boundaries is an inherently unsafe
+ * operation as the hardlinked files can be rewritten at will by the
+ * potentially-untrusted user. We thus refuse to do so by default.
+ */
+ die_upon_dubious_ownership(NULL, NULL, src_repo);
mkdir_if_missing(dest->buf, 0777);
@@ -374,9 +390,27 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
if (unlink(dest->buf) && errno != ENOENT)
die_errno(_("failed to unlink '%s'"), dest->buf);
if (!option_no_hardlinks) {
- strbuf_realpath(&realpath, src->buf, 1);
- if (!link(realpath.buf, dest->buf))
+ if (!link(src->buf, dest->buf)) {
+ struct stat st;
+
+ /*
+ * Sanity-check whether the created hardlink
+ * actually links to the expected file now. This
+ * catches time-of-check-time-of-use bugs in
+ * case the source file was meanwhile swapped.
+ */
+ if (lstat(dest->buf, &st))
+ die(_("hardlink cannot be checked at '%s'"), dest->buf);
+ if (st.st_mode != iter->st.st_mode ||
+ st.st_ino != iter->st.st_ino ||
+ st.st_dev != iter->st.st_dev ||
+ st.st_size != iter->st.st_size ||
+ st.st_uid != iter->st.st_uid ||
+ st.st_gid != iter->st.st_gid)
+ die(_("hardlink different from source at '%s'"), dest->buf);
+
continue;
+ }
if (option_local > 0)
die_errno(_("failed to create link '%s'"), dest->buf);
option_no_hardlinks = 1;
@@ -389,8 +423,6 @@ static void copy_or_link_directory(struct strbuf *src, struct strbuf *dest,
strbuf_setlen(src, src_len);
die(_("failed to iterate over '%s'"), src->buf);
}
-
- strbuf_release(&realpath);
}
static void clone_local(const char *src_repo, const char *dest_repo)
@@ -735,8 +767,9 @@ static int checkout(int submodule_progress, int filter_submodules)
tree = parse_tree_indirect(&oid);
if (!tree)
die(_("unable to parse commit %s"), oid_to_hex(&oid));
- parse_tree(tree);
- init_tree_desc(&t, tree->buffer, tree->size);
+ if (parse_tree(tree) < 0)
+ exit(128);
+ init_tree_desc(&t, &tree->object.oid, tree->buffer, tree->size);
if (unpack_trees(1, &t, &opts) < 0)
die(_("unable to checkout working tree"));
@@ -923,6 +956,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
struct ref *mapped_refs = NULL;
const struct ref *ref;
struct strbuf key = STRBUF_INIT;
+ struct strbuf buf = STRBUF_INIT;
struct strbuf branch_top = STRBUF_INIT, reflog_msg = STRBUF_INIT;
struct transport *transport = NULL;
const char *src_ref_prefix = "refs/heads/";
@@ -931,7 +965,10 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
int submodule_progress;
int filter_submodules = 0;
int hash_algo;
+ unsigned int ref_storage_format = REF_STORAGE_FORMAT_UNKNOWN;
const int do_not_override_repo_unix_permissions = -1;
+ const char *template_dir;
+ char *template_dir_dup = NULL;
struct transport_ls_refs_options transport_ls_refs_options =
TRANSPORT_LS_REFS_OPTIONS_INIT;
@@ -951,11 +988,24 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
usage_msg_opt(_("You must specify a repository to clone."),
builtin_clone_usage, builtin_clone_options);
+ xsetenv("GIT_CLONE_PROTECTION_ACTIVE", "true", 0 /* allow user override */);
+ template_dir = get_template_dir(option_template);
+ if (*template_dir && !is_absolute_path(template_dir))
+ template_dir = template_dir_dup =
+ absolute_pathdup(template_dir);
+ xsetenv("GIT_CLONE_TEMPLATE_DIR", template_dir, 1);
+
if (option_depth || option_since || option_not.nr)
deepen = 1;
if (option_single_branch == -1)
option_single_branch = deepen ? 1 : 0;
+ if (ref_format) {
+ ref_storage_format = ref_storage_format_by_name(ref_format);
+ if (ref_storage_format == REF_STORAGE_FORMAT_UNKNOWN)
+ die(_("unknown ref storage format '%s'"), ref_format);
+ }
+
if (option_mirror)
option_bare = 1;
@@ -1100,8 +1150,15 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
}
}
- init_db(git_dir, real_git_dir, option_template, GIT_HASH_UNKNOWN, NULL,
- do_not_override_repo_unix_permissions, INIT_DB_QUIET);
+ /*
+ * Initialize the repository, but skip initializing the reference
+ * database. We do not yet know about the object format of the
+ * repository, and reference backends may persist that information into
+ * their on-disk data structures.
+ */
+ init_db(git_dir, real_git_dir, template_dir, GIT_HASH_UNKNOWN,
+ ref_storage_format, NULL,
+ do_not_override_repo_unix_permissions, INIT_DB_QUIET | INIT_DB_SKIP_REFDB);
if (real_git_dir) {
free((char *)git_dir);
@@ -1109,6 +1166,50 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
}
/*
+ * We have a chicken-and-egg situation between initializing the refdb
+ * and spawning transport helpers:
+ *
+ * - Initializing the refdb requires us to know about the object
+ * format. We thus have to spawn the transport helper to learn
+ * about it.
+ *
+ * - The transport helper may want to access the Git repository. But
+ * because the refdb has not been initialized, we don't have "HEAD"
+ * or "refs/". Thus, the helper cannot find the Git repository.
+ *
+ * Ideally, we would have structured the helper protocol such that it's
+ * mandatory for the helper to first announce its capabilities without
+ * yet assuming a fully initialized repository. Like that, we could
+ * have added a "lazy-refdb-init" capability that announces whether the
+ * helper is ready to handle not-yet-initialized refdbs. If any helper
+ * didn't support them, we would have fully initialized the refdb with
+ * the SHA1 object format, but later on bailed out if we found out that
+ * the remote repository used a different object format.
+ *
+ * But we didn't, and thus we use the following workaround to partially
+ * initialize the repository's refdb such that it can be discovered by
+ * Git commands. To do so, we:
+ *
+ * - Create an invalid HEAD ref pointing at "refs/heads/.invalid".
+ *
+ * - Create the "refs/" directory.
+ *
+ * - Set up the ref storage format and repository version as
+ * required.
+ *
+ * This is sufficient for Git commands to discover the Git directory.
+ */
+ initialize_repository_version(GIT_HASH_UNKNOWN,
+ the_repository->ref_storage_format, 1);
+
+ strbuf_addf(&buf, "%s/HEAD", git_dir);
+ write_file(buf.buf, "ref: refs/heads/.invalid");
+
+ strbuf_reset(&buf);
+ strbuf_addf(&buf, "%s/refs", git_dir);
+ safe_create_dir(buf.buf, 1);
+
+ /*
* additional config can be injected with -c, make sure it's included
* after init_db, which clears the entire config environment.
*/
@@ -1188,10 +1289,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
if (option_required_reference.nr || option_optional_reference.nr)
setup_reference();
- if (option_sparse_checkout && git_sparse_checkout_init(dir))
- return 1;
-
- remote = remote_get(remote_name);
+ remote = remote_get_early(remote_name);
refspec_appendf(&remote->fetch, "+%s*:%s*", src_ref_prefix,
branch_top.buf);
@@ -1269,6 +1367,27 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
if (transport->smart_options && !deepen && !filter_options.choice)
transport->smart_options->check_self_contained_and_connected = 1;
+ strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD");
+ refspec_ref_prefixes(&remote->fetch,
+ &transport_ls_refs_options.ref_prefixes);
+ if (option_branch)
+ expand_ref_prefix(&transport_ls_refs_options.ref_prefixes,
+ option_branch);
+ if (!option_no_tags)
+ strvec_push(&transport_ls_refs_options.ref_prefixes,
+ "refs/tags/");
+
+ refs = transport_get_remote_refs(transport, &transport_ls_refs_options);
+
+ /*
+ * Now that we know what algorithm the remote side is using, let's set
+ * ours to the same thing.
+ */
+ hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport));
+ initialize_repository_version(hash_algo, the_repository->ref_storage_format, 1);
+ repo_set_hash_algo(the_repository, hash_algo);
+ create_reference_database(the_repository->ref_storage_format, NULL, 1);
+
/*
* Before fetching from the remote, download and install bundle
* data from the --bundle-uri option.
@@ -1284,24 +1403,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
bundle_uri);
else if (has_heuristic)
git_config_set_gently("fetch.bundleuri", bundle_uri);
- }
-
- strvec_push(&transport_ls_refs_options.ref_prefixes, "HEAD");
- refspec_ref_prefixes(&remote->fetch,
- &transport_ls_refs_options.ref_prefixes);
- if (option_branch)
- expand_ref_prefix(&transport_ls_refs_options.ref_prefixes,
- option_branch);
- if (!option_no_tags)
- strvec_push(&transport_ls_refs_options.ref_prefixes,
- "refs/tags/");
-
- refs = transport_get_remote_refs(transport, &transport_ls_refs_options);
-
- if (refs)
- mapped_refs = wanted_peer_refs(refs, &remote->fetch);
-
- if (!bundle_uri) {
+ } else {
/*
* Populate transport->got_remote_bundle_uri and
* transport->bundle_uri. We might get nothing.
@@ -1322,13 +1424,8 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
}
}
- /*
- * Now that we know what algorithm the remote side is using,
- * let's set ours to the same thing.
- */
- hash_algo = hash_algo_by_ptr(transport_get_hash_algo(transport));
- initialize_repository_version(hash_algo, 1);
- repo_set_hash_algo(the_repository, hash_algo);
+ if (refs)
+ mapped_refs = wanted_peer_refs(refs, &remote->fetch);
if (mapped_refs) {
/*
@@ -1431,12 +1528,16 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
dissociate_from_references();
}
+ if (option_sparse_checkout && git_sparse_checkout_init(dir))
+ return 1;
+
junk_mode = JUNK_LEAVE_REPO;
err = checkout(submodule_progress, filter_submodules);
free(remote_name);
strbuf_release(&reflog_msg);
strbuf_release(&branch_top);
+ strbuf_release(&buf);
strbuf_release(&key);
free_refs(mapped_refs);
free_refs(remote_head_points_at);
@@ -1444,6 +1545,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix)
free(dir);
free(path);
free(repo_to_free);
+ free(template_dir_dup);
junk_mode = JUNK_LEAVE_ALL;
transport_ls_refs_options_release(&transport_ls_refs_options);