From 880946158b01138c06e93e4aa4255ffbfe70e1c8 Mon Sep 17 00:00:00 2001 From: Jialu Xu Date: Sun, 10 Dec 2023 15:05:34 +0800 Subject: gen_compile_commands.py: fix path resolve with symlinks in it When a path contains relative symbolic links, os.path.abspath() might not follow the symlinks and instead return the absolute path with just the relative paths resolved, resulting in an incorrect path. 1. Say "drivers/hdf/" has some symlinks: # ls -l drivers/hdf/ total 364 drwxrwxr-x 2 ... 4096 ... evdev lrwxrwxrwx 1 ... 44 ... framework -> ../../../../../../drivers/hdf_core/framework -rw-rw-r-- 1 ... 359010 ... hdf_macro_test.h lrwxrwxrwx 1 ... 55 ... inner_api -> ../../../../../../drivers/hdf_core/interfaces/inner_api lrwxrwxrwx 1 ... 53 ... khdf -> ../../../../../../drivers/hdf_core/adapter/khdf/linux -rw-r--r-- 1 ... 74 ... Makefile drwxrwxr-x 3 ... 4096 ... wifi 2. One .cmd file records that: # head -1 ./framework/core/manager/src/.devmgr_service.o.cmd cmd_drivers/hdf/khdf/manager/../../../../framework/core/manager/src/devmgr_service.o := ... \ /path/to/src/drivers/hdf/khdf/manager/../../../../framework/core/manager/src/devmgr_service.c 3. os.path.abspath returns "/path/to/src/framework/core/manager/src/devmgr_service.c", not correct: # ./scripts/clang-tools/gen_compile_commands.py INFO: Could not add line from ./framework/core/manager/src/.devmgr_service.o.cmd: File \ /path/to/src/framework/core/manager/src/devmgr_service.c not found Use os.path.realpath(), which resolves the symlinks and normalizes the paths correctly. # cat compile_commands.json ... { "command": ... "directory": ... "file": "/path/to/bla/drivers/hdf_core/framework/core/manager/src/devmgr_service.c" }, ... Also fix it in parse_arguments(). 
Signed-off-by: Jialu Xu Signed-off-by: Masahiro Yamada --- scripts/clang-tools/gen_compile_commands.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'scripts') diff --git a/scripts/clang-tools/gen_compile_commands.py b/scripts/clang-tools/gen_compile_commands.py index 180952fb91c1b..5dea4479240bc 100755 --- a/scripts/clang-tools/gen_compile_commands.py +++ b/scripts/clang-tools/gen_compile_commands.py @@ -64,7 +64,7 @@ def parse_arguments(): args = parser.parse_args() return (args.log_level, - os.path.abspath(args.directory), + os.path.realpath(args.directory), args.output, args.ar, args.paths if len(args.paths) > 0 else [args.directory]) @@ -172,8 +172,8 @@ def process_line(root_directory, command_prefix, file_path): # by Make, so this code replaces the escaped version with '#'. prefix = command_prefix.replace('\#', '#').replace('$(pound)', '#') - # Use os.path.abspath() to normalize the path resolving '.' and '..' . - abs_path = os.path.abspath(os.path.join(root_directory, file_path)) + # Return the canonical path, eliminating any symbolic links encountered in the path. + abs_path = os.path.realpath(os.path.join(root_directory, file_path)) if not os.path.exists(abs_path): raise ValueError('File %s not found' % abs_path) return { -- cgit 1.2.3-korg From c1a8627164dbe8b92958aea10c7c0848105a3d7f Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 20 Dec 2023 17:18:33 +0900 Subject: kbuild: fix build ID symlinks to installed debug VDSO files Commit 56769ba4b297 ("kbuild: unify vdso_install rules") accidentally dropped the '.debug' suffix from the build ID symlinks. 
Fixes: 56769ba4b297 ("kbuild: unify vdso_install rules") Signed-off-by: Masahiro Yamada --- scripts/Makefile.vdsoinst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'scripts') diff --git a/scripts/Makefile.vdsoinst b/scripts/Makefile.vdsoinst index 1022d9fdd976d..c477d17b0aa5b 100644 --- a/scripts/Makefile.vdsoinst +++ b/scripts/Makefile.vdsoinst @@ -22,7 +22,7 @@ $$(dest): $$(src) FORCE # Some architectures create .build-id symlinks ifneq ($(filter arm sparc x86, $(SRCARCH)),) -link := $(install-dir)/.build-id/$$(shell $(READELF) -n $$(src) | sed -n 's@^.*Build ID: \(..\)\(.*\)@\1/\2@p') +link := $(install-dir)/.build-id/$$(shell $(READELF) -n $$(src) | sed -n 's@^.*Build ID: \(..\)\(.*\)@\1/\2@p').debug __default: $$(link) $$(link): $$(dest) FORCE -- cgit 1.2.3-korg From 9c334eb9ce886247567573074b13c5ac29d1a41a Mon Sep 17 00:00:00 2001 From: Alvin Šipraga Date: Tue, 19 Dec 2023 02:25:14 +0100 Subject: get_maintainer: correctly parse UTF-8 encoded names in files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit While the script correctly extracts UTF-8 encoded names from the MAINTAINERS file, the regular expressions damage my name when parsing from .yaml files. Fix this by replacing the Latin-1-compatible regular expressions with the unicode property matcher \p{L}, which matches on any letter according to the Unicode General Category of letters. The proposed solution only works if the script uses proper string encoding from the outset, so instruct Perl to unconditionally open all files with UTF-8 encoding. This should be safe, as the entire source tree is either UTF-8 or ASCII encoded anyway. See [1] for a detailed analysis. Furthermore, to prevent the \w expression from matching non-ASCII when checking for whether a name should be escaped with quotes, add the /a flag to the regular expression. The escaping logic was duplicated in two places, so it has been factored out into its own function. 
The original issue was also identified on the tools mailing list [2]. This should solve the observed side effects there as well. Link: https://lore.kernel.org/all/dzn6uco4c45oaa3ia4u37uo5mlt33obecv7gghj2l756fr4hdh@mt3cprft3tmq/ [1] Link: https://lore.kernel.org/tools/20230726-gush-slouching-a5cd41@meerkat/ [2] Signed-off-by: Alvin Šipraga Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) (limited to 'scripts') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index 16d8ac6005b6f..dac38c6e3b1cf 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -20,6 +20,7 @@ use Getopt::Long qw(:config no_auto_abbrev); use Cwd; use File::Find; use File::Spec::Functions; +use open qw(:std :encoding(UTF-8)); my $cur_path = fastgetcwd() . '/'; my $lk_path = "./"; @@ -445,7 +446,7 @@ sub maintainers_in_file { my $text = do { local($/) ; <$f> }; close($f); - my @poss_addr = $text =~ m$[A-Za-zÀ-ÿ\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g; + my @poss_addr = $text =~ m$[\p{L}\"\' \,\.\+-]*\s*[\,]*\s*[\(\<\{]{0,1}[A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+\.[A-Za-z0-9]+[\)\>\}]{0,1}$g; push(@file_emails, clean_file_emails(@poss_addr)); } } @@ -1152,6 +1153,17 @@ sub top_of_kernel_tree { return 0; } +sub escape_name { + my ($name) = @_; + + if ($name =~ /[^\w \-]/ai) { ##has "must quote" chars + $name =~ s/(? 
2) { my $first = $nw[@nw - 3]; my $middle = $nw[@nw - 2]; my $last = $nw[@nw - 1]; - if (((length($first) == 1 && $first =~ m/[A-Za-z]/) || + if (((length($first) == 1 && $first =~ m/\p{L}/) || (length($first) == 2 && substr($first, -1) eq ".")) || (length($middle) == 1 || (length($middle) == 2 && substr($middle, -1) eq "."))) { -- cgit 1.2.3-korg From 2639772a11c860628c5f7007842eca52a1c34d78 Mon Sep 17 00:00:00 2001 From: Alvin Šipraga Date: Tue, 19 Dec 2023 02:25:15 +0100 Subject: get_maintainer: remove stray punctuation when cleaning file emails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When parsing emails from .yaml files in particular, stray punctuation such as a leading '-' can end up in the name. For example, consider a common YAML section such as: maintainers: - devicetree@vger.kernel.org This would previously be processed by get_maintainer.pl as: - <devicetree@vger.kernel.org> Make the logic in clean_file_emails more robust by deleting any sub-names which consist of common single punctuation marks before proceeding to the best-effort name extraction logic. The output is then correct: devicetree@vger.kernel.org Some additional comments are added to the function to make things clearer to future readers. 
Link: https://lore.kernel.org/all/0173e76a36b3a9b4e7f324dd3a36fd4a9757f302.camel@perches.com/ Suggested-by: Joe Perches Signed-off-by: Alvin Šipraga Signed-off-by: Linus Torvalds --- scripts/get_maintainer.pl | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'scripts') diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl index dac38c6e3b1cf..ee1aed7e090ca 100755 --- a/scripts/get_maintainer.pl +++ b/scripts/get_maintainer.pl @@ -2462,11 +2462,17 @@ sub clean_file_emails { foreach my $email (@file_emails) { $email =~ s/[\(\<\{]{0,1}([A-Za-z0-9_\.\+-]+\@[A-Za-z0-9\.-]+)[\)\>\}]{0,1}/\<$1\>/g; my ($name, $address) = parse_email($email); - if ($name eq '"[,\.]"') { - $name = ""; - } + # Strip quotes for easier processing, format_email will add them back + $name =~ s/^"(.*)"$/$1/; + + # Split into name-like parts and remove stray punctuation particles my @nw = split(/[^\p{L}\'\,\.\+-]/, $name); + @nw = grep(!/^[\'\,\.\+-]$/, @nw); + + # Make a best effort to extract the name, and only the name, by taking + # only the last two names, or in the case of obvious initials, the last + # three names. if (@nw > 2) { my $first = $nw[@nw - 3]; my $middle = $nw[@nw - 2]; @@ -2480,18 +2486,16 @@ sub clean_file_emails { } else { $name = "$middle $last"; } + } else { + $name = "@nw"; } if (substr($name, -1) =~ /[,\.]/) { $name = substr($name, 0, length($name) - 1); - } elsif (substr($name, -2) =~ /[,\.]"/) { - $name = substr($name, 0, length($name) - 2) . '"'; } if (substr($name, 0, 1) =~ /[,\.]/) { $name = substr($name, 1, length($name) - 1); - } elsif (substr($name, 0, 2) =~ /"[,\.]/) { - $name = '"' . substr($name, 2, length($name) - 2); } my $fmt_email = format_email($name, $address, $email_usename); -- cgit 1.2.3-korg