aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorWerner Koch <wk@gnupg.org>2021-03-04 10:16:48 +0100
committerWerner Koch <wk@gnupg.org>2021-03-04 10:22:14 +0100
commitdeb6c94362c0f179de1cac18707aad2f51a21e10 (patch)
treedec21d341c77aaa92fdf0ecf8c36a789aad3746c
parentbe2da244565822ad1f268f84dc88a23e5aa8d26a (diff)
downloadgnupg-deb6c94362c0f179de1cac18707aad2f51a21e10.tar.gz
common: First take on handling Unicode command line args.
* common/w32-misc.c: New. * common/t-w32-cmdline.c: New. * common/init.c: Include w32help.h. (prepare_w32_commandline): New. (_init_common_subsystems) [W32]: Call prepare_w32_commandline. * common/Makefile.am (common_sources) [W32]: Add w32-misc.c (module_tests): Add t-w32-cmdline (t_w32_cmdline_LDADD): New. -- The rules for the command line parser are not clearly specified - if at all. See the comment in t-w32-cmdline.c. We can't use the mingw version because that would require changing all argv handling to be wchar_t and that only for Windows. That would be too ugly. Parsing the command line into argv by us is much easier and we can do that only if needed - i.e. if globbing is required (we are prepared for this) or a non-ASCII character has been encountered. This way we keep things stable and only fix the currently not working Unicode problem. GnuPG-bug-id: 4398
-rw-r--r--common/Makefile.am7
-rw-r--r--common/init.c78
-rw-r--r--common/t-w32-cmdline.c181
-rw-r--r--common/w32-misc.c192
-rw-r--r--common/w32help.h7
5 files changed, 459 insertions, 6 deletions
diff --git a/common/Makefile.am b/common/Makefile.am
index 11277ca20..bc063ec16 100644
--- a/common/Makefile.am
+++ b/common/Makefile.am
@@ -102,7 +102,7 @@ common_sources = \
if HAVE_W32_SYSTEM
-common_sources += w32-reg.c
+common_sources += w32-reg.c w32-misc.c
endif
# To make the code easier to read we have split home some code into
@@ -168,7 +168,7 @@ module_tests = t-stringhelp t-timestuff \
t-convert t-percent t-gettime t-sysutils t-sexputil \
t-session-env t-openpgp-oid t-ssh-utils \
t-mapstrings t-zb32 t-mbox-util t-iobuf t-strlist \
- t-name-value t-ccparray t-recsel
+ t-name-value t-ccparray t-recsel t-w32-cmdline
if !HAVE_W32CE_SYSTEM
module_tests += t-exechelp t-exectool
endif
@@ -222,6 +222,9 @@ t_name_value_LDADD = $(t_common_ldadd)
t_ccparray_LDADD = $(t_common_ldadd)
t_recsel_LDADD = $(t_common_ldadd)
+t_w32_cmdline_SOURCES = t-w32-cmdline.c w32-misc.c $(t_extra_src)
+t_w32_cmdline_LDADD = $(t_common_ldadd)
+
# System specific test
if HAVE_W32_SYSTEM
t_w32_reg_SOURCES = t-w32-reg.c $(t_extra_src)
diff --git a/common/init.c b/common/init.c
index 073c5cd8a..06fd30956 100644
--- a/common/init.c
+++ b/common/init.c
@@ -42,6 +42,7 @@
#include <gcrypt.h>
#include "util.h"
#include "i18n.h"
+#include "w32help.h"
/* This object is used to register memory cleanup functions.
Technically they are not needed but they can avoid frequent
@@ -79,6 +80,11 @@ sleep_on_exit (void)
}
#endif /*HAVE_W32CE_SYSTEM*/
+#if HAVE_W32_SYSTEM
+static void prepare_w32_commandline (int *argcp, char ***argvp);
+#endif /*HAVE_W32_SYSTEM*/
+
+
static void
run_mem_cleanup (void)
@@ -190,13 +196,10 @@ _init_common_subsystems (gpg_err_source_t errsource, int *argcp, char ***argvp)
gpgrt_init ();
gpgrt_set_alloc_func (gcry_realloc);
+#ifdef HAVE_W32CE_SYSTEM
/* Special hack for Windows CE: We extract some options from arg
to setup the standard handles. */
-#ifdef HAVE_W32CE_SYSTEM
parse_std_file_handles (argcp, argvp);
-#else
- (void)argcp;
- (void)argvp;
#endif
/* Access the standard estreams as early as possible. If we don't
@@ -217,6 +220,16 @@ _init_common_subsystems (gpg_err_source_t errsource, int *argcp, char ***argvp)
/* Logging shall use the standard socket directory as fallback. */
log_set_socket_dir_cb (gnupg_socketdir);
+
+#if HAVE_W32_SYSTEM
+ /* For Standard Windows we use our own parser for the command line
+ * so that we can return an array of utf-8 encoded strings. */
+ prepare_w32_commandline (argcp, argvp);
+#else
+ (void)argcp;
+ (void)argvp;
+#endif
+
}
@@ -290,3 +303,60 @@ parse_std_file_handles (int *argcp, char ***argvp)
}
#endif /*HAVE_W32CE_SYSTEM*/
+
+
+/* Windows only: Re-create ARGV from the wide character command line
+ * so that all arguments are UTF-8 encoded.  Our own parser is used
+ * if globing has been requested (gpgrt_strusage (95) yields a string
+ * starting with '1') or if the command line has at least one
+ * non-ASCII character; otherwise *R_ARGC and *R_ARGV are left
+ * untouched.  They are also left untouched on an error, which is
+ * merely logged.  The converted command line and the new array are
+ * not released here; both are passed to gpgrt_annotate_leaked_object.  */
+#ifdef HAVE_W32_SYSTEM
+static void
+prepare_w32_commandline (int *r_argc, char ***r_argv)
+{
+  const wchar_t *wline;
+  const wchar_t *wp;
+  const char *flag;
+  char *utf8line;
+  char **new_argv;
+  int new_argc;
+  int globing = 0;
+
+  flag = gpgrt_strusage (95);
+  if (flag && *flag == '1')
+    globing = 1;
+
+  wline = GetCommandLineW ();
+  if (!wline)
+    {
+      log_error ("GetCommandLineW failed\n");
+      return;
+    }
+
+  if (!globing)
+    {
+      /* Without globing there is only something to do if a
+       * non-ASCII character shows up somewhere.  */
+      wp = wline;
+      while (*wp && iswascii (*wp))
+        wp++;
+      if (!*wp)
+        return;  /* Plain ASCII - keep the ARGV we were given.  */
+    }
+
+  utf8line = wchar_to_utf8 (wline);
+  if (!utf8line)
+    {
+      log_error ("parsing command line failed: %s\n", strerror (errno));
+      return;
+    }
+  gpgrt_annotate_leaked_object (utf8line);
+
+  /* The returned array points into UTF8LINE.  */
+  new_argv = w32_parse_commandline (utf8line, globing, &new_argc);
+  if (!new_argv)
+    {
+      log_error ("parsing command line failed: %s\n", "internal error");
+      return;
+    }
+  gpgrt_annotate_leaked_object (new_argv);
+
+  *r_argv = new_argv;
+  *r_argc = new_argc;
+}
+#endif /*HAVE_W32_SYSTEM*/
diff --git a/common/t-w32-cmdline.c b/common/t-w32-cmdline.c
new file mode 100644
index 000000000..8686a376a
--- /dev/null
+++ b/common/t-w32-cmdline.c
@@ -0,0 +1,181 @@
+/* t-w32-cmdline.c - Test the parser for the Windows command line
+ * Copyright (C) 2021 g10 Code GmbH
+ *
+ * This file is part of GnuPG.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of either
+ *
+ * - the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at
+ * your option) any later version.
+ *
+ * or
+ *
+ * - the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * or both in parallel, as here.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "t-support.h"
+#include "w32help.h"
+
+#define PGM "t-w32-cmdline"
+
+static int verbose;
+static int debug;
+static int errcount;
+
+
+/* Run w32_parse_commandline over a table of command lines and compare
+ * the returned argument count and strings with the expected values.
+ * A mismatch is reported on stderr and counted in ERRCOUNT.  */
+static void
+test_all (void)
+{
+  static struct {
+    const char *cmdline;
+    int argc;               /* Expected number of args.  */
+    char *argv[10];         /* Expected results.  */
+  } tests[] = {
+    /* Examples from "Parsing C++ Command-Line Arguments" dated 11/18/2006.
+     * https://docs.microsoft.com/en-us/previous-versions/17w5ykft(v=vs.85)
+     */
+    { "\"abc\" d e", 3, { "abc", "d", "e" }},
+    { "a\\\\\\b d\"e f\"g h", 3, { "a\\\\\\b", "de fg", "h" }},
+    { "a\\\\\\\"b c d", 3, { "a\\\"b", "c", "d" }},
+    { "a\\\\\\\\\"b c\" d e", 3, { "a\\\\b c", "d", "e" }},
+    /* Some arbitrary tests created using mingw.
+     * But I am not sure whether their parser is fully correct.
+     */
+    { "e:a a b\"c\" ", 3, { "e:a", "a", "bc" }},
+    /* The next three tests are disabled.  */
+    /* { "e:a a b\"c\"\" d\"\"e \" ", */
+    /* 5, { "e:a", "a", "bc\"", "de", " " }}, */
+    /* { "e:a a b\"c\"\" d\"\"e\" f\\gh ", */
+    /* 4, { "e:a", "a", "bc\"", "de f\\gh "}}, */
+    /* { "e:a a b\"c\"\" d\"\"e\" f\\\"gh \" ", */
+    /* 4, { "e:a", "a", "bc\"", "de f\"gh " }},*/
+
+    { "\"foo bar\"", 1 , { "foo bar" }},
+    { "", 1 , { "" }}
+  };
+  int tidx;
+  int i, any, argc;
+  char *cmdline;
+  char **argv;
+
+  for (tidx = 0; tidx < DIM(tests); tidx++)
+    {
+      /* The parser modifies its input, thus work on a copy.  */
+      cmdline = xstrdup (tests[tidx].cmdline);
+      if (verbose && tidx)
+        putchar ('\n');
+      if (verbose)
+        printf ("test %d: line ->%s<-\n", tidx, cmdline);
+      argv = w32_parse_commandline (cmdline, 0, &argc);
+      if (!argv)
+        {
+          fail (tidx);
+          xfree (cmdline);
+          continue;
+        }
+      if (tests[tidx].argc != argc)
+        {
+          fprintf (stderr, PGM": test %d: argc wrong (want %d, got %d)\n",
+                   tidx, tests[tidx].argc, argc);
+          any = 1;
+        }
+      else
+        any = 0;
+      for (i=0; i < tests[tidx].argc; i++)
+        {
+          if (verbose)
+            printf ("test %d: argv[%d] ->%s<-\n",
+                    tidx, i, tests[tidx].argv[i]);
+          /* Only compare what the parser actually returned.  */
+          if (i < argc && strcmp (tests[tidx].argv[i], argv[i]))
+            {
+              if (verbose)
+                printf ("test %d: got[%d] ->%s<- ERROR\n",
+                        tidx, i, argv[i]);
+              any = 1;
+            }
+        }
+      if (any)
+        {
+          fprintf (stderr, PGM": test %d: error%s\n",
+                   tidx, verbose? "":" (use --verbose)");
+          errcount++;
+        }
+      /* The items of ARGV point into CMDLINE; thus both are released
+       * only after the comparison.  */
+      xfree (argv);
+      xfree (cmdline);
+    }
+}
+
+
+
+/* Entry point: Handle the options --verbose, --debug and --help,
+ * refuse any other argument, run the tests, and return 1 if at least
+ * one of them failed, else 0.  */
+int
+main (int argc, char **argv)
+{
+  int prev_argc = -1;
+
+  no_exit_on_fail = 1;
+
+  /* Skip the program name.  */
+  if (argc)
+    {
+      argc--;
+      argv++;
+    }
+
+  /* Eat the options.  The loop also ends if an iteration did not
+   * consume anything, which happens for an argument not starting
+   * with "--".  */
+  for (; argc && prev_argc != argc; )
+    {
+      const char *arg = *argv;
+
+      prev_argc = argc;
+      if (!strcmp (arg, "--"))
+        {
+          argc--;
+          argv++;
+          break;
+        }
+
+      if (!strcmp (arg, "--help"))
+        {
+          fputs ("usage: " PGM " [FILE]\n"
+                 "Options:\n"
+                 " --verbose Print timings etc.\n"
+                 " --debug Flyswatter\n"
+                 , stdout);
+          exit (0);
+        }
+
+      if (!strcmp (arg, "--verbose"))
+        {
+          verbose++;
+          argc--;
+          argv++;
+        }
+      else if (!strcmp (arg, "--debug"))
+        {
+          debug++;
+          verbose += 2;
+          argc--;
+          argv++;
+        }
+      else if (!strncmp (arg, "--", 2))
+        {
+          fprintf (stderr, PGM ": unknown option '%s'\n", arg);
+          exit (1);
+        }
+    }
+
+  /* Anything left over is an error.  */
+  if (argc)
+    {
+      fprintf (stderr, PGM ": no arguments allowed\n");
+      exit (1);
+    }
+
+  test_all ();
+
+  return errcount ? 1 : 0;
+}
diff --git a/common/w32-misc.c b/common/w32-misc.c
new file mode 100644
index 000000000..8aef12e4a
--- /dev/null
+++ b/common/w32-misc.c
@@ -0,0 +1,192 @@
+/* w32-misc.c - Helper functions needed in Windows
+ * Copyright (C) 2021 g10 Code GmbH
+ *
+ * This file is part of GnuPG.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of either
+ *
+ * - the GNU Lesser General Public License as published by the Free
+ * Software Foundation; either version 3 of the License, or (at
+ * your option) any later version.
+ *
+ * or
+ *
+ * - the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * or both in parallel, as here.
+ *
+ * This file is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "util.h"
+#include "w32help.h"
+
+
+/* Return the length of the run of backslashes at the start of S;
+ * this is 0 if S does not begin with a backslash.  */
+static unsigned int
+count_backslashes (const char *s)
+{
+  const char *end = s;
+
+  while (*end == '\\')
+    end++;
+  return (unsigned int)(end - s);
+}
+
+
+/* Remove the command line quoting from STRING in place:
+ *  - A run of N backslashes followed by a double quote is replaced
+ *    by N/2 backslashes plus, for an odd N, a literal double quote.
+ *  - Backslashes not followed by a double quote are kept as they are.
+ *  - A double quote which is not the last character is dropped and
+ *    the character right after it is copied without interpretation.
+ *  - A double quote at the very end is kept.
+ * The result is never longer than the input.  */
+static void
+strip_one_arg (char *string)
+{
+  char *src = string;
+  char *dst = string;
+  unsigned int nbs, k;
+
+  while (*src)
+    {
+      if (*src == '\\')
+        {
+          nbs = count_backslashes (src);
+          if (src[nbs] == '"')
+            {
+              /* Each pair of backslashes yields one backslash.  */
+              for (k = nbs / 2; k; k--)
+                *dst++ = '\\';
+              if (nbs % 2)  /* The left over one escapes the quote.  */
+                *dst++ = '"';
+              src += nbs + 1;  /* Skip the backslashes and the quote.  */
+            }
+          else
+            {
+              /* No quote follows: the backslashes are literals.  */
+              for (k = nbs; k; k--)
+                *dst++ = '\\';
+              src += nbs;
+            }
+        }
+      else if (*src == '"' && src[1])
+        {
+          /* Drop the quote and take the next character verbatim.  */
+          *dst++ = src[1];
+          src += 2;
+        }
+      else
+        *dst++ = *src++;
+    }
+  *dst = 0;
+}
+
+
+/* Helper for w32_parse_commandline.  Split STRING at spaces and tabs
+ * which are not inside double quotes and return the number of
+ * arguments found.  If ARGV is NULL the arguments are only counted
+ * and STRING is not modified.  Otherwise each argument is terminated
+ * in place, unquoted using strip_one_arg, and a pointer to it is
+ * stored in ARGV; ARGV needs to provide room for as many items as
+ * the counting run returned.  A missing closing quote is accepted.  */
+static int
+parse_cmdstring (char *string, char **argv)
+{
+  int argc = 0;
+  int inquote = 0;
+  char *p0, *p;
+  unsigned int n;
+
+  p0 = string;
+  for (p=string; *p; p++)
+    {
+      if (inquote)
+        {
+          if (*p == '\\' && p[1] == '"')
+            p++;  /* Skip the escaped quote.  */
+          else if (*p == '"')
+            {
+              if (argv && (p[1] == ' ' || p[1] == '\t' || !p[1]))
+                *p = 0;
+              inquote = 0;
+            }
+        }
+      else if (*p == '\\' && (n=count_backslashes (p)))
+        {
+          if (!p0) /* First non-WS; set start.  */
+            p0 = p;
+          /* Advance P only to the last character handled here; the
+           * increment of the for loop then moves on to the first
+           * unhandled one.  Going one further would skip a blank or
+           * even the terminating Nul right after the backslashes.  */
+          if (p[n] == '"')
+            {
+              if (!(n&1)) /* Even number: the quote is not escaped.  */
+                inquote = 1;
+              p += n;     /* Now at the quote.  */
+            }
+          else
+            p += n - 1;   /* Now at the last backslash.  */
+        }
+      else if (*p == '"')
+        {
+          inquote = 1;
+          if (!p0 || p == string) /* First non-WS or first char; set start. */
+            p0 = p + 1;
+        }
+      else if (*p == ' ' || *p == '\t')
+        {
+          if (p0) /* We are in an argument and reached WS.  */
+            {
+              if (argv)
+                {
+                  *p = 0;
+                  strip_one_arg (p0);
+                  argv[argc] = p0;
+                }
+              argc++;
+              p0 = NULL;
+            }
+        }
+      else if (!p0) /* First non-WS; set start.  */
+        p0 = p;
+    }
+
+  if (inquote || p0)
+    {
+      /* Closing quote missing (we accept this as argument anyway) or
+       * an open argument.  */
+      if (argv)
+        {
+          *p = 0;
+          strip_one_arg (p0);
+          argv[argc] = p0;
+        }
+      argc++;
+    }
+
+  return argc;
+}
+
+/* Parse the Windows command line CMDLINE into an array of arguments.
+ * CMDLINE is expected to be utf-8 encoded and is modified by this
+ * function.  The items of the returned array point into CMDLINE, so
+ * CMDLINE must not be released as long as the array is in use.  The
+ * array itself has been allocated with one extra, zeroed, slot at
+ * the end.  GLOBING is currently ignored.  On success the number of
+ * arguments is stored at R_ARGC; on error NULL is returned after
+ * logging a diagnostic.  */
+char **
+w32_parse_commandline (char *cmdline, int globing, int *r_argc)
+{
+  char **result;
+  int count, filled;
+
+  (void)globing;
+
+  /* A first run without an array merely counts the arguments.  */
+  count = parse_cmdstring (cmdline, NULL);
+  if (!count)
+    {
+      log_error ("%s failed: %s\n", __func__, "internal error");
+      return NULL;
+    }
+
+  result = xtrycalloc (count+1, sizeof *result);
+  if (!result)
+    {
+      log_error ("%s failed: %s\n", __func__, strerror (errno));
+      return NULL;
+    }
+
+  /* The second run fills the array and must agree with the first.  */
+  filled = parse_cmdstring (cmdline, result);
+  if (filled == count)
+    {
+      *r_argc = count;
+      return result;
+    }
+
+  log_error ("%s failed (argc=%d i=%d)\n", __func__, count, filled);
+  xfree (result);
+  return NULL;
+}
diff --git a/common/w32help.h b/common/w32help.h
index 177efbc9f..ca5ccf8bd 100644
--- a/common/w32help.h
+++ b/common/w32help.h
@@ -30,6 +30,13 @@
#ifndef GNUPG_COMMON_W32HELP_H
#define GNUPG_COMMON_W32HELP_H
+
+/*-- w32-misc.c --*/
+/* This module is also part of the Unix tests. */
+char **w32_parse_commandline (char *cmdline, int globing, int *r_argc);
+
+
+
#ifdef HAVE_W32_SYSTEM
/*-- w32-reg.c --*/