aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2018-04-12 10:34:11 -0500
committerEric Sandeen <sandeen@redhat.com>2018-04-12 10:34:11 -0500
commitbff5d1a4e8df8a23957e5739850754991ad2b9c8 (patch)
tree70023890ede6c9d59b797833730cb71b2cfb1c5b
parent3029a02cad9c8daaeced03ac4abcbf53e1968e66 (diff)
downloadxfsprogs-dev-bff5d1a4e8df8a23957e5739850754991ad2b9c8.tar.gz
xfs_scrub: transition from libunistring to libicu for Unicode processing
Move off of libunistring and onto libicu for Unicode name scanning. This will make it easy to warn about unicode code points that do not belong in identifiers (directional overrides, zero width elements) and warn about names that could render similarly enough to cause confusion. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Acked-by: Eric Sandeen <sandeen@redhat.com> Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
-rw-r--r--configure.ac13
-rw-r--r--debian/control2
-rw-r--r--include/builddefs.in6
-rw-r--r--m4/Makefile2
-rw-r--r--m4/package_icu.m46
-rw-r--r--m4/package_unistring.m419
-rw-r--r--scrub/Makefile8
-rw-r--r--scrub/unicrash.c64
-rw-r--r--scrub/unicrash.h4
9 files changed, 78 insertions, 46 deletions
diff --git a/configure.ac b/configure.ac
index 686bf789bd..1885c4569d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -95,6 +95,11 @@ AC_ARG_ENABLE(lto,
enable_lto=probe)
AC_SUBST(enable_lto)
+# Enable libicu for xfs_scrubbing of malicious unicode sequences in names
+AC_ARG_ENABLE(libicu,
+[ --enable-libicu=[yes/no] Enable Unicode name scanning (libicu) [default=probe]],,
+ enable_libicu=probe)
+
#
# If the user specified a libdir ending in lib64 do not append another
# 64 to the library names.
@@ -173,8 +178,12 @@ AC_HAVE_DEVMAPPER
AC_HAVE_MALLINFO
AC_PACKAGE_WANT_ATTRIBUTES_H
AC_HAVE_LIBATTR
-AC_PACKAGE_WANT_UNINORM_H
-AC_HAVE_U8NORMALIZE
+if test "$enable_libicu" = "yes" || test "$enable_libicu" = "probe"; then
+ AC_HAVE_LIBICU
+fi
+if test "$enable_libicu" = "yes" && test "$have_libicu" != "yes"; then
+ AC_MSG_ERROR([libicu not found.])
+fi
AC_HAVE_OPENAT
AC_HAVE_FSTATAT
AC_HAVE_SG_IO
diff --git a/debian/control b/debian/control
index 2937c99198..f4f807b0d0 100644
--- a/debian/control
+++ b/debian/control
@@ -3,7 +3,7 @@ Section: admin
Priority: optional
Maintainer: XFS Development Team <linux-xfs@vger.kernel.org>
Uploaders: Nathan Scott <nathans@debian.org>, Anibal Monsalve Salazar <anibal@debian.org>
-Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libunistring-dev, dh-python, pkg-config
+Build-Depends: uuid-dev, dh-autoreconf, debhelper (>= 5), gettext, libtool, libreadline-gplv2-dev, libblkid-dev (>= 2.17), linux-libc-dev, libdevmapper-dev, libattr1-dev, libicu-dev, dh-python, pkg-config
Standards-Version: 4.0.0
Homepage: https://xfs.wiki.kernel.org/
diff --git a/include/builddefs.in b/include/builddefs.in
index 7a2a626867..8aac06cf90 100644
--- a/include/builddefs.in
+++ b/include/builddefs.in
@@ -36,7 +36,6 @@ LIBEDITLINE = @libeditline@
LIBREADLINE = @libreadline@
LIBBLKID = @libblkid@
LIBDEVMAPPER = @libdevmapper@
-LIBUNISTRING = @libunistring@
LIBXFS = $(TOPDIR)/libxfs/libxfs.la
LIBFROG = $(TOPDIR)/libfrog/libfrog.la
LIBXCMD = $(TOPDIR)/libxcmd/libxcmd.la
@@ -122,7 +121,7 @@ HAVE_MAP_SYNC = @have_map_sync@
HAVE_DEVMAPPER = @have_devmapper@
HAVE_MALLINFO = @have_mallinfo@
HAVE_LIBATTR = @have_libattr@
-HAVE_U8NORMALIZE = @have_u8normalize@
+HAVE_LIBICU = @have_libicu@
HAVE_OPENAT = @have_openat@
HAVE_FSTATAT = @have_fstatat@
HAVE_SG_IO = @have_sg_io@
@@ -173,6 +172,9 @@ ifeq ($(HAVE_GETFSMAP),yes)
PCFLAGS+= -DHAVE_GETFSMAP
endif
+LIBICU_LIBS = @libicu_LIBS@
+LIBICU_CFLAGS = @libicu_CFLAGS@
+
SANITIZER_CFLAGS += @addrsan_cflags@ @threadsan_cflags@ @ubsan_cflags@
SANITIZER_LDFLAGS += @addrsan_ldflags@ @threadsan_ldflags@ @ubsan_ldflags@
diff --git a/m4/Makefile b/m4/Makefile
index a6d11e9704..cf0ce60bd3 100644
--- a/m4/Makefile
+++ b/m4/Makefile
@@ -23,7 +23,7 @@ LSRCFILES = \
package_sanitizer.m4 \
package_services.m4 \
package_types.m4 \
- package_unistring.m4 \
+ package_icu.m4 \
package_utilies.m4 \
package_uuiddev.m4 \
multilib.m4 \
diff --git a/m4/package_icu.m4 b/m4/package_icu.m4
new file mode 100644
index 0000000000..3ccbe0cc98
--- /dev/null
+++ b/m4/package_icu.m4
@@ -0,0 +1,6 @@
+AC_DEFUN([AC_HAVE_LIBICU],
+ [ PKG_CHECK_MODULES([libicu], [icu-i18n], [have_libicu=yes], [have_libicu=no])
+ AC_SUBST(have_libicu)
+ AC_SUBST(libicu_CFLAGS)
+ AC_SUBST(libicu_LIBS)
+ ])
diff --git a/m4/package_unistring.m4 b/m4/package_unistring.m4
deleted file mode 100644
index 9cbfcb015b..0000000000
--- a/m4/package_unistring.m4
+++ /dev/null
@@ -1,19 +0,0 @@
-AC_DEFUN([AC_PACKAGE_WANT_UNINORM_H],
- [ AC_CHECK_HEADERS(uninorm.h)
- if test $ac_cv_header_uninorm_h = no; then
- AC_CHECK_HEADERS(uninorm.h,, [
- echo
- echo 'WARNING: could not find a valid uninorm.h header.'])
- fi
- ])
-
-AC_DEFUN([AC_HAVE_U8NORMALIZE],
- [ AC_CHECK_LIB(unistring, u8_normalize,[
- libunistring=-lunistring
- have_u8normalize=yes
- ],[
- echo
- echo 'WARNING: xfs_scrub will not be built with Unicode libraries.'])
- AC_SUBST(libunistring)
- AC_SUBST(have_u8normalize)
- ])
diff --git a/scrub/Makefile b/scrub/Makefile
index 0632794002..bcc05a0ed9 100644
--- a/scrub/Makefile
+++ b/scrub/Makefile
@@ -68,8 +68,8 @@ spacemap.c \
vfs.c \
xfs_scrub.c
-LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBUNISTRING) $(LIBRT)
-LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG) $(LIBUNISTRING) $(LIBRT)
+LLDLIBS += $(LIBHANDLE) $(LIBFROG) $(LIBPTHREAD) $(LIBICU_LIBS) $(LIBRT)
+LTDEPENDENCIES += $(LIBHANDLE) $(LIBFROG)
LLDFLAGS = -static
ifeq ($(HAVE_MALLINFO),yes)
@@ -84,9 +84,9 @@ ifeq ($(HAVE_LIBATTR),yes)
LCFLAGS += -DHAVE_LIBATTR
endif
-ifeq ($(HAVE_U8NORMALIZE),yes)
+ifeq ($(HAVE_LIBICU),yes)
CFILES += unicrash.c
-LCFLAGS += -DHAVE_U8NORMALIZE
+LCFLAGS += -DHAVE_LIBICU $(LIBICU_CFLAGS)
endif
ifeq ($(HAVE_SG_IO),yes)
diff --git a/scrub/unicrash.c b/scrub/unicrash.c
index 51da32c2eb..06ccadf0ff 100644
--- a/scrub/unicrash.c
+++ b/scrub/unicrash.c
@@ -23,8 +23,9 @@
#include <dirent.h>
#include <sys/types.h>
#include <sys/statvfs.h>
-#include <unistr.h>
-#include <uninorm.h>
+#include <strings.h>
+#include <unicode/ustring.h>
+#include <unicode/unorm2.h>
#include "path.h"
#include "xfs_scrub.h"
#include "common.h"
@@ -63,7 +64,7 @@ struct name_entry {
struct name_entry *next;
/* NFKC normalized name */
- uint8_t *normstr;
+ UChar *normstr;
size_t normstrlen;
xfs_ino_t ino;
@@ -77,6 +78,7 @@ struct name_entry {
struct unicrash {
struct scrub_ctx *ctx;
+ const UNormalizer2 *normalizer;
bool compare_ino;
size_t nr_buckets;
struct name_entry *buckets[0];
@@ -135,23 +137,48 @@ name_entry_compute_checknames(
struct unicrash *uc,
struct name_entry *entry)
{
- uint8_t *normstr;
- size_t normstrlen;
-
- normstrlen = (entry->namelen * 2) + 1;
- normstr = calloc(normstrlen, sizeof(uint8_t));
- if (!normstr)
+ UChar *normstr;
+ UChar *unistr;
+ int32_t normstrlen;
+ int32_t unistrlen;
+ UErrorCode uerr = U_ZERO_ERROR;
+
+ /* Convert bytestr to unistr for normalization */
+ u_strFromUTF8(NULL, 0, &unistrlen, entry->name, entry->namelen, &uerr);
+ if (uerr != U_BUFFER_OVERFLOW_ERROR)
return false;
-
- if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
- entry->namelen, normstr, &normstrlen));
+ uerr = U_ZERO_ERROR;
+ unistr = calloc(unistrlen + 1, sizeof(UChar));
+ if (!unistr)
+ return false;
+ u_strFromUTF8(unistr, unistrlen, NULL, entry->name, entry->namelen,
+ &uerr);
+ if (U_FAILURE(uerr))
+ goto out_unistr;
+
+ /* Normalize the string. */
+ normstrlen = unorm2_normalize(uc->normalizer, unistr, unistrlen, NULL,
+ 0, &uerr);
+ if (uerr != U_BUFFER_OVERFLOW_ERROR)
+ goto out_unistr;
+ uerr = U_ZERO_ERROR;
+ normstr = calloc(normstrlen + 1, sizeof(UChar));
+ if (!normstr)
+ goto out_unistr;
+ unorm2_normalize(uc->normalizer, unistr, unistrlen, normstr, normstrlen,
+ &uerr);
+ if (U_FAILURE(uerr))
goto out_normstr;
entry->normstr = normstr;
entry->normstrlen = normstrlen;
+ free(unistr);
return true;
+
out_normstr:
free(normstr);
+out_unistr:
+ free(unistr);
return false;
}
@@ -214,8 +241,8 @@ name_entry_hash(
size_t namelen;
xfs_dahash_t hash;
- name = entry->normstr;
- namelen = entry->normstrlen;
+ name = (uint8_t *)entry->normstr;
+ namelen = entry->normstrlen * sizeof(UChar);
/*
* Do four characters at a time as long as we can.
@@ -249,6 +276,7 @@ unicrash_init(
size_t nr_buckets)
{
struct unicrash *p;
+ UErrorCode uerr = U_ZERO_ERROR;
if (!is_utf8_locale()) {
*ucp = NULL;
@@ -266,9 +294,15 @@ unicrash_init(
p->ctx = ctx;
p->nr_buckets = nr_buckets;
p->compare_ino = compare_ino;
+ p->normalizer = unorm2_getNFKCInstance(&uerr);
+ if (U_FAILURE(uerr))
+ goto out_free;
*ucp = p;
return true;
+out_free:
+ free(p);
+ return false;
}
/* Initialize the collision detector for a directory. */
@@ -378,7 +412,7 @@ unicrash_add(
while (entry != NULL) {
/* Same normalization? */
if (new_entry->normstrlen == entry->normstrlen &&
- !u8_strcmp(new_entry->normstr, entry->normstr) &&
+ !u_strcmp(new_entry->normstr, entry->normstr) &&
(uc->compare_ino ? entry->ino != new_entry->ino : true)) {
*badflags |= UNICRASH_NOT_UNIQUE;
*existing_entry = entry;
diff --git a/scrub/unicrash.h b/scrub/unicrash.h
index 33373190ac..67e70aed3b 100644
--- a/scrub/unicrash.h
+++ b/scrub/unicrash.h
@@ -23,7 +23,7 @@
struct unicrash;
/* Unicode name collision detection. */
-#ifdef HAVE_U8NORMALIZE
+#ifdef HAVE_LIBICU
struct dirent;
@@ -42,6 +42,6 @@ bool unicrash_check_xattr_name(struct unicrash *uc, const char *descr,
# define unicrash_free(u) do {(u) = (u);} while (0)
# define unicrash_check_dir_name(u, d, n) (true)
# define unicrash_check_xattr_name(u, d, n) (true)
-#endif /* HAVE_U8NORMALIZE */
+#endif /* HAVE_LIBICU */
#endif /* XFS_SCRUB_UNICRASH_H_ */