aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2018-04-12 10:34:11 -0500
committerEric Sandeen <sandeen@redhat.com>2018-04-12 10:34:11 -0500
commit3029a02cad9c8daaeced03ac4abcbf53e1968e66 (patch)
treef85ceb509c57e924fd6afc21789e3eb072e8c9e7
parent95c2f78bbc4cb13a96dc10ab112b315158254cab (diff)
downloadxfsprogs-dev-3029a02cad9c8daaeced03ac4abcbf53e1968e66.tar.gz
xfs_scrub: make name_entry a first class structure
Instead of open-coding the construction and hashtable insertion of name entries, make name_entry a first class object. This means that we now have name_entry_ prefix functions that take care of computing Unicode normalized names as part of name_entry construction, and we pass around the name_entries when we're looking for suspicious characters and identically rendering names. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com> Acked-by: Eric Sandeen <sandeen@redhat.com> Signed-off-by: Eric Sandeen <sandeen@sandeen.net>
-rw-r--r--scrub/unicrash.c264
1 files changed, 157 insertions, 107 deletions
diff --git a/scrub/unicrash.c b/scrub/unicrash.c
index 3538a60647..51da32c2eb 100644
--- a/scrub/unicrash.c
+++ b/scrub/unicrash.c
@@ -61,9 +61,16 @@
struct name_entry {
struct name_entry *next;
+
+ /* NFKC normalized name */
+ uint8_t *normstr;
+ size_t normstrlen;
+
xfs_ino_t ino;
- size_t uninamelen;
- uint8_t uniname[0];
+
+ /* Raw UTF8 name */
+ size_t namelen;
+ char name[0];
};
#define NAME_ENTRY_SZ(nl) (sizeof(struct name_entry) + 1 + \
(nl * sizeof(uint8_t)))
@@ -119,6 +126,120 @@ is_utf8_locale(void)
return answer;
}
+/*
+ * Generate normalized form of the name.
+ * If this fails, just forget everything; this is an advisory checker.
+ */
+static bool
+name_entry_compute_checknames(
+ struct unicrash *uc,
+ struct name_entry *entry)
+{
+ uint8_t *normstr;
+ size_t normstrlen;
+
+ normstrlen = (entry->namelen * 2) + 1;
+ normstr = calloc(normstrlen, sizeof(uint8_t));
+ if (!normstr)
+ return false;
+
+ if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)entry->name,
+ entry->namelen, normstr, &normstrlen));
+ goto out_normstr;
+
+ entry->normstr = normstr;
+ entry->normstrlen = normstrlen;
+ return true;
+out_normstr:
+ free(normstr);
+ return false;
+}
+
+/* Create a new name entry, returns false if we could not succeed. */
+static bool
+name_entry_create(
+ struct unicrash *uc,
+ const char *name,
+ xfs_ino_t ino,
+ struct name_entry **entry)
+{
+ struct name_entry *new_entry;
+ size_t namelen = strlen(name);
+
+ /* Create new entry */
+ new_entry = calloc(NAME_ENTRY_SZ(namelen), 1);
+ if (!new_entry)
+ return false;
+ new_entry->next = NULL;
+ new_entry->ino = ino;
+ memcpy(new_entry->name, name, namelen);
+ new_entry->name[namelen] = 0;
+ new_entry->namelen = namelen;
+
+ /* Normalize name to find collisions. */
+ if (!name_entry_compute_checknames(uc, new_entry))
+ goto out;
+
+ *entry = new_entry;
+ return true;
+
+out:
+ free(new_entry);
+ return false;
+}
+
+/* Free a name entry */
+static void
+name_entry_free(
+ struct name_entry *entry)
+{
+ free(entry->normstr);
+ free(entry);
+}
+
+/* Adapt the dirhash function from libxfs, avoid linking with libxfs. */
+
+#define rol32(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
+
+/*
+ * Implement a simple hash on a character string.
+ * Rotate the hash value by 7 bits, then XOR each character in.
+ * This is implemented with some source-level loop unrolling.
+ */
+static xfs_dahash_t
+name_entry_hash(
+ struct name_entry *entry)
+{
+ uint8_t *name;
+ size_t namelen;
+ xfs_dahash_t hash;
+
+ name = entry->normstr;
+ namelen = entry->normstrlen;
+
+ /*
+ * Do four characters at a time as long as we can.
+ */
+ for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
+ hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
+ (name[3] << 0) ^ rol32(hash, 7 * 4);
+
+ /*
+ * Now do the rest of the characters.
+ */
+ switch (namelen) {
+ case 3:
+ return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
+ rol32(hash, 7 * 3);
+ case 2:
+ return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
+ case 1:
+ return (name[0] << 0) ^ rol32(hash, 7 * 1);
+ default: /* case 0: */
+ return hash;
+ }
+}
+
/* Initialize the collision detector. */
static bool
unicrash_init(
@@ -190,89 +311,28 @@ unicrash_free(
for (i = 0; i < uc->nr_buckets; i++) {
for (ne = uc->buckets[i]; ne != NULL; ne = x) {
x = ne->next;
- free(ne);
+ name_entry_free(ne);
}
}
free(uc);
}
-/* Steal the dirhash function from libxfs, avoid linking with libxfs. */
-
-#define rol32(x, y) (((x) << (y)) | ((x) >> (32 - (y))))
-
-/*
- * Implement a simple hash on a character string.
- * Rotate the hash value by 7 bits, then XOR each character in.
- * This is implemented with some source-level loop unrolling.
- */
-static xfs_dahash_t
-unicrash_hashname(
- const uint8_t *name,
- size_t namelen)
-{
- xfs_dahash_t hash;
-
- /*
- * Do four characters at a time as long as we can.
- */
- for (hash = 0; namelen >= 4; namelen -= 4, name += 4)
- hash = (name[0] << 21) ^ (name[1] << 14) ^ (name[2] << 7) ^
- (name[3] << 0) ^ rol32(hash, 7 * 4);
-
- /*
- * Now do the rest of the characters.
- */
- switch (namelen) {
- case 3:
- return (name[0] << 14) ^ (name[1] << 7) ^ (name[2] << 0) ^
- rol32(hash, 7 * 3);
- case 2:
- return (name[0] << 7) ^ (name[1] << 0) ^ rol32(hash, 7 * 2);
- case 1:
- return (name[0] << 0) ^ rol32(hash, 7 * 1);
- default: /* case 0: */
- return hash;
- }
-}
-
-/*
- * Normalize a name according to Unicode NFKC normalization rules.
- * Returns true if the name was already normalized.
- */
-static bool
-unicrash_normalize(
- const char *in,
- uint8_t *out,
- size_t outlen)
-{
- size_t inlen = strlen(in);
-
- assert(inlen <= outlen);
- if (!u8_normalize(UNINORM_NFKC, (const uint8_t *)in, inlen,
- out, &outlen)) {
- /* Didn't normalize, just return the same buffer. */
- memcpy(out, in, inlen + 1);
- return true;
- }
- out[outlen] = 0;
- return outlen == inlen ? memcmp(in, out, inlen) == 0 : false;
-}
-
/* Complain about Unicode problems. */
static void
unicrash_complain(
struct unicrash *uc,
const char *descr,
const char *what,
+ struct name_entry *entry,
unsigned int badflags,
- const char *name,
- uint8_t *uniname)
+ struct name_entry *dup_entry)
{
char *bad1 = NULL;
char *bad2 = NULL;
- bad1 = string_escape(name);
- bad2 = string_escape((char *)uniname);
+ bad1 = string_escape(entry->name);
+ if (dup_entry)
+ bad2 = string_escape(dup_entry->name);
/*
* Two names that normalize to the same string will render
@@ -294,52 +354,39 @@ out:
/*
* Try to add a name -> ino entry to the collision detector. The name
- * must be normalized according to Unicode NFKC normalization rules to
- * detect byte-unique names that map to the same sequence of Unicode
- * code points.
- *
- * This function returns true either if there was no previous mapping or
- * there was a mapping that matched exactly. It returns false if
- * there is already a record with that name pointing to a different
- * inode.
+ * must be normalized according to Unicode NFKC rules to detect names that
+ * could be confused with each other.
*/
static bool
unicrash_add(
struct unicrash *uc,
- uint8_t *uniname,
- xfs_ino_t ino,
- unsigned int *badflags)
+ struct name_entry *new_entry,
+ unsigned int *badflags,
+ struct name_entry **existing_entry)
{
- struct name_entry *ne;
- struct name_entry *x;
- struct name_entry **nep;
- size_t uninamelen = u8_strlen(uniname);
+ struct name_entry *entry;
size_t bucket;
xfs_dahash_t hash;
- /* Do we already know about that name? */
- hash = unicrash_hashname(uniname, uninamelen);
+ /* Store name in hashtable. */
+ hash = name_entry_hash(new_entry);
bucket = hash % uc->nr_buckets;
- for (nep = &uc->buckets[bucket], ne = *nep; ne != NULL; ne = x) {
- if (u8_strcmp(uniname, ne->uniname) == 0 &&
- (uc->compare_ino ? ino != ne->ino : true)) {
+ entry = uc->buckets[bucket];
+ new_entry->next = entry;
+ uc->buckets[bucket] = new_entry;
+
+ while (entry != NULL) {
+ /* Same normalization? */
+ if (new_entry->normstrlen == entry->normstrlen &&
+ !u8_strcmp(new_entry->normstr, entry->normstr) &&
+ (uc->compare_ino ? entry->ino != new_entry->ino : true)) {
*badflags |= UNICRASH_NOT_UNIQUE;
+ *existing_entry = entry;
return true;
}
- nep = &ne->next;
- x = ne->next;
+ entry = entry->next;
}
- /* Remember that name. */
- x = malloc(NAME_ENTRY_SZ(uninamelen));
- if (!x)
- return false;
- x->next = NULL;
- x->ino = ino;
- x->uninamelen = uninamelen;
- memcpy(x->uniname, uniname, uninamelen + 1);
- *nep = x;
-
return true;
}
@@ -352,19 +399,22 @@ __unicrash_check_name(
const char *name,
xfs_ino_t ino)
{
- uint8_t uniname[(NAME_MAX * 2) + 1];
+ struct name_entry *dup_entry = NULL;
+ struct name_entry *new_entry;
unsigned int badflags = 0;
bool moveon;
- memset(uniname, 0, (NAME_MAX * 2) + 1);
- unicrash_normalize(name, uniname, NAME_MAX * 2);
- moveon = unicrash_add(uc, uniname, ino, &badflags);
+ /* If we can't create entry data, just skip it. */
+ if (!name_entry_create(uc, name, ino, &new_entry))
+ return true;
+
+ moveon = unicrash_add(uc, new_entry, &badflags, &dup_entry);
if (!moveon)
return false;
if (badflags)
- unicrash_complain(uc, descr, namedescr, badflags, name,
- uniname);
+ unicrash_complain(uc, descr, namedescr, new_entry, badflags,
+ dup_entry);
return true;
}