diff options
author | Hamilton Coutinho (he/him) <hcoutinho@purestorage.com> | 2024-03-27 15:23:58 -0700 |
---|---|---|
committer | Andi Kleen <andi@firstfloor.org> | 2024-03-27 18:32:08 -0700 |
commit | c0f843b8a88f8634403eea32177cb2fdc7e1b685 (patch) | |
tree | 5e9b6a44a59d1861739f2f882412268de4a97b97 | |
parent | 2e7e64d442e579e66d29cfa528042d7afdc38589 (diff) | |
download | mcelog-c0f843b8a88f8634403eea32177cb2fdc7e1b685.tar.gz |
mcelog: mempage_replace missing initialization of mempage fields
We hit a bug that occurs when mcelog sees CE events for more unique page
addresses than max_corr_err_counters allows and starts reusing mempage
structs from a previously used mempage_cluster linked on
mempage_cluster_lru_list: some fields of the reused struct are not
correctly reset, which may lead to issues. E.g., if the reused mempage
struct was previously used for a page that was offlined, then the reused
mempage will still have that flag set, which will prevent the new page
from being offlined if required.
We fixed it with the attached patch.
-rw-r--r-- | page.c | 9 |
1 files changed, 8 insertions, 1 deletions
@@ -103,13 +103,20 @@ static struct mempage *mempage_alloc(void)
 
 static struct mempage *mempage_replace(void)
 {
+	struct mempage *mp;
+
 	/* If no free mp_cluster, reuse the last mp_cluster of the LRU list */
 	if (mp_cluster->mp_used == N) {
 		mp_cluster = list_last_entry(&mempage_cluster_lru_list, struct mempage_cluster, lru);
 		mp_cluster->mp_used = 0;
 	}
 
-	return &mp_cluster->mp[mp_cluster->mp_used++];
+	mp = &mp_cluster->mp[mp_cluster->mp_used++];
+	mp->offlined = PAGE_ONLINE;
+	mp->triggered = 0;
+	mp->ce.count = 0;
+
+	return mp;
 }
 
 static struct mempage *mempage_lookup(u64 addr)