diff options
51 files changed, 4474 insertions, 1799 deletions
@@ -8,3 +8,4 @@ setpci example update-pciids pci.ids.gz +pcilmr @@ -1,3 +1,43 @@ +2024-02-18 Martin Mares <mj@ucw.cz> + + * Will be released as 3.11.0. + + * update-pciids now supports XZ compression. If libpci is configured + with support for compression, all downloaded files are recompressed + as gzip. Otherwise they are stored as plain text. + + * update-pciids now sends itself as the User-Agent. + + * Added a pcilmr utility for PCIe lane margining. Thanks to Nikita + Proshkin for contributing it. + + * Re-factored access to i386 ports on all relevant platforms. + + * Added i386 port access on OpenBSD. + + * Linux systems without pread/pwrite are no longer supported + as they are hopefully long gone. This helps avoid the tricky check + for presence of pread which was found to fail on musl libc. + + * Improved decoding of PCIe control and status registers. + + * Decoding of CXL capabilities now supports up to CXL 3.0. + + * lspci now displays interrupt message numbers consistently across + different capabilities. + + * Cache of IDs resolved via DNS, which was located in ~/.pci-ids + by default, is now stored according to the XDG base directory + specification in $XDG_CACHE_HOME/pci-ids. + + * All source files now have SPDX license identifiers. + + * Internal: The "aux" fields of structs pci_access and pci_dev + reserved for use by back-ends were renamed to backend_data to better + reflect their meaning. + + * As usual, various minor bug fixes and updated pci.ids. + 2023-05-01 Martin Mares <mj@ucw.cz> * Released as 3.10.0. 
@@ -65,9 +65,15 @@ LIBNAME=libpci PCIINC=lib/config.h lib/header.h lib/pci.h lib/types.h lib/sysdep.h PCIINC_INS=lib/config.h lib/header.h lib/pci.h lib/types.h +UTILINC=pciutils.h bitops.h $(PCIINC) + +LMR=margin_hw.o margin.o margin_log.o margin_results.o +LMROBJS=$(addprefix lmr/,$(LMR)) +LMRINC=lmr/lmr.h $(UTILINC) + export -all: lib/$(PCIIMPLIB) lspci$(EXEEXT) setpci$(EXEEXT) example$(EXEEXT) lspci.8 setpci.8 pcilib.7 pci.ids.5 update-pciids update-pciids.8 $(PCI_IDS) +all: lib/$(PCIIMPLIB) lspci$(EXEEXT) setpci$(EXEEXT) example$(EXEEXT) lspci.8 setpci.8 pcilib.7 pci.ids.5 update-pciids update-pciids.8 $(PCI_IDS) pcilmr$(EXEEXT) pcilmr.8 lib/$(PCIIMPLIB): $(PCIINC) force $(MAKE) -C lib all @@ -86,7 +92,7 @@ endif lspci$(EXEEXT): lspci.o ls-vpd.o ls-caps.o ls-caps-vendor.o ls-ecaps.o ls-kernel.o ls-tree.o ls-map.o $(COMMON) lib/$(PCIIMPLIB) setpci$(EXEEXT): setpci.o $(COMMON) lib/$(PCIIMPLIB) -LSPCIINC=lspci.h pciutils.h $(PCIINC) +LSPCIINC=lspci.h $(UTILINC) lspci.o: lspci.c $(LSPCIINC) ls-vpd.o: ls-vpd.c $(LSPCIINC) ls-caps.o: ls-caps.c $(LSPCIINC) @@ -95,12 +101,12 @@ ls-kernel.o: ls-kernel.c $(LSPCIINC) ls-tree.o: ls-tree.c $(LSPCIINC) ls-map.o: ls-map.c $(LSPCIINC) -setpci.o: setpci.c pciutils.h $(PCIINC) -common.o: common.c pciutils.h $(PCIINC) +setpci.o: setpci.c $(UTILINC) +common.o: common.c $(UTILINC) compat/getopt.o: compat/getopt.c lspci$(EXEEXT): LDLIBS+=$(LIBKMOD_LIBS) -ls-kernel.o: CFLAGS+=$(LIBKMOD_CFLAGS) +ls-kernel.o: override CFLAGS+=$(LIBKMOD_CFLAGS) update-pciids: update-pciids.sh sed <$< >$@ "s@^DEST=.*@DEST=$(if $(IDSDIR),$(IDSDIR)/,)$(PCI_IDS)@;s@^PCI_COMPRESSED_IDS=.*@PCI_COMPRESSED_IDS=$(PCI_COMPRESSED_IDS)@;s@VERSION=.*@VERSION=$(VERSION)@" @@ -110,6 +116,12 @@ update-pciids: update-pciids.sh example$(EXEEXT): example.o lib/$(PCIIMPLIB) example.o: example.c $(PCIINC) +$(LMROBJS) pcilmr.o: override CFLAGS+=-I . 
+$(LMROBJS): %.o: %.c $(LMRINC) + +pcilmr$(EXEEXT): pcilmr.o $(LMROBJS) $(COMMON) lib/$(PCIIMPLIB) +pcilmr.o: pcilmr.c $(LMRINC) + %$(EXEEXT): %.o $(CC) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@ @@ -126,6 +138,7 @@ comma := , $(WINDRES) --input=$< --output=$@ --input-format=rc --output-format=coff lspci$(EXEEXT): lspci-rsrc.o setpci$(EXEEXT): setpci-rsrc.o +pcilmr$(EXEEXT): pcilmr-rsrc.o endif %.8 %.7 %.5: %.man @@ -141,7 +154,7 @@ TAGS: clean: rm -f `find . -name "*~" -o -name "*.[oa]" -o -name "\#*\#" -o -name TAGS -o -name core -o -name "*.orig"` - rm -f update-pciids lspci$(EXEEXT) setpci$(EXEEXT) example$(EXEEXT) lib/config.* *.[578] pci.ids.gz lib/*.pc lib/*.so lib/*.so.* lib/*.dll lib/*.def lib/dllrsrc.rc *-rsrc.rc tags + rm -f update-pciids lspci$(EXEEXT) setpci$(EXEEXT) example$(EXEEXT) lib/config.* *.[578] pci.ids.gz lib/*.pc lib/*.so lib/*.so.* lib/*.dll lib/*.def lib/dllrsrc.rc *-rsrc.rc tags pcilmr$(EXEEXT) rm -rf maint/dist distclean: clean @@ -151,13 +164,14 @@ install: all $(DIRINSTALL) -m 755 $(DESTDIR)$(BINDIR) $(DESTDIR)$(SBINDIR) $(DESTDIR)$(IDSDIR) $(DESTDIR)$(MANDIR)/man8 $(DESTDIR)$(MANDIR)/man7 $(DESTDIR)$(MANDIR)/man5 $(INSTALL) -c -m 755 $(STRIP) lspci$(EXEEXT) $(DESTDIR)$(LSPCIDIR) $(INSTALL) -c -m 755 $(STRIP) setpci$(EXEEXT) $(DESTDIR)$(SBINDIR) + $(INSTALL) -c -m 755 $(STRIP) pcilmr$(EXEEXT) $(DESTDIR)$(SBINDIR) $(INSTALL) -c -m 755 update-pciids $(DESTDIR)$(SBINDIR) ifneq ($(IDSDIR),) $(INSTALL) -c -m 644 $(PCI_IDS) $(DESTDIR)$(IDSDIR) else $(INSTALL) -c -m 644 $(PCI_IDS) $(DESTDIR)$(SBINDIR) endif - $(INSTALL) -c -m 644 lspci.8 setpci.8 update-pciids.8 $(DESTDIR)$(MANDIR)/man8 + $(INSTALL) -c -m 644 lspci.8 setpci.8 pcilmr.8 update-pciids.8 $(DESTDIR)$(MANDIR)/man8 $(INSTALL) -c -m 644 pcilib.7 $(DESTDIR)$(MANDIR)/man7 $(INSTALL) -c -m 644 pci.ids.5 $(DESTDIR)$(MANDIR)/man5 ifeq ($(SHARED),yes) @@ -203,13 +217,13 @@ endif endif uninstall: all - rm -f $(DESTDIR)$(SBINDIR)/lspci$(EXEEXT) $(DESTDIR)$(SBINDIR)/setpci$(EXEEXT) 
$(DESTDIR)$(SBINDIR)/update-pciids + rm -f $(DESTDIR)$(LSPCIDIR)/lspci$(EXEEXT) $(DESTDIR)$(SBINDIR)/setpci$(EXEEXT) $(DESTDIR)$(SBINDIR)/pcilmr$(EXEEXT) $(DESTDIR)$(SBINDIR)/update-pciids ifneq ($(IDSDIR),) rm -f $(DESTDIR)$(IDSDIR)/$(PCI_IDS) else rm -f $(DESTDIR)$(SBINDIR)/$(PCI_IDS) endif - rm -f $(DESTDIR)$(MANDIR)/man8/lspci.8 $(DESTDIR)$(MANDIR)/man8/setpci.8 $(DESTDIR)$(MANDIR)/man8/update-pciids.8 + rm -f $(DESTDIR)$(MANDIR)/man8/lspci.8 $(DESTDIR)$(MANDIR)/man8/setpci.8 $(DESTDIR)$(MANDIR)/man8/pcilmr.8 $(DESTDIR)$(MANDIR)/man8/update-pciids.8 rm -f $(DESTDIR)$(MANDIR)/man7/pcilib.7 rm -f $(DESTDIR)$(MANDIR)/man5/pci.ids.5 ifeq ($(SHARED)_$(LIBEXT),yes_dll) @@ -20,7 +20,7 @@ It runs on the following systems: Linux (via /sys/bus/pci, /proc/bus/pci or i386 ports) FreeBSD (via /dev/pci) NetBSD (via libpci) - OpenBSD (via /dev/pci) + OpenBSD (via /dev/pci or i386 ports) GNU/kFreeBSD (via /dev/pci) Solaris/i386 (direct port access) Aix (via /dev/pci and odmget) diff --git a/bitops.h b/bitops.h new file mode 100644 index 0000000..029741e --- /dev/null +++ b/bitops.h @@ -0,0 +1,39 @@ +/* + * The PCI Utilities -- Decode bits and bit fields + * + * Copyright (c) 2023 Martin Mares <mj@ucw.cz> + * Copyright (c) 2023 KNS Group LLC (YADRO) + * + * Can be freely distributed and used under the terms of the GNU GPL v2+. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _BITOPS_H +#define _BITOPS_H + +#ifndef _PCI_LIB_H +#error Import only from pci.h +#endif + +/* Useful macros for decoding of bits and bit fields */ + +#define FLAG(x, y) ((x & y) ? 
'+' : '-') + +// Generate mask + +#define BIT(at) ((u64)1 << (at)) +// Boundaries inclusive +#define MASK(h, l) ((((u64)1 << ((h) + 1)) - 1) & ~(((u64)1 << (l)) - 1)) + +// Get/set from register + +#define BITS(x, at, width) (((x) >> (at)) & ((1 << (width)) - 1)) +#define GET_REG_MASK(reg, mask) (((reg) & (mask)) / ((mask) & ~((mask) << 1))) +#define SET_REG_MASK(reg, mask, val) \ + (((reg) & ~(mask)) | (((val) * ((mask) & ~((mask) << 1))) & (mask))) + +#define TABLE(tab, x, buf) \ + ((x) < sizeof(tab) / sizeof((tab)[0]) ? (tab)[x] : (sprintf((buf), "??%d", (x)), (buf))) + +#endif diff --git a/lib/Makefile b/lib/Makefile index bacdbc5..de976ed 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,10 +20,12 @@ endif ifdef PCI_HAVE_PM_MMIO_CONF OBJS += mmio-ports +PCI_USE_PHYSMEM = 1 endif ifdef PCI_HAVE_PM_ECAM OBJS += ecam +PCI_USE_PHYSMEM = 1 endif ifdef PCI_HAVE_PM_DUMP @@ -75,6 +77,18 @@ ifdef PCI_HAVE_PM_WIN32_SYSDBG OBJS += win32-sysdbg endif +ifdef PCI_OS_WINDOWS +OBJS += win32-helpers +endif + +ifdef PCI_USE_PHYSMEM +ifndef PCI_OS_WINDOWS +ifndef PCI_OS_DJGPP +OBJS += physmem-posix +endif +endif +endif + ifdef PCI_HAVE_PM_AOS_EXPANSION OBJS += aos-expansion endif @@ -127,8 +141,8 @@ init.o: init.c $(INCL) access.o: access.c $(INCL) params.o: params.c $(INCL) i386-ports.o: i386-ports.c $(INCL) i386-io-hurd.h i386-io-linux.h i386-io-sunos.h i386-io-windows.h i386-io-cygwin.h -mmio-ports.o: mmio-ports.c $(INCL) -ecam.o: ecam.c $(INCL) +mmio-ports.o: mmio-ports.c $(INCL) physmem.h physmem-access.h +ecam.o: ecam.c $(INCL) physmem.h physmem-access.h proc.o: proc.c $(INCL) sysfs.o: sysfs.c $(INCL) generic.o: generic.c $(INCL) @@ -147,9 +161,11 @@ names-hwdb.o: names-hwdb.c $(INCL) names.h filter.o: filter.c $(INCL) nbsd-libpci.o: nbsd-libpci.c $(INCL) hurd.o: hurd.c $(INCL) -win32-cfgmgr32.o: win32-cfgmgr32.c $(INCL) -win32-kldbg.o: win32-kldbg.c $(INCL) -win32-sysdbg.o: win32-sysdbg.c $(INCL) +win32-helpers.o: win32-helpers.c $(INCL) win32-helpers.h 
+win32-cfgmgr32.o: win32-cfgmgr32.c $(INCL) win32-helpers.h +win32-kldbg.o: win32-kldbg.c $(INCL) win32-helpers.h +win32-sysdbg.o: win32-sysdbg.c $(INCL) win32-helpers.h +i386-io-windows.h: win32-helpers.h # MinGW32 toolchain has some required Win32 header files in /ddk subdirectory. # But these header files include another header files from /ddk subdirectory diff --git a/lib/configure b/lib/configure index 8bdfc02..3df057a 100755 --- a/lib/configure +++ b/lib/configure @@ -88,16 +88,17 @@ case $sys in LSPCIDIR=BINDIR ;; sunos) + echo_n " mem-ports ecam" case $cpu in i?86) echo_n " i386-ports" echo >>$c "#define PCI_HAVE_PM_INTEL_CONF" ;; - *) - echo " The PCI library does not support Solaris for this architecture: $cpu" - exit 1 - ;; esac - echo >>$c '#define PCI_HAVE_STDINT_H' + echo >>$c '#define PCI_HAVE_PM_MMIO_CONF' + echo >>$c '#define PCI_HAVE_PM_ECAM' + echo >>$c '#define PCI_PATH_DEVMEM_DEVICE "/dev/xsvc"' + echo >>$c '#define PCI_PATH_ACPI_MCFG ""' + echo >>$c '#define PCI_PATH_EFI_SYSTAB ""' ;; freebsd*|kfreebsd*) echo_n " fbsd-device mem-ports ecam" @@ -121,6 +122,12 @@ case $sys in echo >>$c '#define PCI_PATH_DEVMEM_DEVICE "/dev/mem"' echo >>$c '#define PCI_PATH_ACPI_MCFG "/var/db/acpi/MCFG.*"' echo >>$c '#define PCI_PATH_EFI_SYSTAB ""' + case $cpu in + i386|amd64) echo_n " i386-ports" + echo >>$c '#define PCI_HAVE_PM_INTEL_CONF' + echo >>$m 'WITH_LIBS+=-l'$cpu + ;; + esac LIBRESOLV= ;; @@ -190,12 +197,17 @@ case $sys in LIBEXT=dll ;; beos|haiku) + echo_n " mem-ports ecam" case $cpu in i?86|x86_64) echo_n " i386-ports" echo >>$c '#define PCI_HAVE_PM_INTEL_CONF' ;; esac - echo >>$c '#define PCI_HAVE_STDINT_H' + echo >>$c '#define PCI_HAVE_PM_MMIO_CONF' + echo >>$c '#define PCI_HAVE_PM_ECAM' + echo >>$c '#define PCI_PATH_DEVMEM_DEVICE "/dev/misc/mem"' + echo >>$c '#define PCI_PATH_ACPI_MCFG ""' + echo >>$c '#define PCI_PATH_EFI_SYSTAB ""' ;; sylixos) echo >>$c '#define PCI_PATH_SYLIXOS_DEVICE "/proc/pci"' @@ -8,14 +8,9 @@ * SPDX-License-Identifier: 
GPL-2.0-or-later */ -/* - * Tell 32-bit platforms that we are interested in 64-bit variant of off_t type - * as 32-bit variant of off_t type is signed and so it cannot represent all - * possible 32-bit offsets. It is required because off_t type is used by mmap(). - */ -#define _FILE_OFFSET_BITS 64 - #include "internal.h" +#include "physmem.h" +#include "physmem-access.h" #include <ctype.h> #include <errno.h> @@ -24,9 +19,6 @@ #include <string.h> #include <limits.h> -#include <sys/mman.h> -#include <sys/types.h> -#include <fcntl.h> #include <glob.h> #include <unistd.h> @@ -38,12 +30,6 @@ #include <kenv.h> #endif -#ifndef OFF_MAX -#define OFF_MAX (off_t)((1ULL << (sizeof(off_t) * CHAR_BIT - 1)) - 1) -#endif - -static long pagesize; - struct acpi_rsdp { char signature[8]; u8 checksum; @@ -92,6 +78,23 @@ struct acpi_mcfg { } allocations[0]; } PCI_PACKED; +struct mmap_cache { + void *map; + u64 addr; + u32 length; + int domain; + u8 bus; + int w; +}; + +// Back-end data linked to struct pci_access +struct ecam_access { + struct acpi_mcfg *mcfg; + struct mmap_cache *cache; + struct physmem *physmem; + long pagesize; +}; + static unsigned int get_rsdt_addresses_count(struct acpi_rsdt *rsdt) { @@ -121,40 +124,40 @@ calculate_checksum(const u8 *bytes, int len) } static struct acpi_sdt * -check_and_map_sdt(int fd, u64 addr, const char *signature, void **map_addr, u32 *map_length) +check_and_map_sdt(struct physmem *physmem, long pagesize, u64 addr, const char *signature, void **map_addr, u32 *map_length) { struct acpi_sdt *sdt; char sdt_signature[sizeof(sdt->signature)]; u32 length; void *map; - if (addr > OFF_MAX - sizeof(*sdt)) + if (addr + sizeof(*sdt) < addr) return NULL; - map = mmap(NULL, sizeof(*sdt) + (addr & (pagesize-1)), PROT_READ, MAP_SHARED, fd, addr & ~(pagesize-1)); - if (map == MAP_FAILED) + map = physmem_map(physmem, addr & ~(pagesize-1), sizeof(*sdt) + (addr & (pagesize-1)), 0); + if (map == (void *)-1) return NULL; sdt = (struct acpi_sdt *)((unsigned char 
*)map + (addr & (pagesize-1))); length = sdt->length; memcpy(sdt_signature, sdt->signature, sizeof(sdt->signature)); - munmap(map, sizeof(*sdt) + (addr & (pagesize-1))); + physmem_unmap(physmem, map, sizeof(*sdt) + (addr & (pagesize-1))); if (memcmp(sdt_signature, signature, sizeof(sdt_signature)) != 0) return NULL; if (length < sizeof(*sdt)) return NULL; - map = mmap(NULL, length + (addr & (pagesize-1)), PROT_READ, MAP_SHARED, fd, addr & ~(pagesize-1)); - if (map == MAP_FAILED) + map = physmem_map(physmem, addr & ~(pagesize-1), length + (addr & (pagesize-1)), 0); + if (map == (void *)-1) return NULL; sdt = (struct acpi_sdt *)((unsigned char *)map + (addr & (pagesize-1))); if (calculate_checksum((u8 *)sdt, sdt->length) != 0) { - munmap(map, length + (addr & (pagesize-1))); + physmem_unmap(physmem, map, length + (addr & (pagesize-1))); return NULL; } @@ -174,20 +177,20 @@ check_rsdp(struct acpi_rsdp *rsdp) } static int -check_and_parse_rsdp(int fd, off_t addr, u32 *rsdt_address, u64 *xsdt_address) +check_and_parse_rsdp(struct physmem *physmem, long pagesize, u64 addr, u32 *rsdt_address, u64 *xsdt_address) { struct acpi_rsdp *rsdp; unsigned char buf[sizeof(*rsdp) + sizeof(*rsdp->rsdp20)]; void *map; - map = mmap(NULL, sizeof(buf) + (addr & (pagesize-1)), PROT_READ, MAP_SHARED, fd, addr & ~(pagesize-1)); - if (map == MAP_FAILED) + map = physmem_map(physmem, addr & ~(pagesize-1), sizeof(buf) + (addr & (pagesize-1)), 0); + if (map == (void *)-1) return 0; rsdp = (struct acpi_rsdp *)buf; memcpy(rsdp, (unsigned char *)map + (addr & (pagesize-1)), sizeof(buf)); - munmap(map, sizeof(buf)); + physmem_unmap(physmem, map, sizeof(buf)); if (!check_rsdp(rsdp)) return 0; @@ -204,20 +207,22 @@ check_and_parse_rsdp(int fd, off_t addr, u32 *rsdt_address, u64 *xsdt_address) return 1; } -static off_t +static u64 find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSED, int use_x86bios UNUSED) { - unsigned long long ullnum; + u64 ullnum; #if defined 
(__FreeBSD__) || defined (__DragonFly__) || defined(__NetBSD__) unsigned long ulnum; #endif char buf[1024]; char *endptr; - off_t acpi20; - off_t acpi; + u64 acpi20; + u64 acpi; #if defined(__amd64__) || defined(__i386__) - off_t rsdp_addr; - off_t addr; + struct ecam_access *eacc = a->backend_data; + struct physmem *physmem = eacc->physmem; + u64 rsdp_addr; + u64 addr; void *map; #endif size_t len; @@ -240,19 +245,21 @@ find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSE { errno = 0; ullnum = strtoull(buf+7, &endptr, 16); - if (!errno && !*endptr && ullnum <= OFF_MAX) + if (!errno && !*endptr) acpi20 = ullnum; } else if (strncmp(buf, "ACPI=", 5) == 0 && isxdigit(buf[5])) { errno = 0; ullnum = strtoull(buf+5, &endptr, 16); - if (!errno && !*endptr && ullnum <= OFF_MAX) + if (!errno && !*endptr) acpi = ullnum; } } fclose(f); } + else + a->debug("opening failed: %s...", strerror(errno)); if (acpi20) return acpi20; @@ -269,14 +276,14 @@ find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSE { errno = 0; ullnum = strtoull(buf, &endptr, 16); - if (!errno && !*endptr && ullnum <= OFF_MAX) + if (!errno && !*endptr) return ullnum; } /* Then try FreeBSD sysctl machdep.acpi_root */ a->debug("calling sysctl machdep.acpi_root..."); len = sizeof(ulnum); - if (sysctlbyname("machdep.acpi_root", &ulnum, &len, NULL, 0) == 0 && ulnum <= OFF_MAX) + if (sysctlbyname("machdep.acpi_root", &ulnum, &len, NULL, 0) == 0) return ulnum; } #endif @@ -287,7 +294,7 @@ find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSE /* Try NetBSD sysctl hw.acpi.root */ a->debug("calling sysctl hw.acpi.root..."); len = sizeof(ulnum); - if (sysctlbyname("hw.acpi.root", &ulnum, &len, NULL, 0) == 0 && ulnum <= OFF_MAX) + if (sysctlbyname("hw.acpi.root", &ulnum, &len, NULL, 0) == 0) return ulnum; } #endif @@ -299,8 +306,8 @@ find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSE /* Scan first kB of 
Extended BIOS Data Area */ a->debug("scanning first kB of EBDA..."); - map = mmap(NULL, 0x40E + 1024, PROT_READ, MAP_SHARED, a->fd, 0); - if (map != MAP_FAILED) + map = physmem_map(physmem, 0, 0x40E + 1024, 0); + if (map != (void *)-1) { for (addr = 0x40E; addr < 0x40E + 1024; addr += 16) { @@ -310,16 +317,19 @@ find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSE break; } } - munmap(map, 0x40E + 1024); + if (physmem_unmap(physmem, map, 0x40E + 1024) != 0) + a->debug("unmapping of EBDA failed: %s...", strerror(errno)); } + else + a->debug("mapping of EBDA failed: %s...", strerror(errno)); if (rsdp_addr) return rsdp_addr; /* Scan the main BIOS area below 1 MB */ a->debug("scanning BIOS below 1 MB..."); - map = mmap(NULL, 0x20000, PROT_READ, MAP_SHARED, a->fd, 0xE0000); - if (map != MAP_FAILED) + map = physmem_map(physmem, 0xE0000, 0x20000, 0); + if (map != (void *)-1) { for (addr = 0x0; addr < 0x20000; addr += 16) { @@ -329,8 +339,11 @@ find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSE break; } } - munmap(map, 0x20000); + if (physmem_unmap(physmem, map, 0x20000) != 0) + a->debug("unmapping of BIOS failed: %s...", strerror(errno)); } + else + a->debug("mapping of BIOS failed: %s...", strerror(errno)); if (rsdp_addr) return rsdp_addr; @@ -343,20 +356,24 @@ find_rsdp_address(struct pci_access *a, const char *efisystab, int use_bsd UNUSE static struct acpi_mcfg * find_mcfg(struct pci_access *a, const char *acpimcfg, const char *efisystab, int use_bsd, int use_x86bios) { + struct ecam_access *eacc = a->backend_data; + struct physmem *physmem = eacc->physmem; + long pagesize = eacc->pagesize; struct acpi_xsdt *xsdt; struct acpi_rsdt *rsdt; struct acpi_mcfg *mcfg; struct acpi_sdt *sdt; unsigned int i, count; - off_t rsdp_address; + u64 rsdp_address; u64 xsdt_address; u32 rsdt_address; void *map_addr; u32 map_length; void *map2_addr; u32 map2_length; - off_t length; - int mcfg_fd; + long length; + FILE *mcfg_file; + 
const char *path; glob_t mcfg_glob; int ret; @@ -365,26 +382,30 @@ find_mcfg(struct pci_access *a, const char *acpimcfg, const char *efisystab, int ret = glob(acpimcfg, GLOB_NOCHECK, NULL, &mcfg_glob); if (ret == 0) { - a->debug("reading acpi mcfg file: %s...", mcfg_glob.gl_pathv[0]); - mcfg_fd = open(mcfg_glob.gl_pathv[0], O_RDONLY); + path = mcfg_glob.gl_pathv[0]; + a->debug("reading acpi mcfg file: %s...", path); + mcfg_file = fopen(path, "rb"); globfree(&mcfg_glob); - if (mcfg_fd >= 0) + if (mcfg_file) { - length = lseek(mcfg_fd, 0, SEEK_END); - if (length != (off_t)-1 && length > (off_t)sizeof(*mcfg)) + if (fseek(mcfg_file, 0, SEEK_END) == 0) + length = ftell(mcfg_file); + else + length = -1; + if (length > 0 && (size_t)length > sizeof(*mcfg)) { - lseek(mcfg_fd, 0, SEEK_SET); + rewind(mcfg_file); mcfg = pci_malloc(a, length); - if (read(mcfg_fd, mcfg, length) == length && + if (fread(mcfg, 1, length, mcfg_file) == (size_t)length && memcmp(mcfg->sdt.signature, "MCFG", 4) == 0 && - mcfg->sdt.length <= length && + mcfg->sdt.length <= (size_t)length && calculate_checksum((u8 *)mcfg, mcfg->sdt.length) == 0) { - close(mcfg_fd); + fclose(mcfg_file); return mcfg; } } - close(mcfg_fd); + fclose(mcfg_file); } a->debug("failed..."); } @@ -399,34 +420,34 @@ find_mcfg(struct pci_access *a, const char *acpimcfg, const char *efisystab, int a->debug("not found..."); return NULL; } - a->debug("found at 0x%llx...", (unsigned long long)rsdp_address); + a->debug("found at 0x%" PCI_U64_FMT_X "...", rsdp_address); - if (!check_and_parse_rsdp(a->fd, rsdp_address, &rsdt_address, &xsdt_address)) + if (!check_and_parse_rsdp(physmem, pagesize, rsdp_address, &rsdt_address, &xsdt_address)) { a->debug("invalid..."); return NULL; } mcfg = NULL; - a->debug("searching for ACPI MCFG (XSDT=0x%llx, RSDT=0x%x)...", (unsigned long long)xsdt_address, rsdt_address); + a->debug("searching for ACPI MCFG (XSDT=0x%" PCI_U64_FMT_X ", RSDT=0x%lx)...", xsdt_address, (unsigned long)rsdt_address); - xsdt = 
xsdt_address ? (struct acpi_xsdt *)check_and_map_sdt(a->fd, xsdt_address, "XSDT", &map_addr, &map_length) : NULL; + xsdt = xsdt_address ? (struct acpi_xsdt *)check_and_map_sdt(physmem, pagesize, xsdt_address, "XSDT", &map_addr, &map_length) : NULL; if (xsdt) { a->debug("via XSDT..."); count = get_xsdt_addresses_count(xsdt); for (i = 0; i < count; i++) { - sdt = check_and_map_sdt(a->fd, xsdt->sdt_addresses[i], "MCFG", &map2_addr, &map2_length); + sdt = check_and_map_sdt(physmem, pagesize, xsdt->sdt_addresses[i], "MCFG", &map2_addr, &map2_length); if (sdt) { mcfg = pci_malloc(a, sdt->length); memcpy(mcfg, sdt, sdt->length); - munmap(map2_addr, map2_length); + physmem_unmap(physmem, map2_addr, map2_length); break; } } - munmap(map_addr, map_length); + physmem_unmap(physmem, map_addr, map_length); if (mcfg) { a->debug("found..."); @@ -434,23 +455,23 @@ find_mcfg(struct pci_access *a, const char *acpimcfg, const char *efisystab, int } } - rsdt = (struct acpi_rsdt *)check_and_map_sdt(a->fd, rsdt_address, "RSDT", &map_addr, &map_length); + rsdt = (struct acpi_rsdt *)check_and_map_sdt(physmem, pagesize, rsdt_address, "RSDT", &map_addr, &map_length); if (rsdt) { a->debug("via RSDT..."); count = get_rsdt_addresses_count(rsdt); for (i = 0; i < count; i++) { - sdt = check_and_map_sdt(a->fd, rsdt->sdt_addresses[i], "MCFG", &map2_addr, &map2_length); + sdt = check_and_map_sdt(physmem, pagesize, rsdt->sdt_addresses[i], "MCFG", &map2_addr, &map2_length); if (sdt) { mcfg = pci_malloc(a, sdt->length); memcpy(mcfg, sdt, sdt->length); - munmap(map2_addr, map2_length); + physmem_unmap(physmem, map2_addr, map2_length); break; } } - munmap(map_addr, map_length); + physmem_unmap(physmem, map_addr, map_length); if (mcfg) { a->debug("found..."); @@ -463,7 +484,7 @@ find_mcfg(struct pci_access *a, const char *acpimcfg, const char *efisystab, int } static void -get_mcfg_allocation(struct acpi_mcfg *mcfg, unsigned int i, int *domain, u8 *start_bus, u8 *end_bus, off_t *addr, u32 *length) 
+get_mcfg_allocation(struct acpi_mcfg *mcfg, unsigned int i, int *domain, u8 *start_bus, u8 *end_bus, u64 *addr, u32 *length) { int buses = (int)mcfg->allocations[i].end_bus_number - (int)mcfg->allocations[i].start_bus_number + 1; @@ -480,15 +501,15 @@ get_mcfg_allocation(struct acpi_mcfg *mcfg, unsigned int i, int *domain, u8 *sta } static int -parse_next_addrs(const char *addrs, const char **next, int *domain, u8 *start_bus, u8 *end_bus, off_t *addr, u32 *length) +parse_next_addrs(const char *addrs, const char **next, int *domain, u8 *start_bus, u8 *end_bus, u64 *addr, u32 *length) { - unsigned long long ullnum; + u64 ullnum; const char *sep1, *sep2; int addr_len; char *endptr; long num; int buses; - unsigned long long start_addr; + u64 start_addr; if (!*addrs) { @@ -563,7 +584,7 @@ parse_next_addrs(const char *addrs, const char **next, int *domain, u8 *start_bu errno = 0; ullnum = strtoull(sep2+1, &endptr, 16); - if (errno || (ullnum & 3) || ullnum > OFF_MAX) + if (errno || (ullnum & 3)) return 0; if (addr) *addr = ullnum; @@ -577,7 +598,7 @@ parse_next_addrs(const char *addrs, const char **next, int *domain, u8 *start_bu if (end_bus) *end_bus = 0xff; } - if ((unsigned)buses * 32 * 8 * 4096 > OFF_MAX - start_addr) + if (start_addr + (unsigned)buses * 32 * 8 * 4096 < start_addr) return 0; if (length) *length = buses * 32 * 8 * 4096; @@ -588,9 +609,9 @@ parse_next_addrs(const char *addrs, const char **next, int *domain, u8 *start_bu return 0; errno = 0; ullnum = strtoull(endptr+1, &endptr, 16); - if (errno || endptr != addrs + addr_len || (ullnum & 3) || ullnum > OFF_MAX || ullnum > 256 * 32 * 8 * 4096) + if (errno || endptr != addrs + addr_len || (ullnum & 3) || ullnum > 256 * 32 * 8 * 4096) return 0; - if (ullnum > OFF_MAX - start_addr) + if (start_addr + ullnum < start_addr) return 0; if (buses > 0 && ullnum > (unsigned)buses * 32 * 8 * 4096) return 0; @@ -619,7 +640,7 @@ validate_addrs(const char *addrs) } static int -calculate_bus_addr(u8 start_bus, off_t 
start_addr, u32 total_length, u8 bus, off_t *addr, u32 *length) +calculate_bus_addr(u8 start_bus, u64 start_addr, u32 total_length, u8 bus, u64 *addr, u32 *length) { u32 offset; @@ -637,12 +658,12 @@ calculate_bus_addr(u8 start_bus, off_t start_addr, u32 total_length, u8 bus, off } static int -get_bus_addr(struct acpi_mcfg *mcfg, const char *addrs, int domain, u8 bus, off_t *addr, u32 *length) +get_bus_addr(struct acpi_mcfg *mcfg, const char *addrs, int domain, u8 bus, u64 *addr, u32 *length) { int cur_domain; u8 start_bus; u8 end_bus; - off_t start_addr; + u64 start_addr; u32 total_length; int i, count; @@ -670,33 +691,18 @@ get_bus_addr(struct acpi_mcfg *mcfg, const char *addrs, int domain, u8 bus, off_ } } -struct mmap_cache -{ - void *map; - off_t addr; - u32 length; - int domain; - u8 bus; - int w; -}; - -// Back-end data linked to struct pci_access -struct ecam_access -{ - struct acpi_mcfg *mcfg; - struct mmap_cache *cache; -}; - static void munmap_reg(struct pci_access *a) { struct ecam_access *eacc = a->backend_data; struct mmap_cache *cache = eacc->cache; + struct physmem *physmem = eacc->physmem; + long pagesize = eacc->pagesize; if (!cache) return; - munmap(cache->map, cache->length + (cache->addr & (pagesize-1))); + physmem_unmap(physmem, cache->map, cache->length + (cache->addr & (pagesize-1))); pci_mfree(cache); eacc->cache = NULL; } @@ -706,9 +712,11 @@ mmap_reg(struct pci_access *a, int w, int domain, u8 bus, u8 dev, u8 func, int p { struct ecam_access *eacc = a->backend_data; struct mmap_cache *cache = eacc->cache; + struct physmem *physmem = eacc->physmem; + long pagesize = eacc->pagesize; const char *addrs; void *map; - off_t addr; + u64 addr; u32 length; u32 offset; @@ -724,12 +732,12 @@ mmap_reg(struct pci_access *a, int w, int domain, u8 bus, u8 dev, u8 func, int p if (!get_bus_addr(eacc->mcfg, addrs, domain, bus, &addr, &length)) return 0; - map = mmap(NULL, length + (addr & (pagesize-1)), w ? 
PROT_WRITE : PROT_READ, MAP_SHARED, a->fd, addr & ~(pagesize-1)); - if (map == MAP_FAILED) + map = physmem_map(physmem, addr & ~(pagesize-1), length + (addr & (pagesize-1)), w); + if (map == (void *)-1) return 0; if (cache) - munmap(cache->map, cache->length + (cache->addr & (pagesize-1))); + physmem_unmap(physmem, cache->map, cache->length + (cache->addr & (pagesize-1))); else cache = eacc->cache = pci_malloc(a, sizeof(*cache)); @@ -755,45 +763,9 @@ mmap_reg(struct pci_access *a, int w, int domain, u8 bus, u8 dev, u8 func, int p } static void -writeb(unsigned char value, volatile void *addr) -{ - *(volatile unsigned char *)addr = value; -} - -static void -writew(unsigned short value, volatile void *addr) -{ - *(volatile unsigned short *)addr = value; -} - -static void -writel(unsigned int value, volatile void *addr) -{ - *(volatile unsigned int *)addr = value; -} - -static unsigned char -readb(volatile void *addr) -{ - return *(volatile unsigned char *)addr; -} - -static unsigned short -readw(volatile void *addr) -{ - return *(volatile unsigned short *)addr; -} - -static unsigned int -readl(volatile void *addr) -{ - return *(volatile unsigned int *)addr; -} - -static void ecam_config(struct pci_access *a) { - pci_define_param(a, "devmem.path", PCI_PATH_DEVMEM_DEVICE, "Path to the /dev/mem device"); + physmem_init_config(a); pci_define_param(a, "ecam.acpimcfg", PCI_PATH_ACPI_MCFG, "Path to the ACPI MCFG table"); pci_define_param(a, "ecam.efisystab", PCI_PATH_EFI_SYSTAB, "Path to the EFI system table"); #if defined (__FreeBSD__) || defined (__DragonFly__) || defined(__NetBSD__) @@ -809,7 +781,6 @@ static int ecam_detect(struct pci_access *a) { int use_addrs = 1, use_acpimcfg = 1, use_efisystab = 1, use_bsd = 1, use_x86bios = 1; - const char *devmem = pci_get_param(a, "devmem.path"); const char *acpimcfg = pci_get_param(a, "ecam.acpimcfg"); const char *efisystab = pci_get_param(a, "ecam.efisystab"); #if defined (__FreeBSD__) || defined (__DragonFly__) || 
defined(__NetBSD__) @@ -819,6 +790,7 @@ ecam_detect(struct pci_access *a) const char *x86bios = pci_get_param(a, "ecam.x86bios"); #endif const char *addrs = pci_get_param(a, "ecam.addrs"); + struct ecam_access *eacc; glob_t mcfg_glob; int ret; @@ -849,7 +821,7 @@ ecam_detect(struct pci_access *a) else use_acpimcfg = 0; - if (access(efisystab, R_OK)) + if (!efisystab[0] || access(efisystab, R_OK)) { if (efisystab[0]) a->debug("cannot access efisystab: %s: %s...", efisystab, strerror(errno)); @@ -888,16 +860,50 @@ ecam_detect(struct pci_access *a) return 0; } - if (access(devmem, R_OK)) + if (physmem_access(a, 0)) { - a->debug("cannot access physical memory via %s: %s", devmem, strerror(errno)); + a->debug("cannot access physical memory: %s", strerror(errno)); return 0; } + if (!use_addrs) + { + eacc = pci_malloc(a, sizeof(*eacc)); + + eacc->physmem = physmem_open(a, a->writeable); + if (!eacc->physmem) + { + a->debug("cannot open physcal memory: %s.", strerror(errno)); + pci_mfree(eacc); + return 0; + } + + eacc->pagesize = physmem_get_pagesize(eacc->physmem); + if (eacc->pagesize <= 0) + { + a->debug("Cannot get page size: %s.", strerror(errno)); + physmem_close(eacc->physmem); + pci_mfree(eacc); + return 0; + } + + eacc->mcfg = NULL; + eacc->cache = NULL; + a->backend_data = eacc; + eacc->mcfg = find_mcfg(a, acpimcfg, efisystab, use_bsd, use_x86bios); + if (!eacc->mcfg) + { + physmem_close(eacc->physmem); + pci_mfree(eacc); + a->backend_data = NULL; + return 0; + } + } + if (use_addrs) - a->debug("using %s with ecam addresses %s", devmem, addrs); + a->debug("using with ecam addresses %s", addrs); else - a->debug("using %s with%s%s%s%s%s%s", devmem, use_acpimcfg ? " acpimcfg=" : "", use_acpimcfg ? acpimcfg : "", use_efisystab ? " efisystab=" : "", use_efisystab ? efisystab : "", use_bsd ? " bsd" : "", use_x86bios ? " x86bios" : ""); + a->debug("using with%s%s%s%s%s%s", use_acpimcfg ? " acpimcfg=" : "", use_acpimcfg ? acpimcfg : "", use_efisystab ? 
" efisystab=" : "", use_efisystab ? efisystab : "", use_bsd ? " bsd" : "", use_x86bios ? " x86bios" : ""); return 1; } @@ -905,7 +911,6 @@ ecam_detect(struct pci_access *a) static void ecam_init(struct pci_access *a) { - const char *devmem = pci_get_param(a, "devmem.path"); const char *acpimcfg = pci_get_param(a, "ecam.acpimcfg"); const char *efisystab = pci_get_param(a, "ecam.efisystab"); #if defined (__FreeBSD__) || defined (__DragonFly__) || defined(__NetBSD__) @@ -915,24 +920,35 @@ ecam_init(struct pci_access *a) const char *x86bios = pci_get_param(a, "ecam.x86bios"); #endif const char *addrs = pci_get_param(a, "ecam.addrs"); - struct acpi_mcfg *mcfg = NULL; - struct ecam_access *eacc = NULL; + struct physmem *physmem = NULL; + struct ecam_access *eacc = a->backend_data; + long pagesize = 0; int use_bsd = 0; int use_x86bios = 0; int test_domain = 0; u8 test_bus = 0; volatile void *test_reg; - pagesize = sysconf(_SC_PAGESIZE); - if (pagesize < 0) - a->error("Cannot get page size: %s.", strerror(errno)); - if (!validate_addrs(addrs)) a->error("Option ecam.addrs has invalid address format \"%s\".", addrs); - a->fd = open(devmem, (a->writeable ? 
O_RDWR : O_RDONLY) | O_DSYNC); - if (a->fd < 0) - a->error("Cannot open %s: %s.", devmem, strerror(errno)); + if (!eacc) + { + physmem = physmem_open(a, a->writeable); + if (!physmem) + a->error("Cannot open physcal memory: %s.", strerror(errno)); + + pagesize = physmem_get_pagesize(physmem); + if (pagesize <= 0) + a->error("Cannot get page size: %s.", strerror(errno)); + + eacc = pci_malloc(a, sizeof(*eacc)); + eacc->mcfg = NULL; + eacc->cache = NULL; + eacc->physmem = physmem; + eacc->pagesize = pagesize; + a->backend_data = eacc; + } if (!*addrs) { @@ -944,18 +960,14 @@ ecam_init(struct pci_access *a) if (strcmp(x86bios, "0") != 0) use_x86bios = 1; #endif - mcfg = find_mcfg(a, acpimcfg, efisystab, use_bsd, use_x86bios); - if (!mcfg) + if (!eacc->mcfg) + eacc->mcfg = find_mcfg(a, acpimcfg, efisystab, use_bsd, use_x86bios); + if (!eacc->mcfg) a->error("Option ecam.addrs was not specified and ACPI MCFG table cannot be found."); } - eacc = pci_malloc(a, sizeof(*eacc)); - eacc->mcfg = mcfg; - eacc->cache = NULL; - a->backend_data = eacc; - - if (mcfg) - get_mcfg_allocation(mcfg, 0, &test_domain, &test_bus, NULL, NULL, NULL); + if (eacc->mcfg) + get_mcfg_allocation(eacc->mcfg, 0, &test_domain, &test_bus, NULL, NULL, NULL); else parse_next_addrs(addrs, NULL, &test_domain, &test_bus, NULL, NULL, NULL); @@ -969,16 +981,11 @@ ecam_cleanup(struct pci_access *a) { struct ecam_access *eacc = a->backend_data; - if (a->fd < 0) - return; - munmap_reg(a); + physmem_close(eacc->physmem); pci_mfree(eacc->mcfg); pci_mfree(eacc); a->backend_data = NULL; - - close(a->fd); - a->fd = -1; } static void @@ -1037,13 +1044,13 @@ ecam_read(struct pci_dev *d, int pos, byte *buf, int len) switch (len) { case 1: - buf[0] = readb(reg); + buf[0] = physmem_readb(reg); break; case 2: - ((u16 *) buf)[0] = readw(reg); + ((u16 *) buf)[0] = physmem_readw(reg); break; case 4: - ((u32 *) buf)[0] = readl(reg); + ((u32 *) buf)[0] = physmem_readl(reg); break; } @@ -1067,13 +1074,13 @@ ecam_write(struct 
pci_dev *d, int pos, byte *buf, int len) switch (len) { case 1: - writeb(buf[0], reg); + physmem_writeb(buf[0], reg); break; case 2: - writew(((u16 *) buf)[0], reg); + physmem_writew(((u16 *) buf)[0], reg); break; case 4: - writel(((u32 *) buf)[0], reg); + physmem_writel(((u32 *) buf)[0], reg); break; } diff --git a/lib/header.h b/lib/header.h index 58fe7df..2cee94f 100644 --- a/lib/header.h +++ b/lib/header.h @@ -1415,6 +1415,13 @@ #define PCI_DOE_STS_ERROR 0x4 /* DOE Error */ #define PCI_DOE_STS_OBJECT_READY 0x80000000 /* Data Object Ready */ +/* Lane Margining at the Receiver Extended Capability */ +#define PCI_LMR_CAPS 0x4 /* Margining Port Capabilities Register */ +#define PCI_LMR_CAPS_DRVR 0x1 /* Margining uses Driver Software */ +#define PCI_LMR_PORT_STS 0x6 /* Margining Port Status Register */ +#define PCI_LMR_PORT_STS_READY 0x1 /* Margining Ready */ +#define PCI_LMR_PORT_STS_SOFT_READY 0x2 /* Margining Software Ready */ + /* * The PCI interface treats multi-function devices as independent * devices. 
The slot/function address of each device is encoded diff --git a/lib/i386-io-access.h b/lib/i386-io-access.h new file mode 100644 index 0000000..8b1ad5f --- /dev/null +++ b/lib/i386-io-access.h @@ -0,0 +1,75 @@ +/* + * The PCI Library -- Compiler-specific wrappers around x86 I/O port access instructions + * + * Copyright (c) 2023 Pali Rohár <pali@kernel.org> + * + * Can be freely distributed and used under the terms of the GNU GPL v2+ + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#if defined(__GNUC__) + +static inline unsigned char +intel_inb(unsigned short int port) +{ + unsigned char value; + asm volatile ("inb %w1, %0" : "=a" (value) : "Nd" (port)); + return value; +} + +static inline unsigned short int +intel_inw(unsigned short int port) +{ + unsigned short value; + asm volatile ("inw %w1, %0" : "=a" (value) : "Nd" (port)); + return value; +} + +static inline unsigned int +intel_inl(unsigned short int port) +{ + u32 value; + asm volatile ("inl %w1, %0" : "=a" (value) : "Nd" (port)); + return value; +} + +static inline void +intel_outb(unsigned char value, unsigned short int port) +{ + asm volatile ("outb %b0, %w1" : : "a" (value), "Nd" (port)); +} + +static inline void +intel_outw(unsigned short int value, unsigned short int port) +{ + asm volatile ("outw %w0, %w1" : : "a" (value), "Nd" (port)); +} + +static inline void +intel_outl(u32 value, unsigned short int port) +{ + asm volatile ("outl %0, %w1" : : "a" (value), "Nd" (port)); +} + +#elif defined(_MSC_VER) + +#pragma intrinsic(_outp) +#pragma intrinsic(_outpw) +#pragma intrinsic(_outpd) +#pragma intrinsic(_inp) +#pragma intrinsic(_inpw) +#pragma intrinsic(_inpd) + +#define intel_outb(x, y) _outp(y, x) +#define intel_outw(x, y) _outpw(y, x) +#define intel_outl(x, y) _outpd(y, x) +#define intel_inb(x) _inp(x) +#define intel_inw(x) _inpw(x) +#define intel_inl(x) _inpd(x) + +#else + +#error Do not know how to access I/O ports on this compiler + +#endif diff --git a/lib/i386-io-beos.h 
b/lib/i386-io-beos.h index 49b7094..dac0e4b 100644 --- a/lib/i386-io-beos.h +++ b/lib/i386-io-beos.h @@ -24,37 +24,37 @@ intel_cleanup_io(struct pci_access *a UNUSED) } static inline u8 -inb (u16 port) +intel_inb (u16 port) { return (u8)read_isa_io(0, (void *)(u32)port, sizeof(u8)); } static inline u16 -inw (u16 port) +intel_inw (u16 port) { return (u16)read_isa_io(0, (void *)(u32)port, sizeof(u16)); } static inline u32 -inl (u16 port) +intel_inl (u16 port) { return (u32)read_isa_io(0, (void *)(u32)port, sizeof(u32)); } static inline void -outb (u8 value, u16 port) +intel_outb (u8 value, u16 port) { write_isa_io(0, (void *)(u32)port, sizeof(value), value); } static inline void -outw (u16 value, u16 port) +intel_outw (u16 value, u16 port) { write_isa_io(0, (void *)(u32)port, sizeof(value), value); } static inline void -outl (u32 value, u16 port) +intel_outl (u32 value, u16 port) { write_isa_io(0, (void *)(u32)port, sizeof(value), value); } diff --git a/lib/i386-io-cygwin.h b/lib/i386-io-cygwin.h index 0b71d16..4118057 100644 --- a/lib/i386-io-cygwin.h +++ b/lib/i386-io-cygwin.h @@ -10,6 +10,8 @@ #include <sys/io.h> +#include "i386-io-access.h" + static int intel_setup_io(struct pci_access *a UNUSED) { diff --git a/lib/i386-io-djgpp.h b/lib/i386-io-djgpp.h index bb29526..1afb00e 100644 --- a/lib/i386-io-djgpp.h +++ b/lib/i386-io-djgpp.h @@ -8,15 +8,9 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#include <pc.h> #include <dos.h> -#define outb(x,y) outportb(y, x) -#define outw(x,y) outportw(y, x) -#define outl(x,y) outportl(y, x) -#define inb inportb -#define inw inportw -#define inl inportl +#include "i386-io-access.h" static int irq_enabled; diff --git a/lib/i386-io-haiku.h b/lib/i386-io-haiku.h index ce5362b..23843ea 100644 --- a/lib/i386-io-haiku.h +++ b/lib/i386-io-haiku.h @@ -66,6 +66,15 @@ static int poke_driver_fd; static int intel_setup_io(struct pci_access *a UNUSED) { + /* + * Opening poke device on systems with the linked change below + * automatically 
changes process IOPL to 3 and closing its file + * descriptor changes process IOPL back to 0, which give access + * to all x86 IO ports via x86 in/out instructions for this + * userspace process. To support also older systems without this + * change, access IO ports via ioctl() instead of x86 in/out. + * https://review.haiku-os.org/c/haiku/+/1077 + */ poke_driver_fd = open(POKE_DEVICE_FULLNAME, O_RDWR); return (poke_driver_fd < 0) ? 0 : 1; } @@ -77,7 +86,7 @@ intel_cleanup_io(struct pci_access *a UNUSED) } static inline u8 -inb (u16 port) +intel_inb (u16 port) { port_io_args args = { POKE_SIGNATURE, port, sizeof(u8), 0 }; if (ioctl(poke_driver_fd, POKE_PORT_READ, &args, sizeof(args)) < 0) @@ -86,7 +95,7 @@ inb (u16 port) } static inline u16 -inw (u16 port) +intel_inw (u16 port) { port_io_args args = { POKE_SIGNATURE, port, sizeof(u16), 0 }; if (ioctl(poke_driver_fd, POKE_PORT_READ, &args, sizeof(args)) < 0) @@ -95,7 +104,7 @@ inw (u16 port) } static inline u32 -inl (u16 port) +intel_inl (u16 port) { port_io_args args = { POKE_SIGNATURE, port, sizeof(u32), 0 }; if (ioctl(poke_driver_fd, POKE_PORT_READ, &args, sizeof(args)) < 0) @@ -104,21 +113,21 @@ inl (u16 port) } static inline void -outb (u8 value, u16 port) +intel_outb (u8 value, u16 port) { port_io_args args = { POKE_SIGNATURE, port, sizeof(u8), value }; ioctl(poke_driver_fd, POKE_PORT_WRITE, &args, sizeof(args)); } static inline void -outw (u16 value, u16 port) +intel_outw (u16 value, u16 port) { port_io_args args = { POKE_SIGNATURE, port, sizeof(u16), value }; ioctl(poke_driver_fd, POKE_PORT_WRITE, &args, sizeof(args)); } static inline void -outl (u32 value, u16 port) +intel_outl (u32 value, u16 port) { port_io_args args = { POKE_SIGNATURE, port, sizeof(u32), value }; ioctl(poke_driver_fd, POKE_PORT_WRITE, &args, sizeof(args)); diff --git a/lib/i386-io-hurd.h b/lib/i386-io-hurd.h index d44b2f5..01d684e 100644 --- a/lib/i386-io-hurd.h +++ b/lib/i386-io-hurd.h @@ -14,6 +14,8 @@ #include <sys/io.h> +#include 
"i386-io-access.h" + static inline int intel_setup_io(struct pci_access *a UNUSED) { diff --git a/lib/i386-io-linux.h b/lib/i386-io-linux.h index e6bb9b6..317f079 100644 --- a/lib/i386-io-linux.h +++ b/lib/i386-io-linux.h @@ -11,6 +11,8 @@ #include <sys/io.h> #include <errno.h> +#include "i386-io-access.h" + static int ioperm_enabled; static int iopl_enabled; diff --git a/lib/i386-io-openbsd.h b/lib/i386-io-openbsd.h new file mode 100644 index 0000000..8a9b4a4 --- /dev/null +++ b/lib/i386-io-openbsd.h @@ -0,0 +1,54 @@ +/* + * The PCI Library -- Access to i386 I/O ports on OpenBSD + * + * Copyright (c) 2023 Grant Pannell <grant@pannell.net.au> + * + * Can be freely distributed and used under the terms of the GNU GPL. + */ + +#include <sys/types.h> +#include <machine/sysarch.h> +#include <machine/pio.h> + +#include "i386-io-access.h" + +#if defined(__amd64__) + #define obsd_iopl amd64_iopl +#else + #define obsd_iopl i386_iopl +#endif + +static int iopl_enabled; + +static int +intel_setup_io(struct pci_access *a UNUSED) +{ + if (iopl_enabled) + return 1; + + if (obsd_iopl(3) < 0) + { + return 0; + } + + iopl_enabled = 1; + return 1; +} + +static inline void +intel_cleanup_io(struct pci_access *a UNUSED) +{ + if (iopl_enabled) + { + obsd_iopl(0); + iopl_enabled = 0; + } +} + +static inline void intel_io_lock(void) +{ +} + +static inline void intel_io_unlock(void) +{ +} diff --git a/lib/i386-io-sunos.h b/lib/i386-io-sunos.h index 86948d9..99fd576 100644 --- a/lib/i386-io-sunos.h +++ b/lib/i386-io-sunos.h @@ -12,6 +12,8 @@ #include <sys/sysi86.h> #include <sys/psw.h> +#include "i386-io-access.h" + static int intel_setup_io(struct pci_access *a UNUSED) { @@ -24,48 +26,6 @@ intel_cleanup_io(struct pci_access *a UNUSED) /* FIXME: How to switch off I/O port access? 
*/ } -static inline u8 -inb (u16 port) -{ - u8 v; - __asm__ __volatile__ ("inb (%w1)":"=a" (v):"Nd" (port)); - return v; -} - -static inline u16 -inw (u16 port) -{ - u16 v; - __asm__ __volatile__ ("inw (%w1)":"=a" (v):"Nd" (port)); - return v; -} - -static inline u32 -inl (u16 port) -{ - u32 v; - __asm__ __volatile__ ("inl (%w1)":"=a" (v):"Nd" (port)); - return v; -} - -static inline void -outb (u8 value, u16 port) -{ - __asm__ __volatile__ ("outb (%w1)": :"a" (value), "Nd" (port)); -} - -static inline void -outw (u16 value, u16 port) -{ - __asm__ __volatile__ ("outw (%w1)": :"a" (value), "Nd" (port)); -} - -static inline void -outl (u32 value, u16 port) -{ - __asm__ __volatile__ ("outl (%w1)": :"a" (value), "Nd" (port)); -} - static inline void intel_io_lock(void) { } diff --git a/lib/i386-io-windows.h b/lib/i386-io-windows.h index 73af883..d2da452 100644 --- a/lib/i386-io-windows.h +++ b/lib/i386-io-windows.h @@ -10,95 +10,10 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -#include <io.h> #include <windows.h> -#include <aclapi.h> +#include "win32-helpers.h" -#ifdef _MSC_VER -/* MSVC compiler provides I/O port intrinsics for both 32 and 64-bit modes. */ -#pragma intrinsic(_outp) -#pragma intrinsic(_outpw) -#pragma intrinsic(_outpd) -#pragma intrinsic(_inp) -#pragma intrinsic(_inpw) -#pragma intrinsic(_inpd) -#elif defined(_WIN64) || defined(_UCRT) -/* - * For other compilers I/O port intrinsics are available in <intrin.h> header - * file either as inline/external functions or macros. Beware that <intrin.h> - * names are different than MSVC intrinsics names and glibc function names. - * Usage of <intrin.h> is also the prefered way for 64-bit mode or when using - * new UCRT library. 
- */ -#include <intrin.h> -#define _outp(x,y) __outbyte(x,y) -#define _outpw(x,y) __outword(x,y) -#define _outpd(x,y) __outdword(x,y) -#define _inp(x) __inbyte(x) -#define _inpw(x) __inword(x) -#define _inpd(x) __indword(x) -#elif defined(__CRTDLL__) || (defined(__MSVCRT_VERSION__) && __MSVCRT_VERSION__ < 0x400) -/* - * Old 32-bit CRTDLL library and pre-4.00 MSVCRT library do not provide I/O - * port functions. As these libraries exist only in 32-bit mode variant, - * implement I/O port functions via 32-bit inline assembly. - */ -static inline int _outp(unsigned short port, int databyte) -{ - asm volatile ("outb %b0, %w1" : : "a" (databyte), "Nd" (port)); - return databyte; -} -static inline unsigned short _outpw(unsigned short port, unsigned short dataword) -{ - asm volatile ("outw %w0, %w1" : : "a" (dataword), "Nd" (port)); - return dataword; -} -static inline unsigned long _outpd(unsigned short port, unsigned long dataword) -{ - asm volatile ("outl %0, %w1" : : "a" (dataword), "Nd" (port)); - return dataword; -} -static inline int _inp(unsigned short port) -{ - unsigned char ret; - asm volatile ("inb %w1, %0" : "=a" (ret) : "Nd" (port)); - return ret; -} -static inline unsigned short _inpw(unsigned short port) -{ - unsigned short ret; - asm volatile ("inw %w1, %0" : "=a" (ret) : "Nd" (port)); - return ret; -} -static inline unsigned long _inpd(unsigned short port) -{ - unsigned long ret; - asm volatile ("inl %w1, %0" : "=a" (ret) : "Nd" (port)); - return ret; -} -#elif !defined(__GNUC__) -/* - * Old 32-bit MSVCRT (non-UCRT) library provides I/O port functions. Function - * prototypes are defined in <conio.h> header file but they are missing in - * some MinGW toolchains. So for GCC compiler define them manually. 
- */ -#include <conio.h> -#else -int _outp(unsigned short port, int databyte); -unsigned short _outpw(unsigned short port, unsigned short dataword); -unsigned long _outpd(unsigned short port, unsigned long dataword); -int _inp(unsigned short port); -unsigned short _inpw(unsigned short port); -unsigned long _inpd(unsigned short port); -#endif - -#define outb(x,y) _outp(y,x) -#define outw(x,y) _outpw(y,x) -#define outl(x,y) _outpd(y,x) - -#define inb(x) _inp(x) -#define inw(x) _inpw(x) -#define inl(x) _inpd(x) +#include "i386-io-access.h" /* * Define __readeflags() for MSVC and GCC compilers. @@ -111,17 +26,26 @@ unsigned long _inpd(unsigned short port); * GCC version 4.9.0 and higher provides __builtin_ia32_readeflags_uXX() * builtin for XX-mode. This builtin is also available as __readeflags() * function indirectly via <x86intrin.h> header file. + * + * CAVEAT: Semicolon in MSVC __asm block means start of the comment, and not + * end of the __asm statement, like it is for all other C statements. Also + * function which uses MSVC inline assembly cannot be inlined to another function + * (compiler reports a warning about it, not a fatal error). So we add explicit + * curly brackets for __asm blocks, remove misleading semicolons and do not + * declare functions as inline. */ #if defined(_MSC_VER) && (_MSC_VER >= 1500 || (_MSC_VER >= 1400 && defined(__BUILDMACHINE__))) #pragma intrinsic(__readeflags) #elif defined(__GNUC__) && ((__GNUC__ == 4 && __GNUC_MINOR__ >= 9) || (__GNUC__ > 4)) #include <x86intrin.h> #elif defined(_MSC_VER) && defined(_M_IX86) -static inline unsigned int +static unsigned int __readeflags(void) { - __asm pushfd; - __asm pop eax; + __asm { + pushfd + pop eax + } } #elif defined(__GNUC__) static inline unsigned @@ -146,42 +70,6 @@ __readeflags(void) /* Read IOPL of the current process, IOPL is stored in eflag bits [13:12]. 
*/ #define read_iopl() ((__readeflags() >> 12) & 0x3) -/* Unfortunately i586-mingw32msvc toolchain does not provide this constant. */ -#ifndef PROCESS_QUERY_LIMITED_INFORMATION -#define PROCESS_QUERY_LIMITED_INFORMATION 0x1000 -#endif - -/* Unfortunately some toolchains do not provide this constant. */ -#ifndef SE_IMPERSONATE_NAME -#define SE_IMPERSONATE_NAME TEXT("SeImpersonatePrivilege") -#endif - -/* - * These psapi functions are available in kernel32.dll library with K32 prefix - * on Windows 7 and higher systems. On older Windows systems these functions are - * available in psapi.dll libary without K32 prefix. So resolve pointers to - * these functions dynamically at runtime from the available system library. - * Function GetProcessImageFileNameW() is not available on Windows 2000 and - * older systems. - */ -typedef BOOL (WINAPI *EnumProcessesProt)(DWORD *lpidProcess, DWORD cb, DWORD *cbNeeded); -typedef DWORD (WINAPI *GetProcessImageFileNameWProt)(HANDLE hProcess, LPWSTR lpImageFileName, DWORD nSize); -typedef DWORD (WINAPI *GetModuleFileNameExWProt)(HANDLE hProcess, HMODULE hModule, LPWSTR lpImageFileName, DWORD nSize); - -/* - * These aclapi functions are available in advapi.dll library on Windows NT 4.0 - * and higher systems. - */ -typedef DWORD (WINAPI *GetSecurityInfoProt)(HANDLE handle, SE_OBJECT_TYPE ObjectType, SECURITY_INFORMATION SecurityInfo, PSID *ppsidOwner, PSID *ppsidGroup, PACL *ppDacl, PACL *ppSacl, PSECURITY_DESCRIPTOR *ppSecurityDescriptor); -typedef DWORD (WINAPI *SetSecurityInfoProt)(HANDLE handle, SE_OBJECT_TYPE ObjectType, SECURITY_INFORMATION SecurityInfo, PSID psidOwner, PSID psidGroup, PACL pDacl, PACL pSacl); -typedef DWORD (WINAPI *SetEntriesInAclProt)(ULONG cCountOfExplicitEntries, PEXPLICIT_ACCESS pListOfExplicitEntries, PACL OldAcl, PACL *NewAcl); - -/* - * This errhandlingapi function is available in kernel32.dll library on - * Windows 7 and higher systems. 
- */ -typedef BOOL (WINAPI *SetThreadErrorModeProt)(DWORD dwNewMode, LPDWORD lpOldMode); - /* * Unfortunately NtSetInformationProcess() function, ProcessUserModeIOPL * constant and all other helpers for its usage are not specified in any @@ -205,858 +93,46 @@ typedef BOOL (WINAPI *SetThreadErrorModeProt)(DWORD dwNewMode, LPDWORD lpOldMode #define ProcessUserModeIOPL 16 #endif typedef NTSTATUS (NTAPI *NtSetInformationProcessProt)(HANDLE ProcessHandle, PROCESSINFOCLASS ProcessInformationClass, PVOID ProcessInformation, ULONG ProcessInformationLength); +typedef ULONG (NTAPI *RtlNtStatusToDosErrorProt)(NTSTATUS Status); /* - * Check if the current thread has particular privilege in current active access - * token. Case when it not possible to determinate it (e.g. current thread does - * not have permission to open its own current active access token) is evaluated - * as thread does not have that privilege. - */ -static BOOL -have_privilege(LUID luid_privilege) -{ - PRIVILEGE_SET priv; - HANDLE token; - BOOL ret; - - /* - * If the current thread does not have active access token then thread - * uses primary process access token for all permission checks. - */ - if (!OpenThreadToken(GetCurrentThread(), TOKEN_QUERY, TRUE, &token) && - (GetLastError() != ERROR_NO_TOKEN || - !OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token))) - return FALSE; - - priv.PrivilegeCount = 1; - priv.Control = PRIVILEGE_SET_ALL_NECESSARY; - priv.Privilege[0].Luid = luid_privilege; - priv.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED; - - if (!PrivilegeCheck(token, &priv, &ret)) - return FALSE; - - return ret; -} - -/* - * Enable or disable particular privilege in specified access token. + * ProcessUserModeIOPL is syscall for NT kernel to change x86 IOPL + * of the current running process to 3. * - * Note that it is not possible to disable privilege in access token with - * SE_PRIVILEGE_ENABLED_BY_DEFAULT attribute. 
This function does not check - * this case and incorrectly returns no error even when disabling failed. - * Rationale for this decision: Simplification of this function as WinAPI - * call AdjustTokenPrivileges() does not signal error in this case too. - */ -static BOOL -set_privilege(HANDLE token, LUID luid_privilege, BOOL enable) -{ - TOKEN_PRIVILEGES token_privileges; - - token_privileges.PrivilegeCount = 1; - token_privileges.Privileges[0].Luid = luid_privilege; - token_privileges.Privileges[0].Attributes = enable ? SE_PRIVILEGE_ENABLED : 0; - - /* - * WinAPI function AdjustTokenPrivileges() success also when not all - * privileges were enabled. It is always required to check for failure - * via GetLastError() call. AdjustTokenPrivileges() always sets error - * also when it success, as opposite to other WinAPI functions. - */ - if (!AdjustTokenPrivileges(token, FALSE, &token_privileges, sizeof(token_privileges), NULL, NULL) || - GetLastError() != ERROR_SUCCESS) - return FALSE; - - return TRUE; -} - -/* - * Change access token for the current thread to new specified access token. - * Previously active access token is stored in old_token variable and can be - * used for reverting to this access token. It is set to NULL if the current - * thread previously used primary process access token. - */ -static BOOL -change_token(HANDLE new_token, HANDLE *old_token) -{ - HANDLE token; - - if (!OpenThreadToken(GetCurrentThread(), TOKEN_IMPERSONATE, TRUE, &token)) - { - if (GetLastError() != ERROR_NO_TOKEN) - return FALSE; - token = NULL; - } - - if (!ImpersonateLoggedOnUser(new_token)) - { - if (token) - CloseHandle(token); - return FALSE; - } - - *old_token = token; - return TRUE; -} - -/* - * Change access token for the current thread to the primary process access - * token. This function fails also when the current thread already uses primary - * process access token. 
- */ -static BOOL -change_token_to_primary(HANDLE *old_token) -{ - HANDLE token; - - if (!OpenThreadToken(GetCurrentThread(), TOKEN_IMPERSONATE, TRUE, &token)) - return FALSE; - - RevertToSelf(); - - *old_token = token; - return TRUE; -} - -/* - * Revert to the specified access token for the current thread. When access - * token is specified as NULL then revert to the primary process access token. - * Use to revert after change_token() or change_token_to_primary() call. - */ -static VOID -revert_to_token(HANDLE token) -{ - /* - * If SetThreadToken() call fails then there is no option to revert to - * the specified previous thread access token. So in this case revert to - * the primary process access token. - */ - if (!token || !SetThreadToken(NULL, token)) - RevertToSelf(); - if (token) - CloseHandle(token); -} - -/* - * Enable particular privilege for the current thread. And set method how to - * revert this privilege (if to revert whole token or only privilege). - */ -static BOOL -enable_privilege(LUID luid_privilege, HANDLE *revert_token, BOOL *revert_only_privilege) -{ - HANDLE thread_token; - HANDLE new_token; - - if (OpenThreadToken(GetCurrentThread(), TOKEN_ADJUST_PRIVILEGES, TRUE, &thread_token)) - { - if (set_privilege(thread_token, luid_privilege, TRUE)) - { - /* - * Indicate that correct revert method is just to - * disable privilege in access token. - */ - if (revert_token && revert_only_privilege) - { - *revert_token = thread_token; - *revert_only_privilege = TRUE; - } - else - { - CloseHandle(thread_token); - } - return TRUE; - } - CloseHandle(thread_token); - /* - * If enabling privilege failed then try to enable it via - * primary process access token. - */ - } - - /* - * If the current thread has already active thread access token then - * open it with just impersonate right as it would be used only for - * future revert. 
- */ - if (revert_token && revert_only_privilege) - { - if (!OpenThreadToken(GetCurrentThread(), TOKEN_IMPERSONATE, TRUE, &thread_token)) - { - if (GetLastError() != ERROR_NO_TOKEN) - return FALSE; - thread_token = NULL; - } - - /* - * If current thread has no access token (and uses primary - * process access token) or it does not have permission to - * adjust privileges or it does not have specified privilege - * then create a copy of the primary process access token, - * assign it for the current thread (= impersonate self) - * and then try adjusting privilege again. - */ - if (!ImpersonateSelf(SecurityImpersonation)) - { - if (thread_token) - CloseHandle(thread_token); - return FALSE; - } - } - - if (!OpenThreadToken(GetCurrentThread(), TOKEN_ADJUST_PRIVILEGES, TRUE, &new_token)) - { - /* thread_token is set only when we were asked for revert method. */ - if (revert_token && revert_only_privilege) - revert_to_token(thread_token); - return FALSE; - } - - if (!set_privilege(new_token, luid_privilege, TRUE)) - { - CloseHandle(new_token); - /* thread_token is set only when we were asked for revert method. */ - if (revert_token && revert_only_privilege) - revert_to_token(thread_token); - return FALSE; - } - - /* - * Indicate that correct revert method is to change to the previous - * access token. Either to the primary process access token or to the - * previous thread access token. - */ - if (revert_token && revert_only_privilege) - { - *revert_token = thread_token; - *revert_only_privilege = FALSE; - } - return TRUE; -} - -/* - * Revert particular privilege for the current thread was previously enabled by - * enable_privilege() call. Either disable privilege in specified access token - * or revert to previous access token. 
- */ -static VOID -revert_privilege(LUID luid_privilege, HANDLE revert_token, BOOL revert_only_privilege) -{ - if (revert_only_privilege) - { - set_privilege(revert_token, luid_privilege, FALSE); - CloseHandle(revert_token); - } - else - { - revert_to_token(revert_token); - } -} - -/* - * Return owner of the access token used by the current thread. Buffer for - * returned owner needs to be released by LocalFree() call. - */ -static TOKEN_OWNER * -get_current_token_owner(VOID) -{ - HANDLE token; - DWORD length; - TOKEN_OWNER *owner; - - /* - * If the current thread does not have active access token then thread - * uses primary process access token for all permission checks. - */ - if (!OpenThreadToken(GetCurrentThread(), TOKEN_QUERY, TRUE, &token) && - (GetLastError() != ERROR_NO_TOKEN || - !OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token))) - return NULL; - - if (!GetTokenInformation(token, TokenOwner, NULL, 0, &length) && - GetLastError() != ERROR_INSUFFICIENT_BUFFER) - { - CloseHandle(token); - return NULL; - } - -retry: - owner = (TOKEN_OWNER *)LocalAlloc(LPTR, length); - if (!owner) - { - CloseHandle(token); - return NULL; - } - - if (!GetTokenInformation(token, TokenOwner, owner, length, &length)) - { - /* - * Length of token owner (SID) buffer between two get calls may - * changes (e.g. by another thread of process), so retry. - */ - if (GetLastError() == ERROR_INSUFFICIENT_BUFFER) - { - LocalFree(owner); - goto retry; - } - LocalFree(owner); - CloseHandle(token); - return NULL; - } - - CloseHandle(token); - return owner; -} - -/* - * Grant particular permissions in the primary access token of the specified - * process for the owner of current thread token and set old DACL of the - * process access token for reverting permissions. Security descriptor is - * just memory buffer for old DACL. + * Process handle argument for ProcessUserModeIOPL is ignored and + * IOPL is always changed for the current running process. 
So pass + * GetCurrentProcess() handle for documentation purpose. Process + * information buffer and length are unused for ProcessUserModeIOPL. + * + * ProcessUserModeIOPL may success (return value >= 0) or may fail + * because it is not implemented or because of missing privilege. + * Other errors are not defined, so handle them as unknown. */ static BOOL -grant_process_token_dacl_permissions(HANDLE process, DWORD permissions, HANDLE *token, PACL *old_dacl, PSECURITY_DESCRIPTOR *security_descriptor) +SetProcessUserModeIOPLFunc(LPVOID Arg) { - GetSecurityInfoProt MyGetSecurityInfo; - SetSecurityInfoProt MySetSecurityInfo; - SetEntriesInAclProt MySetEntriesInAcl; - EXPLICIT_ACCESS explicit_access; - TOKEN_OWNER *owner; - HMODULE advapi32; - PACL new_dacl; - - /* - * This source file already uses advapi32.dll library, so it is - * linked to executable and automatically loaded when starting - * current running process. - */ - advapi32 = GetModuleHandle(TEXT("advapi32.dll")); - if (!advapi32) - return FALSE; - - /* - * It does not matter if SetEntriesInAclA() or SetEntriesInAclW() is - * called as no string is passed to SetEntriesInAcl function. - */ - MyGetSecurityInfo = (GetSecurityInfoProt)(LPVOID)GetProcAddress(advapi32, "GetSecurityInfo"); - MySetSecurityInfo = (SetSecurityInfoProt)(LPVOID)GetProcAddress(advapi32, "SetSecurityInfo"); - MySetEntriesInAcl = (SetEntriesInAclProt)(LPVOID)GetProcAddress(advapi32, "SetEntriesInAclA"); - if (!MyGetSecurityInfo || !MySetSecurityInfo || !MySetEntriesInAcl) - return FALSE; - - owner = get_current_token_owner(); - if (!owner) - return FALSE; - - /* - * READ_CONTROL is required for GetSecurityInfo(DACL_SECURITY_INFORMATION) - * and WRITE_DAC is required for SetSecurityInfo(DACL_SECURITY_INFORMATION). 
- */ - if (!OpenProcessToken(process, READ_CONTROL | WRITE_DAC, token)) - { - LocalFree(owner); - return FALSE; - } - - if (MyGetSecurityInfo(*token, SE_KERNEL_OBJECT, DACL_SECURITY_INFORMATION, NULL, NULL, old_dacl, NULL, security_descriptor) != ERROR_SUCCESS) - { - LocalFree(owner); - CloseHandle(*token); - return FALSE; - } - - /* - * Set new explicit access for the owner of the current thread access - * token with non-inherited granting access to specified permissions. - */ - explicit_access.grfAccessPermissions = permissions; - explicit_access.grfAccessMode = GRANT_ACCESS; - explicit_access.grfInheritance = NO_PROPAGATE_INHERIT_ACE; - explicit_access.Trustee.pMultipleTrustee = NULL; - explicit_access.Trustee.MultipleTrusteeOperation = NO_MULTIPLE_TRUSTEE; - explicit_access.Trustee.TrusteeForm = TRUSTEE_IS_SID; - explicit_access.Trustee.TrusteeType = TRUSTEE_IS_USER; - /* - * Unfortunately i586-mingw32msvc toolchain does not have pSid pointer - * member in Trustee union. So assign owner SID to ptstrName pointer - * member which aliases with pSid pointer member in the same union. - */ - explicit_access.Trustee.ptstrName = (PVOID)owner->Owner; - - if (MySetEntriesInAcl(1, &explicit_access, *old_dacl, &new_dacl) != ERROR_SUCCESS) - { - LocalFree(*security_descriptor); - LocalFree(owner); - CloseHandle(*token); - return FALSE; - } - - if (MySetSecurityInfo(*token, SE_KERNEL_OBJECT, DACL_SECURITY_INFORMATION, NULL, NULL, new_dacl, NULL) != ERROR_SUCCESS) - { - LocalFree(new_dacl); - LocalFree(*security_descriptor); - LocalFree(owner); - CloseHandle(*token); - return FALSE; - } - - LocalFree(new_dacl); - LocalFree(owner); - return TRUE; -} - -/* - * Revert particular granted permissions in specified access token done by - * grant_process_token_dacl_permissions() call. 
- */ -static VOID -revert_token_dacl_permissions(HANDLE token, PACL old_dacl, PSECURITY_DESCRIPTOR security_descriptor) -{ - SetSecurityInfoProt MySetSecurityInfo; - HMODULE advapi32; - - /* - * This source file already uses advapi32.dll library, so it is - * linked to executable and automatically loaded when starting - * current running process. - */ - advapi32 = GetModuleHandle(TEXT("advapi32.dll")); - if (advapi32) - { - MySetSecurityInfo = (SetSecurityInfoProt)(LPVOID)GetProcAddress(advapi32, "SetSecurityInfo"); - MySetSecurityInfo(token, SE_KERNEL_OBJECT, DACL_SECURITY_INFORMATION, NULL, NULL, old_dacl, NULL); - } - - LocalFree(security_descriptor); - CloseHandle(token); -} - -/* - * Change error mode of the current thread. If it is not possible then change - * error mode of the whole process. Always returns previous error mode. - */ -static UINT -change_error_mode(UINT new_mode) -{ - SetThreadErrorModeProt MySetThreadErrorMode = NULL; - HMODULE kernel32; - DWORD old_mode; - - /* - * Function SetThreadErrorMode() was introduced in Windows 7, so use - * GetProcAddress() for compatibility with older systems. - */ - kernel32 = GetModuleHandle(TEXT("kernel32.dll")); - if (kernel32) - MySetThreadErrorMode = (SetThreadErrorModeProt)(LPVOID)GetProcAddress(kernel32, "SetThreadErrorMode"); - - if (MySetThreadErrorMode && - MySetThreadErrorMode(new_mode, &old_mode)) - return old_mode; - - /* - * Fallback to function SetErrorMode() which modifies error mode of the - * whole process and returns old mode. - */ - return SetErrorMode(new_mode); -} - -/* - * Open process handle specified by the process id with the query right and - * optionally also with vm read right. 
- */ -static HANDLE -open_process_for_query(DWORD pid, BOOL with_vm_read) -{ - BOOL revert_only_privilege; - LUID luid_debug_privilege; - OSVERSIONINFO version; - DWORD process_right; - HANDLE revert_token; - HANDLE process; + RtlNtStatusToDosErrorProt RtlNtStatusToDosErrorPtr = (RtlNtStatusToDosErrorProt)(((LPVOID *)Arg)[1]); + NtSetInformationProcessProt NtSetInformationProcessPtr = (NtSetInformationProcessProt)(((LPVOID *)Arg)[0]); + NTSTATUS nt_status = NtSetInformationProcessPtr(GetCurrentProcess(), ProcessUserModeIOPL, NULL, 0); + if (nt_status >= 0) + return TRUE; /* - * Some processes on Windows Vista and higher systems can be opened only - * with PROCESS_QUERY_LIMITED_INFORMATION right. This right is enough - * for accessing primary process token. But this right is not supported - * on older pre-Vista systems. When the current thread on these older - * systems does not have Debug privilege then OpenProcess() fails with - * ERROR_ACCESS_DENIED. If the current thread has Debug privilege then - * OpenProcess() success and returns handle to requested process. - * Problem is that this handle does not have PROCESS_QUERY_INFORMATION - * right and so cannot be used for accessing primary process token - * on those older systems. Moreover it has zero rights and therefore - * such handle is fully useless. So never try to use open process with - * PROCESS_QUERY_LIMITED_INFORMATION right on older systems than - * Windows Vista (NT 6.0). + * If we have optional RtlNtStatusToDosError() function then use it for + * translating NT status to Win32 error. If we do not have it then translate + * two important status codes which we use later STATUS_NOT_IMPLEMENTED and + * STATUS_PRIVILEGE_NOT_HELD. 
*/ - version.dwOSVersionInfoSize = sizeof(version); - if (GetVersionEx(&version) && - version.dwPlatformId == VER_PLATFORM_WIN32_NT && - version.dwMajorVersion >= 6) - process_right = PROCESS_QUERY_LIMITED_INFORMATION; + if (RtlNtStatusToDosErrorPtr) + SetLastError(RtlNtStatusToDosErrorPtr(nt_status)); + else if (nt_status == STATUS_NOT_IMPLEMENTED) + SetLastError(ERROR_INVALID_FUNCTION); + else if (nt_status == STATUS_PRIVILEGE_NOT_HELD) + SetLastError(ERROR_PRIVILEGE_NOT_HELD); else - process_right = PROCESS_QUERY_INFORMATION; - - if (with_vm_read) - process_right |= PROCESS_VM_READ; - - process = OpenProcess(process_right, FALSE, pid); - if (process) - return process; - - /* - * It is possible to open only processes to which owner of the current - * thread access token has permissions. For opening other processing it - * is required to have Debug privilege enabled. By default local - * administrators have this privilege, but it is disabled. So try to - * enable it and then try to open process again. - */ - - if (!LookupPrivilegeValue(NULL, SE_DEBUG_NAME, &luid_debug_privilege)) - return NULL; - - if (!enable_privilege(luid_debug_privilege, &revert_token, &revert_only_privilege)) - return NULL; - - process = OpenProcess(process_right, FALSE, pid); - - revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); + SetLastError(ERROR_GEN_FAILURE); - return process; -} - -/* - * Check if process image path name (wide string) matches exe file name - * (7-bit ASCII string). Do case-insensitive string comparison. Process - * image path name can be in any namespace format (DOS, Win32, UNC, ...). - */ -static BOOL -check_process_name(LPCWSTR path, DWORD path_length, LPCSTR exe_file) -{ - DWORD exe_file_length; - WCHAR c1; - UCHAR c2; - DWORD i; - - exe_file_length = 0; - while (exe_file[exe_file_length] != '\0') - exe_file_length++; - - /* Path must have backslash before exe file name. 
*/ - if (exe_file_length >= path_length || - path[path_length-exe_file_length-1] != L'\\') - return FALSE; - - for (i = 0; i < exe_file_length; i++) - { - c1 = path[path_length-exe_file_length+i]; - c2 = exe_file[i]; - /* - * Input string for comparison is 7-bit ASCII and file name part - * of path must not contain backslash as it is path separator. - */ - if (c1 >= 0x80 || c2 >= 0x80 || c1 == L'\\') - return FALSE; - if (c1 >= L'a' && c1 <= L'z') - c1 -= L'a' - L'A'; - if (c2 >= 'a' && c2 <= 'z') - c2 -= 'a' - 'A'; - if (c1 != c2) - return FALSE; - } - - return TRUE; -} - -/* Open process handle with the query right specified by process exe file. */ -static HANDLE -find_and_open_process_for_query(LPCSTR exe_file) -{ - GetProcessImageFileNameWProt MyGetProcessImageFileNameW; - GetModuleFileNameExWProt MyGetModuleFileNameExW; - EnumProcessesProt MyEnumProcesses; - HMODULE kernel32, psapi; - UINT prev_error_mode; - DWORD partial_retry; - BOOL found_process; - DWORD size, length; - DWORD *processes; - HANDLE process; - LPWSTR path; - DWORD error; - DWORD count; - DWORD i; - - psapi = NULL; - kernel32 = GetModuleHandle(TEXT("kernel32.dll")); - if (!kernel32) - return NULL; - - /* - * On Windows 7 and higher systems these functions are available in - * kernel32.dll library with K32 prefix. - */ - MyGetModuleFileNameExW = NULL; - MyGetProcessImageFileNameW = (GetProcessImageFileNameWProt)(LPVOID)GetProcAddress(kernel32, "K32GetProcessImageFileNameW"); - MyEnumProcesses = (EnumProcessesProt)(LPVOID)GetProcAddress(kernel32, "K32EnumProcesses"); - if (!MyGetProcessImageFileNameW || !MyEnumProcesses) - { - /* - * On older NT-based systems these functions are available in - * psapi.dll library without K32 prefix. 
- */ - prev_error_mode = change_error_mode(SEM_FAILCRITICALERRORS); - psapi = LoadLibrary(TEXT("psapi.dll")); - change_error_mode(prev_error_mode); - - if (!psapi) - return NULL; - - /* - * Function GetProcessImageFileNameW() is available in - * Windows XP and higher systems. On older versions is - * available function GetModuleFileNameExW(). - */ - MyGetProcessImageFileNameW = (GetProcessImageFileNameWProt)(LPVOID)GetProcAddress(psapi, "GetProcessImageFileNameW"); - MyGetModuleFileNameExW = (GetModuleFileNameExWProt)(LPVOID)GetProcAddress(psapi, "GetModuleFileNameExW"); - MyEnumProcesses = (EnumProcessesProt)(LPVOID)GetProcAddress(psapi, "EnumProcesses"); - if ((!MyGetProcessImageFileNameW && !MyGetModuleFileNameExW) || !MyEnumProcesses) - { - FreeLibrary(psapi); - return NULL; - } - } - - /* Make initial buffer size for 1024 processes. */ - size = 1024 * sizeof(*processes); - -retry: - processes = (DWORD *)LocalAlloc(LPTR, size); - if (!processes) - { - if (psapi) - FreeLibrary(psapi); - return NULL; - } - - if (!MyEnumProcesses(processes, size, &length)) - { - LocalFree(processes); - if (psapi) - FreeLibrary(psapi); - return NULL; - } - else if (size == length) - { - /* - * There is no indication given when the buffer is too small to - * store all process identifiers. Therefore if returned length - * is same as buffer size there can be more processes. Call - * again with larger buffer. - */ - LocalFree(processes); - size *= 2; - goto retry; - } - - process = NULL; - count = length / sizeof(*processes); - - for (i = 0; i < count; i++) - { - /* Skip System Idle Process. */ - if (processes[i] == 0) - continue; - - /* - * Function GetModuleFileNameExW() requires additional - * PROCESS_VM_READ right as opposite to function - * GetProcessImageFileNameW() which does not need it. - */ - process = open_process_for_query(processes[i], MyGetProcessImageFileNameW ? FALSE : TRUE); - if (!process) - continue; - - /* - * Set initial buffer size to 256 (wide) characters. 
- * Final path length on the modern NT-based systems can be also larger. - */ - size = 256; - found_process = FALSE; - partial_retry = 0; - -retry_path: - path = (LPWSTR)LocalAlloc(LPTR, size * sizeof(*path)); - if (!path) - goto end_path; - - if (MyGetProcessImageFileNameW) - length = MyGetProcessImageFileNameW(process, path, size); - else - length = MyGetModuleFileNameExW(process, NULL, path, size); - - error = GetLastError(); - - /* - * GetModuleFileNameEx() returns zero and signal error ERROR_PARTIAL_COPY - * when remote process is in the middle of updating its module table. - * Sleep 10 ms and try again, max 10 attempts. - */ - if (!MyGetProcessImageFileNameW) - { - if (length == 0 && error == ERROR_PARTIAL_COPY && partial_retry++ < 10) - { - Sleep(10); - goto retry_path; - } - partial_retry = 0; - } - - /* - * When buffer is too small then function GetModuleFileNameEx() returns - * its size argument on older systems (Windows XP) or its size minus - * argument one on new systems (Windows 10) without signalling any error. - * Function GetProcessImageFileNameW() on the other hand returns zero - * value and signals error ERROR_INSUFFICIENT_BUFFER. So in all these - * cases call function again with larger buffer. - */ - - if (MyGetProcessImageFileNameW && length == 0 && error != ERROR_INSUFFICIENT_BUFFER) - goto end_path; - - if ((MyGetProcessImageFileNameW && length == 0) || - (!MyGetProcessImageFileNameW && (length == size || length == size-1))) - { - LocalFree(path); - size *= 2; - goto retry_path; - } - - if (length && check_process_name(path, length, exe_file)) - found_process = TRUE; - -end_path: - if (path) - { - LocalFree(path); - path = NULL; - } - - if (found_process) - break; - - CloseHandle(process); - process = NULL; - } - - LocalFree(processes); - - if (psapi) - FreeLibrary(psapi); - - return process; -} - -/* - * Try to open primary access token of the particular process with specified - * rights. 
Before opening access token try to adjust DACL permissions of the - * primary process access token, so following open does not fail on error - * related to no open permissions. Revert DACL permissions after open attempt. - * As following steps are not atomic, try to execute them more times in case - * of possible race conditions caused by other threads or processes. - */ -static HANDLE -try_grant_permissions_and_open_process_token(HANDLE process, DWORD rights) -{ - PSECURITY_DESCRIPTOR security_descriptor; - HANDLE grant_token; - PACL old_dacl; - HANDLE token; - DWORD retry; - DWORD error; - - /* - * This code is not atomic. Between grant and open calls can other - * thread or process change or revert permissions. So try to execute - * it more times. - */ - for (retry = 0; retry < 10; retry++) - { - if (!grant_process_token_dacl_permissions(process, rights, &grant_token, &old_dacl, &security_descriptor)) - return NULL; - if (!OpenProcessToken(process, rights, &token)) - { - token = NULL; - error = GetLastError(); - } - revert_token_dacl_permissions(grant_token, old_dacl, security_descriptor); - if (token) - return token; - else if (error != ERROR_ACCESS_DENIED) - return NULL; - } - - return NULL; -} - -/* - * Open primary access token of particular process handle with specified rights. - * If permissions for specified rights are missing then try to grant them. - */ -static HANDLE -open_process_token_with_rights(HANDLE process, DWORD rights) -{ - HANDLE old_token; - HANDLE token; - - /* First try to open primary access token of process handle directly. */ - if (OpenProcessToken(process, rights, &token)) - return token; - - /* - * If opening failed then it means that owner of the current thread - * access token does not have permission for it. Try it again with - * primary process access token. 
- */ - if (change_token_to_primary(&old_token)) - { - if (!OpenProcessToken(process, rights, &token)) - token = NULL; - revert_to_token(old_token); - if (token) - return token; - } - - /* - * If opening is still failing then try to grant specified permissions - * for the current thread and try to open it again. - */ - token = try_grant_permissions_and_open_process_token(process, rights); - if (token) - return token; - - /* - * And if it is still failing then try it again with granting - * permissions for the primary process token of the current process. - */ - if (change_token_to_primary(&old_token)) - { - token = try_grant_permissions_and_open_process_token(process, rights); - revert_to_token(old_token); - if (token) - return token; - } - - /* - * TODO: Sorry, no other option for now... - * It could be possible to use Take Ownership Name privilege to - * temporary change token owner of specified process to the owner of - * the current thread token, grant permissions for current thread in - * that process token, change ownership back to original one, open - * that process token and revert granted permissions. But this is - * not implemented yet. 
- */ - return NULL; + return FALSE; } /* @@ -1068,226 +144,45 @@ open_process_token_with_rights(HANDLE process, DWORD rights) static BOOL SetProcessUserModeIOPL(VOID) { - NtSetInformationProcessProt MyNtSetInformationProcess; - - LUID luid_tcb_privilege; - LUID luid_impersonate_privilege; - - HANDLE revert_token_tcb_privilege; - BOOL revert_only_tcb_privilege; - - HANDLE revert_token_impersonate_privilege; - BOOL revert_only_impersonate_privilege; - - BOOL impersonate_privilege_enabled; - - BOOL revert_to_old_token; - HANDLE old_token; - - HANDLE lsass_process; - HANDLE lsass_token; - + LPVOID Arg[2]; UINT prev_error_mode; - NTSTATUS nt_status; HMODULE ntdll; BOOL ret; - impersonate_privilege_enabled = FALSE; - revert_to_old_token = FALSE; - lsass_token = NULL; - old_token = NULL; - - /* Fast path when ProcessUserModeIOPL was already called. */ - if (read_iopl() == 3) - return TRUE; - /* * Load ntdll.dll library with disabled critical-error-handler message box. * It means that NT kernel does not show unwanted GUI message box to user * when LoadLibrary() function fails. */ - prev_error_mode = change_error_mode(SEM_FAILCRITICALERRORS); + prev_error_mode = win32_change_error_mode(SEM_FAILCRITICALERRORS); ntdll = LoadLibrary(TEXT("ntdll.dll")); - change_error_mode(prev_error_mode); + win32_change_error_mode(prev_error_mode); if (!ntdll) - goto err_not_implemented; - - /* Retrieve pointer to NtSetInformationProcess() function. */ - MyNtSetInformationProcess = (NtSetInformationProcessProt)(LPVOID)GetProcAddress(ntdll, "NtSetInformationProcess"); - if (!MyNtSetInformationProcess) - goto err_not_implemented; - - /* - * ProcessUserModeIOPL is syscall for NT kernel to change x86 IOPL - * of the current running process to 3. - * - * Process handle argument for ProcessUserModeIOPL is ignored and - * IOPL is always changed for the current running process. So pass - * GetCurrentProcess() handle for documentation purpose. 
Process - * information buffer and length are unused for ProcessUserModeIOPL. - * - * ProcessUserModeIOPL may success (return value >= 0) or may fail - * because it is not implemented or because of missing privilege. - * Other errors are not defined, so handle them as unknown. - */ - nt_status = MyNtSetInformationProcess(GetCurrentProcess(), ProcessUserModeIOPL, NULL, 0); - if (nt_status >= 0) - goto verify; - else if (nt_status == STATUS_NOT_IMPLEMENTED) - goto err_not_implemented; - else if (nt_status != STATUS_PRIVILEGE_NOT_HELD) - goto err_unknown; - - /* - * If ProcessUserModeIOPL call failed with STATUS_PRIVILEGE_NOT_HELD - * error then it means that the current thread token does not have - * Tcb privilege enabled. Try to enable it. - */ - - if (!LookupPrivilegeValue(NULL, SE_TCB_NAME, &luid_tcb_privilege)) - goto err_privilege_not_held; - - /* - * If the current thread has already Tcb privilege enabled then there - * is some additional unhanded restriction. - */ - if (have_privilege(luid_tcb_privilege)) - goto err_privilege_not_held; - - /* Try to enable Tcb privilege and try ProcessUserModeIOPL call again. */ - if (enable_privilege(luid_tcb_privilege, &revert_token_tcb_privilege, &revert_only_tcb_privilege)) { - nt_status = MyNtSetInformationProcess(GetCurrentProcess(), ProcessUserModeIOPL, NULL, 0); - revert_privilege(luid_tcb_privilege, revert_token_tcb_privilege, revert_only_tcb_privilege); - if (nt_status >= 0) - goto verify; - else if (nt_status == STATUS_NOT_IMPLEMENTED) - goto err_not_implemented; - else if (nt_status == STATUS_PRIVILEGE_NOT_HELD) - goto err_privilege_not_held; - else - goto err_unknown; + SetLastError(ERROR_INVALID_FUNCTION); + return FALSE; } - /* - * If enabling of Tcb privilege failed then it means that current thread - * does not this privilege. But current process may have it. So try it - * again with primary process access token. 
- */ - - /* - * If system supports Impersonate privilege (Windows 2000 SP4 or higher) then - * all future actions in this function require this Impersonate privilege. - * So try to enable it in case it is currently disabled. - */ - if (LookupPrivilegeValue(NULL, SE_IMPERSONATE_NAME, &luid_impersonate_privilege) && - !have_privilege(luid_impersonate_privilege)) + /* Retrieve pointer to NtSetInformationProcess() function. */ + Arg[0] = (LPVOID)GetProcAddress(ntdll, "NtSetInformationProcess"); + if (!Arg[0]) { - /* - * If current thread does not have Impersonate privilege enabled - * then first try to enable it just for the current thread. If - * it is not possible to enable it just for the current thread - * then try it to enable globally for whole process (which - * affects all process threads). Both actions will be reverted - * at the end of this function. - */ - if (enable_privilege(luid_impersonate_privilege, &revert_token_impersonate_privilege, &revert_only_impersonate_privilege)) - { - impersonate_privilege_enabled = TRUE; - } - else if (enable_privilege(luid_impersonate_privilege, NULL, NULL)) - { - impersonate_privilege_enabled = TRUE; - revert_token_impersonate_privilege = NULL; - revert_only_impersonate_privilege = TRUE; - } - else - { - goto err_privilege_not_held; - } - - /* - * Now when Impersonate privilege is enabled, try to enable Tcb - * privilege again. Enabling other privileges for the current - * thread requires Impersonate privilege, so enabling Tcb again - * could now pass. 
- */ - if (enable_privilege(luid_tcb_privilege, &revert_token_tcb_privilege, &revert_only_tcb_privilege)) - { - nt_status = MyNtSetInformationProcess(GetCurrentProcess(), ProcessUserModeIOPL, NULL, 0); - revert_privilege(luid_tcb_privilege, revert_token_tcb_privilege, revert_only_tcb_privilege); - if (nt_status >= 0) - goto verify; - else if (nt_status == STATUS_NOT_IMPLEMENTED) - goto err_not_implemented; - else if (nt_status == STATUS_PRIVILEGE_NOT_HELD) - goto err_privilege_not_held; - else - goto err_unknown; - } + FreeLibrary(ntdll); + SetLastError(ERROR_INVALID_FUNCTION); + return FALSE; } - /* - * If enabling Tcb privilege failed then it means that the current - * thread access token does not have this privilege or does not - * have permission to adjust privileges. - * - * Try to use more privileged token from Local Security Authority - * Subsystem Service process (lsass.exe) which has Tcb privilege. - * Retrieving this more privileged token is possible for local - * administrators (unless it was disabled by local administrators). - */ - - lsass_process = find_and_open_process_for_query("lsass.exe"); - if (!lsass_process) - goto err_privilege_not_held; + /* Retrieve pointer to optional RtlNtStatusToDosError() function, it may be NULL. */ + Arg[1] = (LPVOID)GetProcAddress(ntdll, "RtlNtStatusToDosError"); - /* - * Open primary lsass.exe process access token with query and duplicate - * rights. Just these two rights are required for impersonating other - * primary process token (impersonate right is really not required!). - */ - lsass_token = open_process_token_with_rights(lsass_process, TOKEN_QUERY | TOKEN_DUPLICATE); - - CloseHandle(lsass_process); - - if (!lsass_token) - goto err_privilege_not_held; - - /* - * After successful open of the primary lsass.exe process access token, - * assign its copy for the current thread. - */ - if (!change_token(lsass_token, &old_token)) - goto err_privilege_not_held; + /* Call ProcessUserModeIOPL with Tcb privilege. 
*/ + ret = win32_call_func_with_tcb_privilege(SetProcessUserModeIOPLFunc, (LPVOID)&Arg); - revert_to_old_token = TRUE; + FreeLibrary(ntdll); - nt_status = MyNtSetInformationProcess(GetCurrentProcess(), ProcessUserModeIOPL, NULL, 0); - if (nt_status == STATUS_PRIVILEGE_NOT_HELD) - { - /* - * Now current thread is not using primary process token anymore - * but is using custom access token. There is no need to revert - * enabled Tcb privilege as the whole custom access token would - * be reverted. So there is no need to setup revert method for - * enabling privilege. - */ - if (have_privilege(luid_tcb_privilege) || - !enable_privilege(luid_tcb_privilege, NULL, NULL)) - goto err_privilege_not_held; - nt_status = MyNtSetInformationProcess(GetCurrentProcess(), ProcessUserModeIOPL, NULL, 0); - } - if (nt_status >= 0) - goto verify; - else if (nt_status == STATUS_NOT_IMPLEMENTED) - goto err_not_implemented; - else if (nt_status == STATUS_PRIVILEGE_NOT_HELD) - goto err_privilege_not_held; - else - goto err_unknown; + if (!ret) + return FALSE; -verify: /* * Some Windows NT kernel versions (e.g. Windows 2003 x64) do not * implement ProcessUserModeIOPL syscall at all but incorrectly @@ -1295,39 +190,12 @@ verify: * after this call verify that IOPL is set to 3. 
*/ if (read_iopl() != 3) - goto err_not_implemented; - ret = TRUE; - goto ret; - -err_not_implemented: - SetLastError(ERROR_INVALID_FUNCTION); - ret = FALSE; - goto ret; - -err_privilege_not_held: - SetLastError(ERROR_PRIVILEGE_NOT_HELD); - ret = FALSE; - goto ret; - -err_unknown: - SetLastError(ERROR_GEN_FAILURE); - ret = FALSE; - goto ret; - -ret: - if (revert_to_old_token) - revert_to_token(old_token); - - if (impersonate_privilege_enabled) - revert_privilege(luid_impersonate_privilege, revert_token_impersonate_privilege, revert_only_impersonate_privilege); - - if (lsass_token) - CloseHandle(lsass_token); - - if (ntdll) - FreeLibrary(ntdll); + { + SetLastError(ERROR_INVALID_FUNCTION); + return FALSE; + } - return ret; + return TRUE; } static int @@ -1335,20 +203,25 @@ intel_setup_io(struct pci_access *a) { #ifndef _WIN64 /* 16/32-bit non-NT systems allow applications to access PCI I/O ports without any special setup. */ - OSVERSIONINFOA version; - version.dwOSVersionInfoSize = sizeof(version); - if (GetVersionExA(&version) && version.dwPlatformId < VER_PLATFORM_WIN32_NT) + if (win32_is_non_nt_system()) { a->debug("Detected 16/32-bit non-NT system, skipping NT setup..."); return 1; } #endif + /* Check if we have I/O permission */ + if (read_iopl() == 3) + { + a->debug("IOPL is already set to 3, skipping NT setup..."); + return 1; + } + /* On NT-based systems issue ProcessUserModeIOPL syscall which changes IOPL to 3. */ if (!SetProcessUserModeIOPL()) { DWORD error = GetLastError(); - a->debug("NT ProcessUserModeIOPL call failed: %s.", error == ERROR_INVALID_FUNCTION ? "Not Implemented" : error == ERROR_PRIVILEGE_NOT_HELD ? "Access Denied" : "Operation Failed"); + a->debug("NT ProcessUserModeIOPL call failed: %s.", error == ERROR_INVALID_FUNCTION ? 
"Call is not supported" : win32_strerror(error)); return 0; } diff --git a/lib/i386-ports.c b/lib/i386-ports.c index 1e2c402..5f8aea4 100644 --- a/lib/i386-ports.c +++ b/lib/i386-ports.c @@ -30,6 +30,8 @@ #include "i386-io-beos.h" #elif defined(PCI_OS_DJGPP) #include "i386-io-djgpp.h" +#elif defined(PCI_OS_OPENBSD) +#include "i386-io-openbsd.h" #else #error Do not know how to access I/O ports on this OS. #endif @@ -116,12 +118,12 @@ conf1_detect(struct pci_access *a) } intel_io_lock(); - outb (0x01, 0xCFB); - tmp = inl (0xCF8); - outl (0x80000000, 0xCF8); - if (inl (0xCF8) == 0x80000000) + intel_outb (0x01, 0xCFB); + tmp = intel_inl (0xCF8); + intel_outl (0x80000000, 0xCF8); + if (intel_inl (0xCF8) == 0x80000000) res = 1; - outl (tmp, 0xCF8); + intel_outl (tmp, 0xCF8); intel_io_unlock(); if (res) @@ -142,18 +144,18 @@ conf1_read(struct pci_dev *d, int pos, byte *buf, int len) return pci_generic_block_read(d, pos, buf, len); intel_io_lock(); - outl(0x80000000 | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos&~3), 0xcf8); + intel_outl(0x80000000 | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos&~3), 0xcf8); switch (len) { case 1: - buf[0] = inb(addr); + buf[0] = intel_inb(addr); break; case 2: - ((u16 *) buf)[0] = cpu_to_le16(inw(addr)); + ((u16 *) buf)[0] = cpu_to_le16(intel_inw(addr)); break; case 4: - ((u32 *) buf)[0] = cpu_to_le32(inl(addr)); + ((u32 *) buf)[0] = cpu_to_le32(intel_inl(addr)); break; } @@ -174,18 +176,18 @@ conf1_write(struct pci_dev *d, int pos, byte *buf, int len) return pci_generic_block_write(d, pos, buf, len); intel_io_lock(); - outl(0x80000000 | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos&~3), 0xcf8); + intel_outl(0x80000000 | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos&~3), 0xcf8); switch (len) { case 1: - outb(buf[0], addr); + intel_outb(buf[0], addr); break; case 2: - outw(le16_to_cpu(((u16 *) buf)[0]), addr); + intel_outw(le16_to_cpu(((u16 *) buf)[0]), 
addr); break; case 4: - outl(le32_to_cpu(((u32 *) buf)[0]), addr); + intel_outl(le32_to_cpu(((u32 *) buf)[0]), addr); break; } intel_io_unlock(); @@ -210,10 +212,10 @@ conf2_detect(struct pci_access *a) /* This is ugly and tends to produce false positives. Beware. */ intel_io_lock(); - outb(0x00, 0xCFB); - outb(0x00, 0xCF8); - outb(0x00, 0xCFA); - if (inb(0xCF8) == 0x00 && inb(0xCFA) == 0x00) + intel_outb(0x00, 0xCFB); + intel_outb(0x00, 0xCF8); + intel_outb(0x00, 0xCFA); + if (intel_inb(0xCF8) == 0x00 && intel_inb(0xCFA) == 0x00) res = intel_sanity_check(a, &pm_intel_conf2); intel_io_unlock(); return res; @@ -236,21 +238,21 @@ conf2_read(struct pci_dev *d, int pos, byte *buf, int len) return pci_generic_block_read(d, pos, buf, len); intel_io_lock(); - outb((d->func << 1) | 0xf0, 0xcf8); - outb(d->bus, 0xcfa); + intel_outb((d->func << 1) | 0xf0, 0xcf8); + intel_outb(d->bus, 0xcfa); switch (len) { case 1: - buf[0] = inb(addr); + buf[0] = intel_inb(addr); break; case 2: - ((u16 *) buf)[0] = cpu_to_le16(inw(addr)); + ((u16 *) buf)[0] = cpu_to_le16(intel_inw(addr)); break; case 4: - ((u32 *) buf)[0] = cpu_to_le32(inl(addr)); + ((u32 *) buf)[0] = cpu_to_le32(intel_inl(addr)); break; } - outb(0, 0xcf8); + intel_outb(0, 0xcf8); intel_io_unlock(); return res; } @@ -272,22 +274,22 @@ conf2_write(struct pci_dev *d, int pos, byte *buf, int len) return pci_generic_block_write(d, pos, buf, len); intel_io_lock(); - outb((d->func << 1) | 0xf0, 0xcf8); - outb(d->bus, 0xcfa); + intel_outb((d->func << 1) | 0xf0, 0xcf8); + intel_outb(d->bus, 0xcfa); switch (len) { case 1: - outb(buf[0], addr); + intel_outb(buf[0], addr); break; case 2: - outw(le16_to_cpu(* (u16 *) buf), addr); + intel_outw(le16_to_cpu(* (u16 *) buf), addr); break; case 4: - outl(le32_to_cpu(* (u32 *) buf), addr); + intel_outl(le32_to_cpu(* (u32 *) buf), addr); break; } - outb(0, 0xcf8); + intel_outb(0, 0xcf8); intel_io_unlock(); return res; } @@ -1,7 +1,7 @@ /* * The PCI Library -- Initialization and related things * 
- * Copyright (c) 1997--2018 Martin Mares <mj@ucw.cz> + * Copyright (c) 1997--2024 Martin Mares <mj@ucw.cz> * * Can be freely distributed and used under the terms of the GNU GPL v2+. * @@ -430,6 +430,27 @@ pci_init_name_list_path(struct pci_access *a) #endif +#ifdef PCI_USE_DNS + +static void +pci_init_dns(struct pci_access *a) +{ + pci_define_param(a, "net.domain", PCI_ID_DOMAIN, "DNS domain used for resolving of ID's"); + a->id_lookup_mode = PCI_LOOKUP_CACHE; + + char *cache_dir = getenv("XDG_CACHE_HOME"); + if (!cache_dir) + cache_dir = "~/.cache"; + + int name_len = strlen(cache_dir) + 32; + char *cache_name = pci_malloc(NULL, name_len); + snprintf(cache_name, name_len, "%s/pci-ids", cache_dir); + struct pci_param *param = pci_define_param(a, "net.cache_name", cache_name, "Name of the ID cache file"); + param->value_malloced = 1; +} + +#endif + struct pci_access * pci_alloc(void) { @@ -439,9 +460,7 @@ pci_alloc(void) memset(a, 0, sizeof(*a)); pci_init_name_list_path(a); #ifdef PCI_USE_DNS - pci_define_param(a, "net.domain", PCI_ID_DOMAIN, "DNS domain used for resolving of ID's"); - pci_define_param(a, "net.cache_name", "~/.pciids-cache", "Name of the ID cache file"); - a->id_lookup_mode = PCI_LOOKUP_CACHE; + pci_init_dns(a); #endif #ifdef PCI_HAVE_HWDB pci_define_param(a, "hwdb.disable", "0", "Do not look up names in UDEV's HWDB if non-zero"); diff --git a/lib/internal.h b/lib/internal.h index ba07d26..549f94c 100644 --- a/lib/internal.h +++ b/lib/internal.h @@ -20,7 +20,7 @@ // optimizations is happy to optimize them away, leading to linker failures. 
#define VERSIONED_ABI __attribute__((used)) PCI_ABI #ifdef __APPLE__ -#define STATIC_ALIAS(_decl, _for) _decl VERSIONED_ABI { return _for; } +#define STATIC_ALIAS(_decl, _for) VERSIONED_ABI _decl { return _for; } #define DEFINE_ALIAS(_decl, _for) #define SYMBOL_VERSION(_int, _ext) #else @@ -130,7 +130,7 @@ struct pci_property { char *pci_set_property(struct pci_dev *d, u32 key, char *value); /* params.c */ -void pci_define_param(struct pci_access *acc, char *param, char *val, char *help); +struct pci_param *pci_define_param(struct pci_access *acc, char *param, char *val, char *help); int pci_set_param_internal(struct pci_access *acc, char *param, char *val, int copy); void pci_free_params(struct pci_access *acc); diff --git a/lib/mmio-ports.c b/lib/mmio-ports.c index 9a9b48d..cac8a7e 100644 --- a/lib/mmio-ports.c +++ b/lib/mmio-ports.c @@ -8,64 +8,58 @@ * SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Tell 32-bit platforms that we are interested in 64-bit variant of off_t type - * as 32-bit variant of off_t type is signed and so it cannot represent all - * possible 32-bit offsets. It is required because off_t type is used by mmap(). 
- */ -#define _FILE_OFFSET_BITS 64 - #include "internal.h" +#include "physmem.h" +#include "physmem-access.h" #include <ctype.h> #include <errno.h> #include <stdlib.h> #include <string.h> -#include <limits.h> - -#include <sys/mman.h> -#include <sys/types.h> -#include <fcntl.h> -#include <unistd.h> -#ifndef OFF_MAX -#define OFF_MAX (off_t)((1ULL << (sizeof(off_t) * CHAR_BIT - 1)) - 1) -#endif - -struct mmio_cache -{ - off_t addr_page; - off_t data_page; +struct mmio_cache { + u64 addr_page; + u64 data_page; void *addr_map; void *data_map; }; -static long pagesize; +struct mmio_access { + struct mmio_cache *cache; + struct physmem *physmem; + long pagesize; +}; static void munmap_regs(struct pci_access *a) { - struct mmio_cache *cache = a->backend_data; + struct mmio_access *macc = a->backend_data; + struct mmio_cache *cache = macc->cache; + struct physmem *physmem = macc->physmem; + long pagesize = macc->pagesize; if (!cache) return; - munmap(cache->addr_map, pagesize); + physmem_unmap(physmem, cache->addr_map, pagesize); if (cache->addr_page != cache->data_page) - munmap(cache->data_map, pagesize); + physmem_unmap(physmem, cache->data_map, pagesize); - pci_mfree(a->backend_data); - a->backend_data = NULL; + pci_mfree(macc->cache); + macc->cache = NULL; } static int -mmap_regs(struct pci_access *a, off_t addr_reg, off_t data_reg, int data_off, volatile void **addr, volatile void **data) +mmap_regs(struct pci_access *a, u64 addr_reg, u64 data_reg, int data_off, volatile void **addr, volatile void **data) { - struct mmio_cache *cache = a->backend_data; - off_t addr_page = addr_reg & ~(pagesize-1); - off_t data_page = data_reg & ~(pagesize-1); - void *addr_map = MAP_FAILED; - void *data_map = MAP_FAILED; + struct mmio_access *macc = a->backend_data; + struct mmio_cache *cache = macc->cache; + struct physmem *physmem = macc->physmem; + long pagesize = macc->pagesize; + u64 addr_page = addr_reg & ~(pagesize-1); + u64 data_page = data_reg & ~(pagesize-1); + void *addr_map 
= (void *)-1; + void *data_map = (void *)-1; if (cache && cache->addr_page == addr_page) addr_map = cache->addr_map; @@ -73,35 +67,35 @@ mmap_regs(struct pci_access *a, off_t addr_reg, off_t data_reg, int data_off, vo if (cache && cache->data_page == data_page) data_map = cache->data_map; - if (addr_map == MAP_FAILED) - addr_map = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, a->fd, addr_page); + if (addr_map == (void *)-1) + addr_map = physmem_map(physmem, addr_page, pagesize, 1); - if (addr_map == MAP_FAILED) + if (addr_map == (void *)-1) return 0; - if (data_map == MAP_FAILED) + if (data_map == (void *)-1) { if (data_page == addr_page) data_map = addr_map; else - data_map = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_SHARED, a->fd, data_page); + data_map = physmem_map(physmem, data_page, pagesize, 1); } - if (data_map == MAP_FAILED) + if (data_map == (void *)-1) { if (!cache || cache->addr_map != addr_map) - munmap(addr_map, pagesize); + physmem_unmap(physmem, addr_map, pagesize); return 0; } if (cache && cache->addr_page != addr_page) - munmap(cache->addr_map, pagesize); + physmem_unmap(physmem, cache->addr_map, pagesize); if (cache && cache->data_page != data_page && cache->data_page != cache->addr_page) - munmap(cache->data_map, pagesize); + physmem_unmap(physmem, cache->data_map, pagesize); if (!cache) - cache = a->backend_data = pci_malloc(a, sizeof(*cache)); + cache = macc->cache = pci_malloc(a, sizeof(*cache)); cache->addr_page = addr_page; cache->data_page = data_page; @@ -113,47 +107,11 @@ mmap_regs(struct pci_access *a, off_t addr_reg, off_t data_reg, int data_off, vo return 1; } -static void -writeb(unsigned char value, volatile void *addr) -{ - *(volatile unsigned char *)addr = value; -} - -static void -writew(unsigned short value, volatile void *addr) -{ - *(volatile unsigned short *)addr = value; -} - -static void -writel(u32 value, volatile void *addr) -{ - *(volatile u32 *)addr = value; -} - -static unsigned char -readb(volatile 
void *addr) -{ - return *(volatile unsigned char *)addr; -} - -static unsigned short -readw(volatile void *addr) -{ - return *(volatile unsigned short *)addr; -} - -static u32 -readl(volatile void *addr) -{ - return *(volatile u32 *)addr; -} - static int validate_addrs(const char *addrs) { const char *sep, *next; - unsigned long long num; + u64 num; char *endptr; if (!*addrs) @@ -174,12 +132,12 @@ validate_addrs(const char *addrs) errno = 0; num = strtoull(addrs, &endptr, 16); - if (errno || endptr != sep || (num & 3) || num > OFF_MAX) + if (errno || endptr != sep || (num & 3)) return 0; errno = 0; num = strtoull(sep+1, &endptr, 16); - if (errno || endptr != next || (num & 3) || num > OFF_MAX) + if (errno || endptr != next || (num & 3)) return 0; if (!*next) @@ -202,7 +160,7 @@ get_domain_count(const char *addrs) } static int -get_domain_addr(const char *addrs, int domain, off_t *addr_reg, off_t *data_reg) +get_domain_addr(const char *addrs, int domain, u64 *addr_reg, u64 *data_reg) { char *endptr; @@ -223,14 +181,14 @@ get_domain_addr(const char *addrs, int domain, off_t *addr_reg, off_t *data_reg) static void conf1_config(struct pci_access *a) { - pci_define_param(a, "devmem.path", PCI_PATH_DEVMEM_DEVICE, "Path to the /dev/mem device"); + physmem_init_config(a); pci_define_param(a, "mmio-conf1.addrs", "", "Physical addresses of memory mapped Intel conf1 interface"); /* format: 0xaddr1/0xdata1,0xaddr2/0xdata2,... */ } static void conf1_ext_config(struct pci_access *a) { - pci_define_param(a, "devmem.path", PCI_PATH_DEVMEM_DEVICE, "Path to the /dev/mem device"); + physmem_init_config(a); pci_define_param(a, "mmio-conf1-ext.addrs", "", "Physical addresses of memory mapped Intel conf1 extended interface"); /* format: 0xaddr1/0xdata1,0xaddr2/0xdata2,... 
*/ } @@ -238,7 +196,6 @@ static int detect(struct pci_access *a, char *addrs_param_name) { char *addrs = pci_get_param(a, addrs_param_name); - char *devmem = pci_get_param(a, "devmem.path"); if (!*addrs) { @@ -252,13 +209,13 @@ detect(struct pci_access *a, char *addrs_param_name) return 0; } - if (access(devmem, R_OK | W_OK)) + if (physmem_access(a, 1)) { - a->debug("cannot access %s: %s", devmem, strerror(errno)); + a->debug("cannot access physical memory: %s", strerror(errno)); return 0; } - a->debug("using %s with %s", devmem, addrs); + a->debug("using with %s", addrs); return 1; } @@ -288,11 +245,9 @@ conf1_init(struct pci_access *a) { char *addrs_param_name = get_addrs_param_name(a); char *addrs = pci_get_param(a, addrs_param_name); - char *devmem = pci_get_param(a, "devmem.path"); - - pagesize = sysconf(_SC_PAGESIZE); - if (pagesize < 0) - a->error("Cannot get page size: %s", strerror(errno)); + struct mmio_access *macc; + struct physmem *physmem; + long pagesize; if (!*addrs) a->error("Option %s was not specified.", addrs_param_name); @@ -300,20 +255,29 @@ conf1_init(struct pci_access *a) if (!validate_addrs(addrs)) a->error("Option %s has invalid address format \"%s\".", addrs_param_name, addrs); - a->fd = open(devmem, O_RDWR | O_DSYNC); /* O_DSYNC bypass CPU cache for mmap() on Linux */ - if (a->fd < 0) - a->error("Cannot open %s: %s.", devmem, strerror(errno)); + physmem = physmem_open(a, 1); + if (!physmem) + a->error("Cannot open physcal memory: %s.", strerror(errno)); + + pagesize = physmem_get_pagesize(physmem); + if (pagesize <= 0) + a->error("Cannot get page size: %s.", strerror(errno)); + + macc = pci_malloc(a, sizeof(*macc)); + macc->cache = NULL; + macc->physmem = physmem; + macc->pagesize = pagesize; + a->backend_data = macc; } static void conf1_cleanup(struct pci_access *a) { - if (a->fd < 0) - return; + struct mmio_access *macc = a->backend_data; munmap_regs(a); - close(a->fd); - a->fd = -1; + physmem_close(macc->physmem); + pci_mfree(macc); } 
static void @@ -334,7 +298,7 @@ conf1_ext_read(struct pci_dev *d, int pos, byte *buf, int len) char *addrs_param_name = get_addrs_param_name(d->access); char *addrs = pci_get_param(d->access, addrs_param_name); volatile void *addr, *data; - off_t addr_reg, data_reg; + u64 addr_reg, data_reg; if (pos >= 4096) return 0; @@ -348,19 +312,19 @@ conf1_ext_read(struct pci_dev *d, int pos, byte *buf, int len) if (!mmap_regs(d->access, addr_reg, data_reg, pos&3, &addr, &data)) return 0; - writel(0x80000000 | ((pos & 0xf00) << 16) | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos & 0xfc), addr); - readl(addr); /* write barrier for address */ + physmem_writel(0x80000000 | ((pos & 0xf00) << 16) | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos & 0xfc), addr); + physmem_readl(addr); /* write barrier for address */ switch (len) { case 1: - buf[0] = readb(data); + buf[0] = physmem_readb(data); break; case 2: - ((u16 *) buf)[0] = readw(data); + ((u16 *) buf)[0] = physmem_readw(data); break; case 4: - ((u32 *) buf)[0] = readl(data); + ((u32 *) buf)[0] = physmem_readl(data); break; } @@ -382,7 +346,7 @@ conf1_ext_write(struct pci_dev *d, int pos, byte *buf, int len) char *addrs_param_name = get_addrs_param_name(d->access); char *addrs = pci_get_param(d->access, addrs_param_name); volatile void *addr, *data; - off_t addr_reg, data_reg; + u64 addr_reg, data_reg; if (pos >= 4096) return 0; @@ -396,19 +360,19 @@ conf1_ext_write(struct pci_dev *d, int pos, byte *buf, int len) if (!mmap_regs(d->access, addr_reg, data_reg, pos&3, &addr, &data)) return 0; - writel(0x80000000 | ((pos & 0xf00) << 16) | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos & 0xfc), addr); - readl(addr); /* write barrier for address */ + physmem_writel(0x80000000 | ((pos & 0xf00) << 16) | ((d->bus & 0xff) << 16) | (PCI_DEVFN(d->dev, d->func) << 8) | (pos & 0xfc), addr); + physmem_readl(addr); /* write barrier for address */ switch (len) { case 1: - 
writeb(buf[0], data); + physmem_writeb(buf[0], data); break; case 2: - writew(((u16 *) buf)[0], data); + physmem_writew(((u16 *) buf)[0], data); break; case 4: - writel(((u32 *) buf)[0], data); + physmem_writel(((u32 *) buf)[0], data); break; } @@ -421,7 +385,7 @@ conf1_ext_write(struct pci_dev *d, int pos, byte *buf, int len) * Correct way is to issue CPU instruction for full hw sync barrier but gcc * does not provide any (builtin) function yet. */ - readl(addr); + physmem_readl(addr); return 1; } diff --git a/lib/names-cache.c b/lib/names-cache.c index 65bfb85..16e9e9a 100644 --- a/lib/names-cache.c +++ b/lib/names-cache.c @@ -18,6 +18,7 @@ #include <string.h> #include <errno.h> #include <sys/types.h> +#include <sys/stat.h> #include <pwd.h> #include <unistd.h> @@ -25,24 +26,75 @@ static const char cache_version[] = "#PCI-CACHE-1.0"; static char *get_cache_name(struct pci_access *a) { - char *name, *buf; - - name = pci_get_param(a, "net.cache_name"); - if (!name || !name[0]) - return NULL; - if (strncmp(name, "~/", 2)) - return name; - - uid_t uid = getuid(); - struct passwd *pw = getpwuid(uid); - if (!pw) - return name; - - buf = pci_malloc(a, strlen(pw->pw_dir) + strlen(name+1) + 1); - sprintf(buf, "%s%s", pw->pw_dir, name+1); - pci_set_param_internal(a, "net.cache_name", buf, 1); - pci_mfree(buf); - return pci_get_param(a, "net.cache_name"); + if (!a->id_cache_name) + { + char *name = pci_get_param(a, "net.cache_name"); + if (!name || !name[0]) + return NULL; + + if (strncmp(name, "~/", 2)) + a->id_cache_name = pci_strdup(a, name); + else + { + uid_t uid = getuid(); + struct passwd *pw = getpwuid(uid); + if (!pw) + return name; + + a->id_cache_name = pci_malloc(a, strlen(pw->pw_dir) + strlen(name+1) + 1); + sprintf(a->id_cache_name, "%s%s", pw->pw_dir, name+1); + } + } + + return a->id_cache_name; +} + +static void create_parent_dirs(struct pci_access *a, char *name) +{ + // Assumes that we have a private copy of the name we can modify + + char *p = name + 
strlen(name); + while (p > name && *p != '/') + p--; + if (p == name) + return; + + while (p > name) + { + // We stand at a slash. Check if the current prefix exists. + *p = 0; + struct stat st; + int res = stat(name, &st); + *p = '/'; + if (res >= 0) + break; + + // Does not exist yet, move up one directory + p--; + while (p > name && *p != '/') + p--; + } + + // We now stand at the end of the longest existing prefix. + // Create all directories to the right of it. + for (;;) + { + p++; + while (*p && *p != '/') + p++; + if (!*p) + break; + + *p = 0; + int res = mkdir(name, 0777); + if (res < 0) + { + a->warning("Cannot create directory %s: %s", name, strerror(errno)); + *p = '/'; + break; + } + *p = '/'; + } } int @@ -53,11 +105,15 @@ pci_id_cache_load(struct pci_access *a, int flags) FILE *f; int lino; + if (a->id_cache_status > 0) + return 0; a->id_cache_status = 1; + name = get_cache_name(a); if (!name) return 0; a->debug("Using cache %s\n", name); + if (flags & PCI_LOOKUP_REFRESH_CACHE) { a->debug("Not loading cache, will refresh everything\n"); @@ -130,6 +186,8 @@ pci_id_cache_flush(struct pci_access *a) if (!name) return; + create_parent_dirs(a, name); + this_pid = getpid(); if (gethostname(hostname, sizeof(hostname)) < 0) hostname[0] = 0; @@ -194,6 +252,8 @@ int pci_id_cache_load(struct pci_access *a UNUSED, int flags UNUSED) void pci_id_cache_flush(struct pci_access *a) { a->id_cache_status = 0; + pci_mfree(a->id_cache_name); + a->id_cache_name = NULL; } #endif diff --git a/lib/names-parse.c b/lib/names-parse.c index f50b8ec..1f8925a 100644 --- a/lib/names-parse.c +++ b/lib/names-parse.c @@ -223,7 +223,7 @@ pci_load_name_list(struct pci_access *a) const char *err; pci_free_name_list(a); - a->id_load_failed = 1; + a->id_load_attempted = 1; if (!(f = pci_open(a))) return 0; err = id_parse_list(a, f, &lino); @@ -231,7 +231,6 @@ pci_load_name_list(struct pci_access *a) pci_close(f); if (err) a->error("%s at %s, line %d\n", err, a->id_file_name, lino); - 
a->id_load_failed = 0; return 1; } @@ -241,7 +240,7 @@ pci_free_name_list(struct pci_access *a) pci_id_cache_flush(a); pci_id_hash_free(a); pci_id_hwdb_free(a); - a->id_load_failed = 0; + a->id_load_attempted = 0; } void diff --git a/lib/names.c b/lib/names.c index f8d3997..a287cb0 100644 --- a/lib/names.c +++ b/lib/names.c @@ -143,7 +143,7 @@ pci_lookup_name(struct pci_access *a, char *buf, int size, int flags, ...) if (flags & PCI_LOOKUP_MIXED) flags &= ~PCI_LOOKUP_NUMERIC; - if (!a->id_hash && !(flags & (PCI_LOOKUP_NUMERIC | PCI_LOOKUP_SKIP_LOCAL)) && !a->id_load_failed) + if (!a->id_load_attempted && !(flags & (PCI_LOOKUP_NUMERIC | PCI_LOOKUP_SKIP_LOCAL))) pci_load_name_list(a); switch (flags & 0xffff) diff --git a/lib/params.c b/lib/params.c index ac756ad..9b4c2e2 100644 --- a/lib/params.c +++ b/lib/params.c @@ -25,7 +25,7 @@ pci_get_param(struct pci_access *acc, char *param) return NULL; } -void +struct pci_param * pci_define_param(struct pci_access *acc, char *param, char *value, char *help) { struct pci_param *p, **pp; @@ -37,7 +37,7 @@ pci_define_param(struct pci_access *acc, char *param, char *value, char *help) { if (strcmp(p->value, value) || strcmp(p->help, help)) acc->error("Parameter %s re-defined differently", param); - return; + return p; } if (cmp > 0) break; @@ -50,6 +50,7 @@ pci_define_param(struct pci_access *acc, char *param, char *value, char *help) p->value = value; p->value_malloced = 0; p->help = help; + return p; } int @@ -82,8 +82,9 @@ struct pci_access { struct pci_param *params; struct id_entry **id_hash; /* names.c */ struct id_bucket *current_id_bucket; - int id_load_failed; + int id_load_attempted; int id_cache_status; /* 0=not read, 1=read, 2=dirty */ + char *id_cache_name; struct udev *id_udev; /* names-hwdb.c */ struct udev_hwdb *id_udev_hwdb; int fd; /* proc/sys: fd for config space */ diff --git a/lib/physmem-access.h b/lib/physmem-access.h new file mode 100644 index 0000000..a4e9744 --- /dev/null +++ b/lib/physmem-access.h @@ 
-0,0 +1,52 @@ +/* + * The PCI Library -- Compiler-specific wrappers for memory mapped I/O + * + * Copyright (c) 2023 Pali Rohár <pali@kernel.org> + * + * Can be freely distributed and used under the terms of the GNU GPL v2+ + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* + * FIXME + * Unfortunately gcc does not provide architecture independent way to read from + * or write to memory mapped I/O. The best approximation is to use volatile and + * for the write operation follow it by the read operation from the same address. + */ + +static inline void +physmem_writeb(unsigned char value, volatile void *ptr) +{ + *(volatile unsigned char *)ptr = value; +} + +static inline void +physmem_writew(unsigned short value, volatile void *ptr) +{ + *(volatile unsigned short *)ptr = value; +} + +static inline void +physmem_writel(u32 value, volatile void *ptr) +{ + *(volatile u32 *)ptr = value; +} + +static inline unsigned char +physmem_readb(volatile void *ptr) +{ + return *(volatile unsigned char *)ptr; +} + +static inline unsigned short +physmem_readw(volatile void *ptr) +{ + return *(volatile unsigned short *)ptr; +} + +static inline u32 +physmem_readl(volatile void *ptr) +{ + return *(volatile u32 *)ptr; +} diff --git a/lib/physmem-posix.c b/lib/physmem-posix.c new file mode 100644 index 0000000..7cd7e99 --- /dev/null +++ b/lib/physmem-posix.c @@ -0,0 +1,95 @@ +/* + * The PCI Library -- Physical memory mapping for POSIX systems + * + * Copyright (c) 2023 Pali Rohár <pali@kernel.org> + * + * Can be freely distributed and used under the terms of the GNU GPL v2+ + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +/* + * Tell 32-bit platforms that we are interested in 64-bit variant of off_t type + * as 32-bit variant of off_t type is signed and so it cannot represent all + * possible 32-bit offsets. It is required because off_t type is used by mmap(). 
+ */ +#define _FILE_OFFSET_BITS 64 + +#include "internal.h" +#include "physmem.h" + +#include <limits.h> +#include <errno.h> +#include <sys/mman.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> + +#ifndef OFF_MAX +#define OFF_MAX ((((off_t)1 << (sizeof(off_t) * CHAR_BIT - 2)) - 1) * 2 + 1) +#endif + +struct physmem { + int fd; +}; + +void +physmem_init_config(struct pci_access *a) +{ + pci_define_param(a, "devmem.path", PCI_PATH_DEVMEM_DEVICE, "Path to the /dev/mem device"); +} + +int +physmem_access(struct pci_access *a, int w) +{ + const char *devmem = pci_get_param(a, "devmem.path"); + a->debug("checking access permission of physical memory device %s for %s mode...", devmem, w ? "read/write" : "read-only"); + return access(devmem, R_OK | (w ? W_OK : 0)); +} + +struct physmem * +physmem_open(struct pci_access *a, int w) +{ + const char *devmem = pci_get_param(a, "devmem.path"); + struct physmem *physmem = pci_malloc(a, sizeof(struct physmem)); + + a->debug("trying to open physical memory device %s in %s mode...", devmem, w ? "read/write" : "read-only"); + physmem->fd = open(devmem, (w ? O_RDWR : O_RDONLY) | O_DSYNC); /* O_DSYNC bypass CPU cache for mmap() on Linux */ + if (physmem->fd < 0) + { + pci_mfree(physmem); + return NULL; + } + + return physmem; +} + +void +physmem_close(struct physmem *physmem) +{ + close(physmem->fd); + pci_mfree(physmem); +} + +long +physmem_get_pagesize(struct physmem *physmem UNUSED) +{ + return sysconf(_SC_PAGESIZE); +} + +void * +physmem_map(struct physmem *physmem, u64 addr, size_t length, int w) +{ + if (addr > OFF_MAX) + { + errno = EOVERFLOW; + return (void *)-1; + } + return mmap(NULL, length, PROT_READ | (w ? 
PROT_WRITE : 0), MAP_SHARED, physmem->fd, addr); +} + +int +physmem_unmap(struct physmem *physmem UNUSED, void *ptr, size_t length) +{ + return munmap(ptr, length); +} diff --git a/lib/physmem.h b/lib/physmem.h new file mode 100644 index 0000000..46ee021 --- /dev/null +++ b/lib/physmem.h @@ -0,0 +1,19 @@ +/* + * The PCI Library -- Physical memory mapping API + * + * Copyright (c) 2023 Pali Rohár <pali@kernel.org> + * + * Can be freely distributed and used under the terms of the GNU GPL v2+ + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +struct physmem; + +void physmem_init_config(struct pci_access *a); +int physmem_access(struct pci_access *a, int w); +struct physmem *physmem_open(struct pci_access *a, int w); +void physmem_close(struct physmem *physmem); +long physmem_get_pagesize(struct physmem *physmem); +void *physmem_map(struct physmem *physmem, u64 addr, size_t length, int w); +int physmem_unmap(struct physmem *physmem, void *ptr, size_t length); diff --git a/lib/sysdep.h b/lib/sysdep.h index 5695c30..40e1407 100644 --- a/lib/sysdep.h +++ b/lib/sysdep.h @@ -25,6 +25,13 @@ typedef u16 word; #ifdef PCI_OS_WINDOWS #define strcasecmp _strcmpi #define strncasecmp _strnicmp +#if defined(_MSC_VER) && _MSC_VER < 1800 +#if _MSC_VER < 1300 +#define strtoull strtoul +#else +#define strtoull _strtoui64 +#endif +#endif #if defined(_MSC_VER) && _MSC_VER < 1900 #define snprintf _snprintf #define vsnprintf _vsnprintf diff --git a/lib/types.h b/lib/types.h index 2004906..260c981 100644 --- a/lib/types.h +++ b/lib/types.h @@ -9,6 +9,7 @@ */ #include <sys/types.h> +#include <stddef.h> #ifndef PCI_HAVE_Uxx_TYPES @@ -22,7 +23,7 @@ typedef unsigned __int64 u64; #define PCI_U64_FMT_X "I64x" #define PCI_U64_FMT_U "I64u" -#elif defined(PCI_HAVE_STDINT_H) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +#else /* Use standard types in C99 and newer */ #include <stdint.h> #include <inttypes.h> @@ -32,25 +33,6 @@ typedef uint32_t u32; typedef uint64_t u64; #define 
PCI_U64_FMT_X PRIx64 #define PCI_U64_FMT_U PRIu64 - -#else -/* Hope for POSIX types from <sys/types.h> */ -typedef u_int8_t u8; -typedef u_int16_t u16; -typedef u_int32_t u32; - -/* u64 will be unsigned (long) long */ -#include <limits.h> -#if ULONG_MAX > 0xffffffff -typedef unsigned long u64; -#define PCI_U64_FMT_X "lx" -#define PCI_U64_FMT_U "lu" -#else -typedef unsigned long long u64; -#define PCI_U64_FMT_X "llx" -#define PCI_U64_FMT_U "llu" -#endif - #endif #endif /* PCI_HAVE_Uxx_TYPES */ diff --git a/lib/win32-cfgmgr32.c b/lib/win32-cfgmgr32.c index 4acac2e..a001187 100644 --- a/lib/win32-cfgmgr32.c +++ b/lib/win32-cfgmgr32.c @@ -17,6 +17,7 @@ #include <wchar.h> /* for wcslen(), wcscpy() */ #include "internal.h" +#include "win32-helpers.h" /* Unfortunately MinGW32 toolchain does not provide these cfgmgr32 constants. */ @@ -213,32 +214,6 @@ cr_strerror(CONFIGRET cr_error_id) return cr_errors[cr_error_id]; } -static const char * -win32_strerror(DWORD win32_error_id) -{ - /* - * Use static buffer which is large enough. - * Hopefully no Win32 API error message string is longer than 4 kB. - */ - static char buffer[4096]; - DWORD len; - - len = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, win32_error_id, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buffer, sizeof(buffer), NULL); - - /* FormatMessage() automatically appends ".\r\n" to the error message. 
*/ - if (len && buffer[len-1] == '\n') - buffer[--len] = '\0'; - if (len && buffer[len-1] == '\r') - buffer[--len] = '\0'; - if (len && buffer[len-1] == '.') - buffer[--len] = '\0'; - - if (!len) - sprintf(buffer, "Unknown Win32 error %lu", win32_error_id); - - return buffer; -} - static int fmt_validate(const char *s, int len, const char *fmt) { @@ -267,56 +242,6 @@ seq_xdigit_validate(const char *s, int mult, int min) return 1; } -static BOOL -is_non_nt_system(void) -{ - OSVERSIONINFOA version; - version.dwOSVersionInfoSize = sizeof(version); - return GetVersionExA(&version) && version.dwPlatformId < VER_PLATFORM_WIN32_NT; -} - -static BOOL -is_32bit_on_win8_64bit_system(void) -{ -#ifdef _WIN64 - return FALSE; -#else - BOOL (WINAPI *MyIsWow64Process)(HANDLE, PBOOL); - OSVERSIONINFOA version; - HMODULE kernel32; - BOOL is_wow64; - - /* Check for Windows 8 (NT 6.2). */ - version.dwOSVersionInfoSize = sizeof(version); - if (!GetVersionExA(&version) || - version.dwPlatformId != VER_PLATFORM_WIN32_NT || - version.dwMajorVersion < 6 || - (version.dwMajorVersion == 6 && version.dwMinorVersion < 2)) - return FALSE; - - /* - * Check for 64-bit system via IsWow64Process() function exported - * from 32-bit kernel32.dll library available on the 64-bit systems. - * Resolve pointer to this function at runtime as this code path is - * primary running on 32-bit systems where are not available 64-bit - * functions. - */ - - kernel32 = GetModuleHandleA("kernel32.dll"); - if (!kernel32) - return FALSE; - - MyIsWow64Process = (void *)GetProcAddress(kernel32, "IsWow64Process"); - if (!MyIsWow64Process) - return FALSE; - - if (!MyIsWow64Process(GetCurrentProcess(), &is_wow64)) - return FALSE; - - return is_wow64; -#endif -} - static LPWSTR get_device_service_name(struct pci_access *a, DEVINST devinst, DEVINSTID_A devinst_id, BOOL *supported) { @@ -995,7 +920,7 @@ fill_resources(struct pci_dev *d, DEVINST devinst, DEVINSTID_A devinst_id) * application using the hardware resource APIs. 
For example: An AMD64 * application for AMD64 systems. */ - if (cr == CR_CALL_NOT_IMPLEMENTED && is_32bit_on_win8_64bit_system()) + if (cr == CR_CALL_NOT_IMPLEMENTED && win32_is_32bit_on_win8_64bit_system()) { static BOOL warn_once = FALSE; if (!warn_once) @@ -1010,7 +935,7 @@ fill_resources(struct pci_dev *d, DEVINST devinst, DEVINSTID_A devinst_id) } bar_res_count = 0; - non_nt_system = is_non_nt_system(); + non_nt_system = win32_is_non_nt_system(); is_bar_res = TRUE; if (non_nt_system) diff --git a/lib/win32-helpers.c b/lib/win32-helpers.c new file mode 100644 index 0000000..e12de1a --- /dev/null +++ b/lib/win32-helpers.c @@ -0,0 +1,1373 @@ +/* + * The PCI Library -- Win32 helper functions + * + * Copyright (c) 2023 Pali Rohár <pali@kernel.org> + * + * Can be freely distributed and used under the terms of the GNU GPL v2+ + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <windows.h> + +#include <stdio.h> /* for sprintf() */ + +#include "win32-helpers.h" + +/* Unfortunately i586-mingw32msvc toolchain does not provide this constant. */ +#ifndef PROCESS_QUERY_LIMITED_INFORMATION +#define PROCESS_QUERY_LIMITED_INFORMATION 0x1000 +#endif + +/* Unfortunately some toolchains do not provide this constant. */ +#ifndef SE_IMPERSONATE_NAME +#define SE_IMPERSONATE_NAME TEXT("SeImpersonatePrivilege") +#endif + +/* Unfortunately some toolchains do not provide these constants. */ +#ifndef SE_DACL_AUTO_INHERIT_REQ +#define SE_DACL_AUTO_INHERIT_REQ 0x0100 +#endif +#ifndef SE_SACL_AUTO_INHERIT_REQ +#define SE_SACL_AUTO_INHERIT_REQ 0x0200 +#endif +#ifndef SE_DACL_AUTO_INHERITED +#define SE_DACL_AUTO_INHERITED 0x0400 +#endif +#ifndef SE_SACL_AUTO_INHERITED +#define SE_SACL_AUTO_INHERITED 0x0800 +#endif + +/* + * These psapi functions are available in kernel32.dll library with K32 prefix + * on Windows 7 and higher systems. On older Windows systems these functions are + * available in psapi.dll libary without K32 prefix. 
So resolve pointers to + * these functions dynamically at runtime from the available system library. + * Function GetProcessImageFileNameW() is not available on Windows 2000 and + * older systems. + */ +typedef BOOL (WINAPI *EnumProcessesProt)(DWORD *lpidProcess, DWORD cb, DWORD *cbNeeded); +typedef DWORD (WINAPI *GetProcessImageFileNameWProt)(HANDLE hProcess, LPWSTR lpImageFileName, DWORD nSize); +typedef DWORD (WINAPI *GetModuleFileNameExWProt)(HANDLE hProcess, HMODULE hModule, LPWSTR lpImageFileName, DWORD nSize); + +/* + * These aclapi function is available in advapi.dll library on Windows 2000 + * and higher systems. + */ +typedef BOOL (WINAPI *SetSecurityDescriptorControlProt)(PSECURITY_DESCRIPTOR pSecurityDescriptor, SECURITY_DESCRIPTOR_CONTROL ControlBitsOfInterest, SECURITY_DESCRIPTOR_CONTROL ControlBitsToSet); + +/* + * This errhandlingapi function is available in kernel32.dll library on + * Windows 7 and higher systems. + */ +typedef BOOL (WINAPI *SetThreadErrorModeProt)(DWORD dwNewMode, LPDWORD lpOldMode); + + +const char * +win32_strerror(DWORD win32_error_id) +{ + /* + * Use static buffer which is large enough. + * Hopefully no Win32 API error message string is longer than 4 kB. + */ + static char buffer[4096]; + DWORD len; + + len = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, win32_error_id, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buffer, sizeof(buffer), NULL); + + /* FormatMessage() automatically appends ".\r\n" to the error message. 
*/ + if (len && buffer[len-1] == '\n') + buffer[--len] = '\0'; + if (len && buffer[len-1] == '\r') + buffer[--len] = '\0'; + if (len && buffer[len-1] == '.') + buffer[--len] = '\0'; + + if (!len) + sprintf(buffer, "Unknown Win32 error %lu", win32_error_id); + + return buffer; +} + +BOOL +win32_is_non_nt_system(void) +{ + OSVERSIONINFOA version; + version.dwOSVersionInfoSize = sizeof(version); + return GetVersionExA(&version) && version.dwPlatformId < VER_PLATFORM_WIN32_NT; +} + +BOOL +win32_is_32bit_on_64bit_system(void) +{ + BOOL (WINAPI *MyIsWow64Process)(HANDLE, PBOOL); + HMODULE kernel32; + BOOL is_wow64; + + /* + * Check for 64-bit system via IsWow64Process() function exported + * from 32-bit kernel32.dll library available on the 64-bit systems. + * Resolve pointer to this function at runtime as this code path is + * primary running on 32-bit systems where are not available 64-bit + * functions. + */ + + kernel32 = GetModuleHandle(TEXT("kernel32.dll")); + if (!kernel32) + return FALSE; + + MyIsWow64Process = (void *)GetProcAddress(kernel32, "IsWow64Process"); + if (!MyIsWow64Process) + return FALSE; + + if (!MyIsWow64Process(GetCurrentProcess(), &is_wow64)) + return FALSE; + + return is_wow64; +} + +BOOL +win32_is_32bit_on_win8_64bit_system(void) +{ +#ifdef _WIN64 + return FALSE; +#else + OSVERSIONINFOA version; + + /* Check for Windows 8 (NT 6.2). */ + version.dwOSVersionInfoSize = sizeof(version); + if (!GetVersionExA(&version) || + version.dwPlatformId != VER_PLATFORM_WIN32_NT || + version.dwMajorVersion < 6 || + (version.dwMajorVersion == 6 && version.dwMinorVersion < 2)) + return FALSE; + + return win32_is_32bit_on_64bit_system(); +#endif +} + +/* + * Change error mode of the current thread. If it is not possible then change + * error mode of the whole process. Always returns previous error mode. 
+ */ +UINT +win32_change_error_mode(UINT new_mode) +{ + SetThreadErrorModeProt MySetThreadErrorMode = NULL; + HMODULE kernel32; + DWORD old_mode; + + /* + * Function SetThreadErrorMode() was introduced in Windows 7, so use + * GetProcAddress() for compatibility with older systems. + */ + kernel32 = GetModuleHandle(TEXT("kernel32.dll")); + if (kernel32) + MySetThreadErrorMode = (SetThreadErrorModeProt)(LPVOID)GetProcAddress(kernel32, "SetThreadErrorMode"); + + if (MySetThreadErrorMode && + MySetThreadErrorMode(new_mode, &old_mode)) + return old_mode; + + /* + * Fallback to function SetErrorMode() which modifies error mode of the + * whole process and returns old mode. + */ + return SetErrorMode(new_mode); +} + +/* + * Check if the current thread has particular privilege in current active access + * token. Case when it not possible to determinate it (e.g. current thread does + * not have permission to open its own current active access token) is evaluated + * as thread does not have that privilege. + */ +BOOL +win32_have_privilege(LUID luid_privilege) +{ + PRIVILEGE_SET priv; + HANDLE token; + BOOL ret; + + /* + * If the current thread does not have active access token then thread + * uses primary process access token for all permission checks. + */ + if (!OpenThreadToken(GetCurrentThread(), TOKEN_QUERY, TRUE, &token) && + (GetLastError() != ERROR_NO_TOKEN || + !OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token))) + return FALSE; + + priv.PrivilegeCount = 1; + priv.Control = PRIVILEGE_SET_ALL_NECESSARY; + priv.Privilege[0].Luid = luid_privilege; + priv.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED; + + if (!PrivilegeCheck(token, &priv, &ret)) + return FALSE; + + return ret; +} + +/* + * Enable or disable particular privilege in specified access token. + * + * Note that it is not possible to disable privilege in access token with + * SE_PRIVILEGE_ENABLED_BY_DEFAULT attribute. 
This function does not check + * this case and incorrectly returns no error even when disabling failed. + * Rationale for this decision: Simplification of this function as WinAPI + * call AdjustTokenPrivileges() does not signal error in this case too. + */ +static BOOL +set_privilege(HANDLE token, LUID luid_privilege, BOOL enable) +{ + TOKEN_PRIVILEGES token_privileges; + + token_privileges.PrivilegeCount = 1; + token_privileges.Privileges[0].Luid = luid_privilege; + token_privileges.Privileges[0].Attributes = enable ? SE_PRIVILEGE_ENABLED : 0; + + /* + * WinAPI function AdjustTokenPrivileges() success also when not all + * privileges were enabled. It is always required to check for failure + * via GetLastError() call. AdjustTokenPrivileges() always sets error + * also when it success, as opposite to other WinAPI functions. + */ + if (!AdjustTokenPrivileges(token, FALSE, &token_privileges, sizeof(token_privileges), NULL, NULL) || + GetLastError() != ERROR_SUCCESS) + return FALSE; + + return TRUE; +} + +/* + * Change access token for the current thread to new specified access token. + * Previously active access token is stored in old_token variable and can be + * used for reverting to this access token. It is set to NULL if the current + * thread previously used primary process access token. + */ +BOOL +win32_change_token(HANDLE new_token, HANDLE *old_token) +{ + HANDLE token; + + if (!OpenThreadToken(GetCurrentThread(), TOKEN_IMPERSONATE, TRUE, &token)) + { + if (GetLastError() != ERROR_NO_TOKEN) + return FALSE; + token = NULL; + } + + if (!ImpersonateLoggedOnUser(new_token)) + { + if (token) + CloseHandle(token); + return FALSE; + } + + *old_token = token; + return TRUE; +} + +/* + * Change access token for the current thread to the primary process access + * token. This function fails also when the current thread already uses primary + * process access token. 
+ */ +static BOOL +change_token_to_primary(HANDLE *old_token) +{ + HANDLE token; + + if (!OpenThreadToken(GetCurrentThread(), TOKEN_IMPERSONATE, TRUE, &token)) + return FALSE; + + RevertToSelf(); + + *old_token = token; + return TRUE; +} + +/* + * Revert to the specified access token for the current thread. When access + * token is specified as NULL then revert to the primary process access token. + * Use to revert after win32_change_token() or change_token_to_primary() call. + */ +VOID +win32_revert_to_token(HANDLE token) +{ + /* + * If SetThreadToken() call fails then there is no option to revert to + * the specified previous thread access token. So in this case revert to + * the primary process access token. + */ + if (!token || !SetThreadToken(NULL, token)) + RevertToSelf(); + if (token) + CloseHandle(token); +} + +/* + * Enable particular privilege for the current thread. And set method how to + * revert this privilege (if to revert whole token or only privilege). + */ +BOOL +win32_enable_privilege(LUID luid_privilege, HANDLE *revert_token, BOOL *revert_only_privilege) +{ + HANDLE thread_token; + HANDLE new_token; + + if (OpenThreadToken(GetCurrentThread(), TOKEN_ADJUST_PRIVILEGES, TRUE, &thread_token)) + { + if (set_privilege(thread_token, luid_privilege, TRUE)) + { + /* + * Indicate that correct revert method is just to + * disable privilege in access token. + */ + if (revert_token && revert_only_privilege) + { + *revert_token = thread_token; + *revert_only_privilege = TRUE; + } + else + { + CloseHandle(thread_token); + } + return TRUE; + } + CloseHandle(thread_token); + /* + * If enabling privilege failed then try to enable it via + * primary process access token. + */ + } + + /* + * If the current thread has already active thread access token then + * open it with just impersonate right as it would be used only for + * future revert. 
+ */ + if (revert_token && revert_only_privilege) + { + if (!OpenThreadToken(GetCurrentThread(), TOKEN_IMPERSONATE, TRUE, &thread_token)) + { + if (GetLastError() != ERROR_NO_TOKEN) + return FALSE; + thread_token = NULL; + } + + /* + * If current thread has no access token (and uses primary + * process access token) or it does not have permission to + * adjust privileges or it does not have specified privilege + * then create a copy of the primary process access token, + * assign it for the current thread (= impersonate self) + * and then try adjusting privilege again. + */ + if (!ImpersonateSelf(SecurityImpersonation)) + { + if (thread_token) + CloseHandle(thread_token); + return FALSE; + } + } + + if (!OpenThreadToken(GetCurrentThread(), TOKEN_ADJUST_PRIVILEGES, TRUE, &new_token)) + { + /* thread_token is set only when we were asked for revert method. */ + if (revert_token && revert_only_privilege) + win32_revert_to_token(thread_token); + return FALSE; + } + + if (!set_privilege(new_token, luid_privilege, TRUE)) + { + CloseHandle(new_token); + /* thread_token is set only when we were asked for revert method. */ + if (revert_token && revert_only_privilege) + win32_revert_to_token(thread_token); + return FALSE; + } + + /* + * Indicate that correct revert method is to change to the previous + * access token. Either to the primary process access token or to the + * previous thread access token. + */ + if (revert_token && revert_only_privilege) + { + *revert_token = thread_token; + *revert_only_privilege = FALSE; + } + return TRUE; +} + +/* + * Revert particular privilege for the current thread was previously enabled by + * win32_enable_privilege() call. Either disable privilege in specified access token + * or revert to previous access token. 
+ */
+VOID
+win32_revert_privilege(LUID luid_privilege, HANDLE revert_token, BOOL revert_only_privilege)
+{
+  if (revert_only_privilege)
+    {
+      /* Only the privilege was changed: disable it again and release the token handle. */
+      set_privilege(revert_token, luid_privilege, FALSE);
+      CloseHandle(revert_token);
+    }
+  else
+    {
+      /* The whole thread token was replaced: switch back (this also closes revert_token). */
+      win32_revert_to_token(revert_token);
+    }
+}
+
+/*
+ * Return owner of the access token used by the current thread. Buffer for
+ * returned owner needs to be released by LocalFree() call.
+ * Returns NULL on failure.
+ */
+static TOKEN_OWNER *
+get_current_token_owner(VOID)
+{
+  HANDLE token;
+  DWORD length;
+  TOKEN_OWNER *owner;
+
+  /*
+   * If the current thread does not have active access token then thread
+   * uses primary process access token for all permission checks.
+   */
+  if (!OpenThreadToken(GetCurrentThread(), TOKEN_QUERY, TRUE, &token) &&
+      (GetLastError() != ERROR_NO_TOKEN ||
+       !OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &token)))
+    return NULL;
+
+  /* First call with no buffer only queries the required buffer length. */
+  if (!GetTokenInformation(token, TokenOwner, NULL, 0, &length) &&
+      GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+    {
+      CloseHandle(token);
+      return NULL;
+    }
+
+retry:
+  owner = (TOKEN_OWNER *)LocalAlloc(LPTR, length);
+  if (!owner)
+    {
+      CloseHandle(token);
+      return NULL;
+    }
+
+  if (!GetTokenInformation(token, TokenOwner, owner, length, &length))
+    {
+      /*
+       * Length of token owner (SID) buffer between two get calls may
+       * change (e.g. by another thread of process), so retry.
+       */
+      if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+        {
+          LocalFree(owner);
+          goto retry;
+        }
+      LocalFree(owner);
+      CloseHandle(token);
+      return NULL;
+    }
+
+  CloseHandle(token);
+  return owner;
+}
+
+/*
+ * Create a new security descriptor in absolute form from relative form.
+ * Newly created security descriptor in absolute form is stored in linear buffer.
+ *
+ * NOTE: despite its name, this function converts the relative (self-relative)
+ * descriptor passed in rel_security_descriptor into an absolute one by
+ * calling MakeAbsoluteSD().
+ *
+ * Returns pointer to the new descriptor which has to be released by a single
+ * LocalFree() call, or NULL on failure.
+ */
+static PSECURITY_DESCRIPTOR
+create_relsd_from_abssd(PSECURITY_DESCRIPTOR rel_security_descriptor)
+{
+  PBYTE abs_security_descriptor_buffer;
+  DWORD abs_security_descriptor_size=0, abs_dacl_size=0, abs_sacl_size=0, abs_owner_size=0, abs_primary_group_size=0;
+  DWORD error;
+
+  /* First call with no buffers only retrieves sizes of all parts. */
+  if (!MakeAbsoluteSD(rel_security_descriptor,
+        NULL, &abs_security_descriptor_size,
+        NULL, &abs_dacl_size,
+        NULL, &abs_sacl_size,
+        NULL, &abs_owner_size,
+        NULL, &abs_primary_group_size) && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+    return NULL;
+
+  /* One linear buffer holds the descriptor followed by DACL, SACL, owner and group. */
+  abs_security_descriptor_buffer = (PBYTE)LocalAlloc(LPTR, abs_security_descriptor_size+abs_dacl_size+abs_sacl_size+abs_owner_size+abs_primary_group_size);
+  if (!abs_security_descriptor_buffer)
+    return NULL;
+
+  if (!MakeAbsoluteSD(rel_security_descriptor,
+        (PSECURITY_DESCRIPTOR)abs_security_descriptor_buffer, &abs_security_descriptor_size,
+        (PACL)(abs_security_descriptor_buffer+abs_security_descriptor_size), &abs_dacl_size,
+        (PACL)(abs_security_descriptor_buffer+abs_security_descriptor_size+abs_dacl_size), &abs_sacl_size,
+        (PSID)(abs_security_descriptor_buffer+abs_security_descriptor_size+abs_dacl_size+abs_sacl_size), &abs_owner_size,
+        (PSID)(abs_security_descriptor_buffer+abs_security_descriptor_size+abs_dacl_size+abs_sacl_size+abs_owner_size), &abs_primary_group_size))
+    {
+      /* Release the buffer on failure and keep the error code of the failed call. */
+      error = GetLastError();
+      LocalFree(abs_security_descriptor_buffer);
+      SetLastError(error);
+      return NULL;
+    }
+
+  return (PSECURITY_DESCRIPTOR)abs_security_descriptor_buffer;
+}
+
+/*
+ * Prepare security descriptor obtained by GetKernelObjectSecurity() so it can be
+ * passed to SetKernelObjectSecurity() as identity operation. It modifies control
+ * flags of security descriptor, which is needed for Windows 2000 and new.
+ */
+static BOOL
+prepare_security_descriptor_for_set_operation(PSECURITY_DESCRIPTOR security_descriptor)
+{
+  SetSecurityDescriptorControlProt MySetSecurityDescriptorControl;
+  SECURITY_DESCRIPTOR_CONTROL bits_mask;
+  SECURITY_DESCRIPTOR_CONTROL bits_set;
+  SECURITY_DESCRIPTOR_CONTROL control;
+  OSVERSIONINFO version;
+  HMODULE advapi32;
+  DWORD revision;
+
+  /*
+   * SE_DACL_AUTO_INHERITED and SE_SACL_AUTO_INHERITED are flags introduced in
+   * Windows 2000 to control client-side automatic inheritance (client - user
+   * process - is responsible for propagating inherited ACEs to subobjects).
+   * To prevent applications which do not understand client-side automatic
+   * inheritance (applications created prior Windows 2000 or which use low
+   * level API like SetKernelObjectSecurity()) to unintentionally set those
+   * SE_DACL_AUTO_INHERITED and SE_SACL_AUTO_INHERITED control flags when
+   * copying them from other security descriptor.
+   *
+   * As we are not modifying existing ACEs, we are compatible with Windows 2000
+   * client-side automatic inheritance model and therefore prepare security
+   * descriptor for SetKernelObjectSecurity() to not clear existing automatic
+   * inheritance control flags.
+   *
+   * Control flags SE_DACL_AUTO_INHERITED and SE_SACL_AUTO_INHERITED are set
+   * into security object only when they are set together with set-only flags
+   * SE_DACL_AUTO_INHERIT_REQ and SE_SACL_AUTO_INHERIT_REQ. Those flags are
+   * never received by GetKernelObjectSecurity() and are just commands for
+   * SetKernelObjectSecurity() how to interpret SE_DACL_AUTO_INHERITED and
+   * SE_SACL_AUTO_INHERITED flags.
+   *
+   * Function symbol SetSecurityDescriptorControl is not available in the
+   * older versions of advapi32.dll library, so resolve it at runtime.
+   */
+
+  /* Nothing to prepare on systems older than Windows 2000 (NT 5.0). */
+  version.dwOSVersionInfoSize = sizeof(version);
+  if (!GetVersionEx(&version) ||
+      version.dwPlatformId != VER_PLATFORM_WIN32_NT ||
+      version.dwMajorVersion < 5)
+    return TRUE;
+
+  if (!GetSecurityDescriptorControl(security_descriptor, &control, &revision))
+    return FALSE;
+
+  bits_mask = 0;
+  bits_set = 0;
+
+  if (control & SE_DACL_AUTO_INHERITED)
+    {
+      bits_mask |= SE_DACL_AUTO_INHERIT_REQ;
+      bits_set |= SE_DACL_AUTO_INHERIT_REQ;
+    }
+
+  if (control & SE_SACL_AUTO_INHERITED)
+    {
+      bits_mask |= SE_SACL_AUTO_INHERIT_REQ;
+      bits_set |= SE_SACL_AUTO_INHERIT_REQ;
+    }
+
+  /* No auto-inherited flag is set, so descriptor can be used as is. */
+  if (!bits_mask)
+    return TRUE;
+
+  advapi32 = GetModuleHandle(TEXT("advapi32.dll"));
+  if (!advapi32)
+    return FALSE;
+
+  MySetSecurityDescriptorControl = (SetSecurityDescriptorControlProt)(LPVOID)GetProcAddress(advapi32, "SetSecurityDescriptorControl");
+  if (!MySetSecurityDescriptorControl)
+    return FALSE;
+
+  if (!MySetSecurityDescriptorControl(security_descriptor, bits_mask, bits_set))
+    return FALSE;
+
+  return TRUE;
+}
+
+/*
+ * Grant particular permissions in the primary access token of the specified
+ * process for the owner of current thread token and set old DACL of the
+ * process access token for reverting permissions. Security descriptor is
+ * just memory buffer for old DACL.
+ *
+ * On success with non-NULL *old_security_descriptor the caller has to call
+ * revert_token_dacl_permissions(*token, *old_security_descriptor), which
+ * also closes *token. On success with NULL *old_security_descriptor nothing
+ * was changed, *token is already closed (and set to NULL) and there is
+ * nothing to revert. On failure all resources are released.
+ */
+static BOOL
+grant_process_token_dacl_permissions(HANDLE process, DWORD permissions, HANDLE *token, PSECURITY_DESCRIPTOR *old_security_descriptor)
+{
+  TOKEN_OWNER *owner;
+  PACL old_dacl;
+  BOOL old_dacl_present;
+  BOOL old_dacl_defaulted;
+  PACL new_dacl;
+  WORD new_dacl_size;
+  PSECURITY_DESCRIPTOR new_security_descriptor;
+  DWORD length;
+
+  owner = get_current_token_owner();
+  if (!owner)
+    return FALSE;
+
+  /*
+   * READ_CONTROL is required for GetSecurityInfo(DACL_SECURITY_INFORMATION)
+   * and WRITE_DAC is required for SetSecurityInfo(DACL_SECURITY_INFORMATION).
+   */
+  if (!OpenProcessToken(process, READ_CONTROL | WRITE_DAC, token))
+    {
+      LocalFree(owner);
+      return FALSE;
+    }
+
+  if (!GetKernelObjectSecurity(*token, DACL_SECURITY_INFORMATION, NULL, 0, &length) && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
+    {
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+retry:
+  *old_security_descriptor = (PSECURITY_DESCRIPTOR)LocalAlloc(LPTR, length);
+  if (!*old_security_descriptor)
+    {
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  if (!GetKernelObjectSecurity(*token, DACL_SECURITY_INFORMATION, *old_security_descriptor, length, &length))
+    {
+      /*
+       * Length of the security descriptor between two get calls
+       * may change (e.g. by another thread of process), so retry.
+       */
+      if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
+        {
+          LocalFree(*old_security_descriptor);
+          goto retry;
+        }
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  if (!prepare_security_descriptor_for_set_operation(*old_security_descriptor))
+    {
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  /* Retrieve the current DACL from security descriptor including present and defaulted properties. */
+  if (!GetSecurityDescriptorDacl(*old_security_descriptor, &old_dacl_present, &old_dacl, &old_dacl_defaulted))
+    {
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  /*
+   * If DACL is not present then system grants full access to everyone. In this
+   * case do not modify DACL as it just adds one ACL allow rule for us, which
+   * automatically disallow access to anybody else which had access before.
+   *
+   * As there is nothing to revert, the caller does not call
+   * revert_token_dacl_permissions() in this case, so the token handle has
+   * to be closed here, otherwise it would leak.
+   */
+  if (!old_dacl_present || !old_dacl)
+    {
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      *token = NULL;
+      *old_security_descriptor = NULL;
+      return TRUE;
+    }
+
+  /* Create new DACL which would be copy of the current old one. */
+  new_dacl_size = old_dacl->AclSize + sizeof(ACCESS_ALLOWED_ACE) + GetLengthSid(owner->Owner) - sizeof(DWORD);
+  new_dacl = (PACL)LocalAlloc(LPTR, new_dacl_size);
+  if (!new_dacl)
+    {
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  /*
+   * Initialize new DACL structure to the same format as was the old one.
+   * Set new explicit access for the owner of the current thread access
+   * token with non-inherited granting access to specified permissions.
+   * This permission is added in the first ACE, so has the highest priority.
+   */
+  if (!InitializeAcl(new_dacl, new_dacl_size, old_dacl->AclRevision) ||
+      !AddAccessAllowedAce(new_dacl, ACL_REVISION2, permissions, owner->Owner))
+    {
+      LocalFree(new_dacl);
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  /*
+   * Now (after setting our new permissions) append all ACE entries from the
+   * old DACL to the new DACL, which preserve all other existing permissions.
+   */
+  if (old_dacl->AceCount > 0)
+    {
+      WORD ace_index;
+      LPVOID ace;
+
+      for (ace_index = 0; ace_index < old_dacl->AceCount; ace_index++)
+        {
+          if (!GetAce(old_dacl, ace_index, &ace) ||
+              !AddAce(new_dacl, old_dacl->AclRevision, MAXDWORD, ace, ((PACE_HEADER)ace)->AceSize))
+            {
+              LocalFree(new_dacl);
+              LocalFree(*old_security_descriptor);
+              LocalFree(owner);
+              CloseHandle(*token);
+              return FALSE;
+            }
+        }
+    }
+
+  /*
+   * Create copy of the old security descriptor, so we can modify its DACL.
+   * Function SetSecurityDescriptorDacl() works only with security descriptors
+   * in absolute format. So use our helper function create_relsd_from_abssd()
+   * for converting security descriptor from relative format (which is returned
+   * by GetKernelObjectSecurity() function) to the absolute format.
+   */
+  new_security_descriptor = create_relsd_from_abssd(*old_security_descriptor);
+  if (!new_security_descriptor)
+    {
+      LocalFree(new_dacl);
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  /*
+   * In the new security descriptor replace old DACL by the new DACL (which has
+   * new permissions) and then set this new security descriptor to the token,
+   * so token would have new access permissions.
+   */
+  if (!SetSecurityDescriptorDacl(new_security_descriptor, TRUE, new_dacl, FALSE) ||
+      !SetKernelObjectSecurity(*token, DACL_SECURITY_INFORMATION, new_security_descriptor))
+    {
+      LocalFree(new_security_descriptor);
+      LocalFree(new_dacl);
+      LocalFree(*old_security_descriptor);
+      LocalFree(owner);
+      CloseHandle(*token);
+      return FALSE;
+    }
+
+  LocalFree(new_security_descriptor);
+  LocalFree(new_dacl);
+  LocalFree(owner);
+  return TRUE;
+}
+
+/*
+ * Revert particular granted permissions in specified access token done by
+ * grant_process_token_dacl_permissions() call.
+ * Releases the old security descriptor buffer and closes the token handle.
+ */
+static VOID
+revert_token_dacl_permissions(HANDLE token, PSECURITY_DESCRIPTOR old_security_descriptor)
+{
+  SetKernelObjectSecurity(token, DACL_SECURITY_INFORMATION, old_security_descriptor);
+  LocalFree(old_security_descriptor);
+  CloseHandle(token);
+}
+
+/*
+ * Open process handle specified by the process id with the query right and
+ * optionally also with vm read right.
+ * Returns NULL when the process cannot be opened.
+ */
+static HANDLE
+open_process_for_query(DWORD pid, BOOL with_vm_read)
+{
+  BOOL revert_only_privilege;
+  LUID luid_debug_privilege;
+  OSVERSIONINFO version;
+  DWORD process_right;
+  HANDLE revert_token;
+  HANDLE process;
+
+  /*
+   * Some processes on Windows Vista and higher systems can be opened only
+   * with PROCESS_QUERY_LIMITED_INFORMATION right. This right is enough
+   * for accessing primary process token. But this right is not supported
+   * on older pre-Vista systems. When the current thread on these older
+   * systems does not have Debug privilege then OpenProcess() fails with
+   * ERROR_ACCESS_DENIED. If the current thread has Debug privilege then
+   * OpenProcess() succeeds and returns handle to requested process.
+   * Problem is that this handle does not have PROCESS_QUERY_INFORMATION
+   * right and so cannot be used for accessing primary process token
+   * on those older systems. Moreover it has zero rights and therefore
+   * such handle is fully useless. So never try to use open process with
+   * PROCESS_QUERY_LIMITED_INFORMATION right on older systems than
+   * Windows Vista (NT 6.0).
+   */
+  version.dwOSVersionInfoSize = sizeof(version);
+  if (GetVersionEx(&version) &&
+      version.dwPlatformId == VER_PLATFORM_WIN32_NT &&
+      version.dwMajorVersion >= 6)
+    process_right = PROCESS_QUERY_LIMITED_INFORMATION;
+  else
+    process_right = PROCESS_QUERY_INFORMATION;
+
+  if (with_vm_read)
+    process_right |= PROCESS_VM_READ;
+
+  process = OpenProcess(process_right, FALSE, pid);
+  if (process)
+    return process;
+
+  /*
+   * It is possible to open only processes to which owner of the current
+   * thread access token has permissions. For opening other processes it
+   * is required to have Debug privilege enabled. By default local
+   * administrators have this privilege, but it is disabled. So try to
+   * enable it and then try to open process again.
+   */
+
+  if (!LookupPrivilegeValue(NULL, SE_DEBUG_NAME, &luid_debug_privilege))
+    return NULL;
+
+  if (!win32_enable_privilege(luid_debug_privilege, &revert_token, &revert_only_privilege))
+    return NULL;
+
+  process = OpenProcess(process_right, FALSE, pid);
+
+  /* Debug privilege was needed only for the open call itself. */
+  win32_revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege);
+
+  return process;
+}
+
+/*
+ * Check if process image path name (wide string) matches exe file name
+ * (7-bit ASCII string). Do case-insensitive string comparison. Process
+ * image path name can be in any namespace format (DOS, Win32, UNC, ...).
+ */
+static BOOL
+check_process_name(LPCWSTR path, DWORD path_length, LPCSTR exe_file)
+{
+  DWORD exe_file_length;
+  WCHAR c1;
+  UCHAR c2;
+  DWORD i;
+
+  exe_file_length = 0;
+  while (exe_file[exe_file_length] != '\0')
+    exe_file_length++;
+
+  /* Path must have backslash before exe file name. */
+  if (exe_file_length >= path_length ||
+      path[path_length-exe_file_length-1] != L'\\')
+    return FALSE;
+
+  /* Compare the last exe_file_length characters of path with exe_file. */
+  for (i = 0; i < exe_file_length; i++)
+    {
+      c1 = path[path_length-exe_file_length+i];
+      c2 = exe_file[i];
+      /*
+       * Input string for comparison is 7-bit ASCII and file name part
+       * of path must not contain backslash as it is path separator.
+       */
+      if (c1 >= 0x80 || c2 >= 0x80 || c1 == L'\\')
+        return FALSE;
+      /* Convert both characters to upper case before comparing. */
+      if (c1 >= L'a' && c1 <= L'z')
+        c1 -= L'a' - L'A';
+      if (c2 >= 'a' && c2 <= 'z')
+        c2 -= 'a' - 'A';
+      if (c1 != c2)
+        return FALSE;
+    }
+
+  return TRUE;
+}
+
+/*
+ * Open process handle with the query right specified by process exe file.
+ * Returns handle of the first enumerated process whose image file name
+ * matches exe_file, or NULL when no such process was found or opened.
+ */
+HANDLE
+win32_find_and_open_process_for_query(LPCSTR exe_file)
+{
+  GetProcessImageFileNameWProt MyGetProcessImageFileNameW;
+  GetModuleFileNameExWProt MyGetModuleFileNameExW;
+  EnumProcessesProt MyEnumProcesses;
+  HMODULE kernel32, psapi;
+  UINT prev_error_mode;
+  DWORD partial_retry;
+  BOOL found_process;
+  DWORD size, length;
+  DWORD *processes;
+  HANDLE process;
+  LPWSTR path;
+  DWORD error;
+  DWORD count;
+  DWORD i;
+
+  psapi = NULL;
+  kernel32 = GetModuleHandle(TEXT("kernel32.dll"));
+  if (!kernel32)
+    return NULL;
+
+  /*
+   * On Windows 7 and higher systems these functions are available in
+   * kernel32.dll library with K32 prefix.
+   */
+  MyGetModuleFileNameExW = NULL;
+  MyGetProcessImageFileNameW = (GetProcessImageFileNameWProt)(LPVOID)GetProcAddress(kernel32, "K32GetProcessImageFileNameW");
+  MyEnumProcesses = (EnumProcessesProt)(LPVOID)GetProcAddress(kernel32, "K32EnumProcesses");
+  if (!MyGetProcessImageFileNameW || !MyEnumProcesses)
+    {
+      /*
+       * On older NT-based systems these functions are available in
+       * psapi.dll library without K32 prefix.
+       */
+      prev_error_mode = win32_change_error_mode(SEM_FAILCRITICALERRORS);
+      psapi = LoadLibrary(TEXT("psapi.dll"));
+      win32_change_error_mode(prev_error_mode);
+
+      if (!psapi)
+        return NULL;
+
+      /*
+       * Function GetProcessImageFileNameW() is available in
+       * Windows XP and higher systems. On older versions is
+       * available function GetModuleFileNameExW().
+       */
+      MyGetProcessImageFileNameW = (GetProcessImageFileNameWProt)(LPVOID)GetProcAddress(psapi, "GetProcessImageFileNameW");
+      MyGetModuleFileNameExW = (GetModuleFileNameExWProt)(LPVOID)GetProcAddress(psapi, "GetModuleFileNameExW");
+      MyEnumProcesses = (EnumProcessesProt)(LPVOID)GetProcAddress(psapi, "EnumProcesses");
+      if ((!MyGetProcessImageFileNameW && !MyGetModuleFileNameExW) || !MyEnumProcesses)
+        {
+          FreeLibrary(psapi);
+          return NULL;
+        }
+    }
+
+  /* Make initial buffer size for 1024 processes. */
+  size = 1024 * sizeof(*processes);
+
+retry:
+  processes = (DWORD *)LocalAlloc(LPTR, size);
+  if (!processes)
+    {
+      if (psapi)
+        FreeLibrary(psapi);
+      return NULL;
+    }
+
+  if (!MyEnumProcesses(processes, size, &length))
+    {
+      LocalFree(processes);
+      if (psapi)
+        FreeLibrary(psapi);
+      return NULL;
+    }
+  else if (size == length)
+    {
+      /*
+       * There is no indication given when the buffer is too small to
+       * store all process identifiers. Therefore if returned length
+       * is same as buffer size there can be more processes. Call
+       * again with larger buffer.
+       */
+      LocalFree(processes);
+      size *= 2;
+      goto retry;
+    }
+
+  process = NULL;
+  count = length / sizeof(*processes);
+
+  for (i = 0; i < count; i++)
+    {
+      /* Skip System Idle Process. */
+      if (processes[i] == 0)
+        continue;
+
+      /*
+       * Function GetModuleFileNameExW() requires additional
+       * PROCESS_VM_READ right as opposite to function
+       * GetProcessImageFileNameW() which does not need it.
+       */
+      process = open_process_for_query(processes[i], MyGetProcessImageFileNameW ? FALSE : TRUE);
+      if (!process)
+        continue;
+
+      /*
+       * Set initial buffer size to 256 (wide) characters.
+       * Final path length on the modern NT-based systems can be also larger.
+       * From now on the size variable counts characters of the path buffer.
+       */
+      size = 256;
+      found_process = FALSE;
+      partial_retry = 0;
+
+retry_path:
+      path = (LPWSTR)LocalAlloc(LPTR, size * sizeof(*path));
+      if (!path)
+        goto end_path;
+
+      if (MyGetProcessImageFileNameW)
+        length = MyGetProcessImageFileNameW(process, path, size);
+      else
+        length = MyGetModuleFileNameExW(process, NULL, path, size);
+
+      error = GetLastError();
+
+      /*
+       * GetModuleFileNameEx() returns zero and signal error ERROR_PARTIAL_COPY
+       * when remote process is in the middle of updating its module table.
+       * Sleep 10 ms and try again, max 10 attempts.
+       */
+      if (!MyGetProcessImageFileNameW)
+        {
+          if (length == 0 && error == ERROR_PARTIAL_COPY && partial_retry++ < 10)
+            {
+              /*
+               * The retry_path label allocates a new buffer, so release
+               * the current one first to not leak it on every attempt.
+               */
+              LocalFree(path);
+              Sleep(10);
+              goto retry_path;
+            }
+          partial_retry = 0;
+        }
+
+      /*
+       * When buffer is too small then function GetModuleFileNameEx() returns
+       * its size argument on older systems (Windows XP) or its size minus
+       * argument one on new systems (Windows 10) without signalling any error.
+       * Function GetProcessImageFileNameW() on the other hand returns zero
+       * value and signals error ERROR_INSUFFICIENT_BUFFER. So in all these
+       * cases call function again with larger buffer.
+       */
+
+      if (MyGetProcessImageFileNameW && length == 0 && error != ERROR_INSUFFICIENT_BUFFER)
+        goto end_path;
+
+      if ((MyGetProcessImageFileNameW && length == 0) ||
+          (!MyGetProcessImageFileNameW && (length == size || length == size-1)))
+        {
+          LocalFree(path);
+          size *= 2;
+          goto retry_path;
+        }
+
+      if (length && check_process_name(path, length, exe_file))
+        found_process = TRUE;
+
+end_path:
+      if (path)
+        {
+          LocalFree(path);
+          path = NULL;
+        }
+
+      /* Keep the matching process handle open; it is the return value. */
+      if (found_process)
+        break;
+
+      CloseHandle(process);
+      process = NULL;
+    }
+
+  LocalFree(processes);
+
+  if (psapi)
+    FreeLibrary(psapi);
+
+  return process;
+}
+
+/*
+ * Try to open primary access token of the particular process with specified
+ * rights. Before opening access token try to adjust DACL permissions of the
+ * primary process access token, so following open does not fail on error
+ * related to no open permissions. Revert DACL permissions after open attempt.
+ * As following steps are not atomic, try to execute them more times in case
+ * of possible race conditions caused by other threads or processes.
+ */
+static HANDLE
+try_grant_permissions_and_open_process_token(HANDLE process, DWORD rights)
+{
+  PSECURITY_DESCRIPTOR old_security_descriptor;
+  HANDLE grant_token;
+  HANDLE token;
+  DWORD retry;
+  DWORD error;
+
+  /*
+   * This code is not atomic. Between grant and open calls can other
+   * thread or process change or revert permissions. So try to execute
+   * it more times.
+ */ + for (retry = 0; retry < 10; retry++) + { + if (!grant_process_token_dacl_permissions(process, rights, &grant_token, &old_security_descriptor)) + return NULL; + if (!OpenProcessToken(process, rights, &token)) + { + token = NULL; + error = GetLastError(); + } + if (old_security_descriptor) + revert_token_dacl_permissions(grant_token, old_security_descriptor); + if (token) + return token; + else if (error != ERROR_ACCESS_DENIED) + return NULL; + } + + return NULL; +} + +/* + * Open primary access token of particular process handle with specified rights. + * If permissions for specified rights are missing then try to grant them. + */ +HANDLE +win32_open_process_token_with_rights(HANDLE process, DWORD rights) +{ + HANDLE old_token; + HANDLE token; + + /* First try to open primary access token of process handle directly. */ + if (OpenProcessToken(process, rights, &token)) + return token; + + /* + * If opening failed then it means that owner of the current thread + * access token does not have permission for it. Try it again with + * primary process access token. + */ + if (change_token_to_primary(&old_token)) + { + if (!OpenProcessToken(process, rights, &token)) + token = NULL; + win32_revert_to_token(old_token); + if (token) + return token; + } + + /* + * If opening is still failing then try to grant specified permissions + * for the current thread and try to open it again. + */ + token = try_grant_permissions_and_open_process_token(process, rights); + if (token) + return token; + + /* + * And if it is still failing then try it again with granting + * permissions for the primary process token of the current process. + */ + if (change_token_to_primary(&old_token)) + { + token = try_grant_permissions_and_open_process_token(process, rights); + win32_revert_to_token(old_token); + if (token) + return token; + } + + /* + * TODO: Sorry, no other option for now... 
+ * It could be possible to use Take Ownership Name privilege to + * temporary change token owner of specified process to the owner of + * the current thread token, grant permissions for current thread in + * that process token, change ownership back to original one, open + * that process token and revert granted permissions. But this is + * not implemented yet. + */ + return NULL; +} + +/* + * Call supplied function with its argument and if it fails with + * ERROR_PRIVILEGE_NOT_HELD then try to enable Tcb privilege and + * call function with its argument again. + */ +BOOL +win32_call_func_with_tcb_privilege(BOOL (*function)(LPVOID), LPVOID argument) +{ + LUID luid_tcb_privilege; + LUID luid_impersonate_privilege; + + HANDLE revert_token_tcb_privilege; + BOOL revert_only_tcb_privilege; + + HANDLE revert_token_impersonate_privilege; + BOOL revert_only_impersonate_privilege; + + BOOL impersonate_privilege_enabled; + + BOOL revert_to_old_token; + HANDLE old_token; + + HANDLE lsass_process; + HANDLE lsass_token; + + BOOL ret; + + impersonate_privilege_enabled = FALSE; + revert_to_old_token = FALSE; + lsass_token = NULL; + old_token = NULL; + + /* Call supplied function. */ + ret = function(argument); + if (ret || GetLastError() != ERROR_PRIVILEGE_NOT_HELD) + goto ret; + + /* + * If function call failed with ERROR_PRIVILEGE_NOT_HELD + * error then it means that the current thread token does not have + * Tcb privilege enabled. Try to enable it. + */ + + if (!LookupPrivilegeValue(NULL, SE_TCB_NAME, &luid_tcb_privilege)) + goto err_privilege_not_held; + + /* + * If the current thread has already Tcb privilege enabled then there + * is some additional unhanded restriction. + */ + if (win32_have_privilege(luid_tcb_privilege)) + goto err_privilege_not_held; + + /* Try to enable Tcb privilege and try function call again. 
*/ + if (win32_enable_privilege(luid_tcb_privilege, &revert_token_tcb_privilege, &revert_only_tcb_privilege)) + { + ret = function(argument); + win32_revert_privilege(luid_tcb_privilege, revert_token_tcb_privilege, revert_only_tcb_privilege); + goto ret; + } + + /* + * If enabling of Tcb privilege failed then it means that current thread + * does not have this privilege. But current process may have it. So try it + * again with primary process access token. + */ + + /* + * If system supports Impersonate privilege (Windows 2000 SP4 or higher) then + * all future actions in this function require this Impersonate privilege. + * So try to enable it in case it is currently disabled. + */ + if (LookupPrivilegeValue(NULL, SE_IMPERSONATE_NAME, &luid_impersonate_privilege) && + !win32_have_privilege(luid_impersonate_privilege)) + { + /* + * If current thread does not have Impersonate privilege enabled + * then first try to enable it just for the current thread. If + * it is not possible to enable it just for the current thread + * then try it to enable globally for whole process (which + * affects all process threads). Both actions will be reverted + * at the end of this function. + */ + if (win32_enable_privilege(luid_impersonate_privilege, &revert_token_impersonate_privilege, &revert_only_impersonate_privilege)) + { + impersonate_privilege_enabled = TRUE; + } + else if (win32_enable_privilege(luid_impersonate_privilege, NULL, NULL)) + { + impersonate_privilege_enabled = TRUE; + revert_token_impersonate_privilege = NULL; + revert_only_impersonate_privilege = TRUE; + } + else + { + goto err_privilege_not_held; + } + + /* + * Now when Impersonate privilege is enabled, try to enable Tcb + * privilege again. Enabling other privileges for the current + * thread requires Impersonate privilege, so enabling Tcb again + * could now pass. 
+ */ + if (win32_enable_privilege(luid_tcb_privilege, &revert_token_tcb_privilege, &revert_only_tcb_privilege)) + { + ret = function(argument); + win32_revert_privilege(luid_tcb_privilege, revert_token_tcb_privilege, revert_only_tcb_privilege); + goto ret; + } + } + + /* + * If enabling Tcb privilege failed then it means that the current + * thread access token does not have this privilege or does not + * have permission to adjust privileges. + * + * Try to use more privileged token from Local Security Authority + * Subsystem Service process (lsass.exe) which has Tcb privilege. + * Retrieving this more privileged token is possible for local + * administrators (unless it was disabled by local administrators). + */ + + lsass_process = win32_find_and_open_process_for_query("lsass.exe"); + if (!lsass_process) + goto err_privilege_not_held; + + /* + * Open primary lsass.exe process access token with query and duplicate + * rights. Just these two rights are required for impersonating other + * primary process token (impersonate right is really not required!). + */ + lsass_token = win32_open_process_token_with_rights(lsass_process, TOKEN_QUERY | TOKEN_DUPLICATE); + + CloseHandle(lsass_process); + + if (!lsass_token) + goto err_privilege_not_held; + + /* + * After successful open of the primary lsass.exe process access token, + * assign its copy for the current thread. + */ + if (!win32_change_token(lsass_token, &old_token)) + goto err_privilege_not_held; + + revert_to_old_token = TRUE; + + ret = function(argument); + if (ret || GetLastError() != ERROR_PRIVILEGE_NOT_HELD) + goto ret; + + /* + * Now current thread is not using primary process token anymore + * but is using custom access token. There is no need to revert + * enabled Tcb privilege as the whole custom access token would + * be reverted. So there is no need to setup revert method for + * enabling privilege. 
+ */ + if (win32_have_privilege(luid_tcb_privilege) || + !win32_enable_privilege(luid_tcb_privilege, NULL, NULL)) + goto err_privilege_not_held; + + ret = function(argument); + goto ret; + +err_privilege_not_held: + SetLastError(ERROR_PRIVILEGE_NOT_HELD); + ret = FALSE; + goto ret; + +ret: + if (revert_to_old_token) + win32_revert_to_token(old_token); + + if (impersonate_privilege_enabled) + win32_revert_privilege(luid_impersonate_privilege, revert_token_impersonate_privilege, revert_only_impersonate_privilege); + + if (lsass_token) + CloseHandle(lsass_token); + + return ret; +} diff --git a/lib/win32-helpers.h b/lib/win32-helpers.h new file mode 100644 index 0000000..c415439 --- /dev/null +++ b/lib/win32-helpers.h @@ -0,0 +1,13 @@ +const char *win32_strerror(DWORD win32_error_id); +BOOL win32_is_non_nt_system(void); +BOOL win32_is_32bit_on_64bit_system(void); +BOOL win32_is_32bit_on_win8_64bit_system(void); +UINT win32_change_error_mode(UINT new_mode); +BOOL win32_have_privilege(LUID luid_privilege); +BOOL win32_enable_privilege(LUID luid_privilege, HANDLE *revert_token, BOOL *revert_only_privilege); +VOID win32_revert_privilege(LUID luid_privilege, HANDLE revert_token, BOOL revert_only_privilege); +BOOL win32_change_token(HANDLE new_token, HANDLE *old_token); +VOID win32_revert_to_token(HANDLE token); +HANDLE win32_find_and_open_process_for_query(LPCSTR exe_file); +HANDLE win32_open_process_token_with_rights(HANDLE process, DWORD rights); +BOOL win32_call_func_with_tcb_privilege(BOOL (*function)(LPVOID), LPVOID argument); diff --git a/lib/win32-kldbg.c b/lib/win32-kldbg.c index c051f1a..33f5751 100644 --- a/lib/win32-kldbg.c +++ b/lib/win32-kldbg.c @@ -15,7 +15,7 @@ #include <string.h> /* for memset() and memcpy() */ #include "internal.h" -#include "i386-io-windows.h" +#include "win32-helpers.h" #ifndef ERROR_NOT_FOUND #define ERROR_NOT_FOUND 1168 @@ -108,53 +108,6 @@ static HANDLE kldbg_dev = INVALID_HANDLE_VALUE; static BOOL win32_kldbg_pci_bus_data(BOOL 
WriteBusData, USHORT SegmentNumber, BYTE BusNumber, BYTE DeviceNumber, BYTE FunctionNumber, USHORT Address, PVOID Buffer, ULONG BufferSize, LPDWORD Length); -static const char * -win32_strerror(DWORD win32_error_id) -{ - /* - * Use static buffer which is large enough. - * Hopefully no Win32 API error message string is longer than 4 kB. - */ - static char buffer[4096]; - DWORD len; - - len = FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS, NULL, win32_error_id, MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), buffer, sizeof(buffer), NULL); - - /* FormatMessage() automatically appends ".\r\n" to the error message. */ - if (len && buffer[len-1] == '\n') - buffer[--len] = '\0'; - if (len && buffer[len-1] == '\r') - buffer[--len] = '\0'; - if (len && buffer[len-1] == '.') - buffer[--len] = '\0'; - - if (!len) - sprintf(buffer, "Unknown Win32 error %lu", win32_error_id); - - return buffer; -} - -static BOOL -win32_is_32bit_on_64bit_system(void) -{ - BOOL (WINAPI *MyIsWow64Process)(HANDLE, PBOOL); - HMODULE kernel32; - BOOL is_wow64; - - kernel32 = GetModuleHandle(TEXT("kernel32.dll")); - if (!kernel32) - return FALSE; - - MyIsWow64Process = (void *)GetProcAddress(kernel32, "IsWow64Process"); - if (!MyIsWow64Process) - return FALSE; - - if (!MyIsWow64Process(GetCurrentProcess(), &is_wow64)) - return FALSE; - - return is_wow64; -} - static WORD win32_get_current_process_machine(void) { @@ -526,7 +479,7 @@ win32_kldbg_setup(struct pci_access *a) return 0; } - if (!enable_privilege(luid_debug_privilege, &revert_token, &revert_only_privilege)) + if (!win32_enable_privilege(luid_debug_privilege, &revert_token, &revert_only_privilege)) { a->debug("Process does not have right to enable Debug privilege."); CloseHandle(kldbg_dev); @@ -548,7 +501,7 @@ win32_kldbg_setup(struct pci_access *a) CloseHandle(kldbg_dev); kldbg_dev = INVALID_HANDLE_VALUE; - revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); + 
win32_revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); revert_token = NULL; revert_only_privilege = FALSE; return 0; @@ -584,7 +537,7 @@ win32_kldbg_cleanup(struct pci_access *a UNUSED) if (debug_privilege_enabled) { - revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); + win32_revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); revert_token = NULL; revert_only_privilege = FALSE; debug_privilege_enabled = FALSE; diff --git a/lib/win32-sysdbg.c b/lib/win32-sysdbg.c index 6b17f37..99ce607 100644 --- a/lib/win32-sysdbg.c +++ b/lib/win32-sysdbg.c @@ -11,7 +11,7 @@ #include <windows.h> #include "internal.h" -#include "i386-io-windows.h" +#include "win32-helpers.h" #ifndef NTSTATUS #define NTSTATUS LONG @@ -125,9 +125,9 @@ win32_sysdbg_setup(struct pci_access *a) if (win32_sysdbg_initialized) return 1; - prev_error_mode = change_error_mode(SEM_FAILCRITICALERRORS); + prev_error_mode = win32_change_error_mode(SEM_FAILCRITICALERRORS); ntdll = LoadLibrary(TEXT("ntdll.dll")); - change_error_mode(prev_error_mode); + win32_change_error_mode(prev_error_mode); if (!ntdll) { a->debug("Cannot open ntdll.dll library."); @@ -179,7 +179,7 @@ win32_sysdbg_setup(struct pci_access *a) return 0; } - if (!enable_privilege(luid_debug_privilege, &revert_token, &revert_only_privilege)) + if (!win32_enable_privilege(luid_debug_privilege, &revert_token, &revert_only_privilege)) { a->debug("Cannot enable Debug privilege."); FreeLibrary(ntdll); @@ -197,7 +197,7 @@ win32_sysdbg_setup(struct pci_access *a) return 1; } - revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); + win32_revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); revert_token = NULL; revert_only_privilege = FALSE; @@ -244,7 +244,7 @@ win32_sysdbg_cleanup(struct pci_access *a UNUSED) if (debug_privilege_enabled) { - revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); + 
win32_revert_privilege(luid_debug_privilege, revert_token, revert_only_privilege); revert_token = NULL; revert_only_privilege = FALSE; debug_privilege_enabled = FALSE; diff --git a/lmr/lmr.h b/lmr/lmr.h new file mode 100644 index 0000000..7375c33 --- /dev/null +++ b/lmr/lmr.h @@ -0,0 +1,228 @@ +/* + * The PCI Utilities -- Margining utility main header + * + * Copyright (c) 2023 KNS Group LLC (YADRO) + * + * Can be freely distributed and used under the terms of the GNU GPL v2+. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _LMR_H +#define _LMR_H + +#include <stdbool.h> + +#include "pciutils.h" + +#define MARGIN_STEP_MS 1000 + +#define MARGIN_TIM_MIN 20 +#define MARGIN_TIM_RECOMMEND 30 +#define MARGIN_VOLT_MIN 50 + +enum margin_hw { MARGIN_HW_DEFAULT, MARGIN_ICE_LAKE_RC }; + +/* PCI Device wrapper for margining functions */ +struct margin_dev { + struct pci_dev *dev; + int lmr_cap_addr; + u8 width; + u8 retimers_n; + u8 link_speed; + + enum margin_hw hw; + + /* Saved Device settings to restore after margining */ + u8 aspm; + bool hasd; // Hardware Autonomous Speed Disable + bool hawd; // Hardware Autonomous Width Disable +}; + +struct margin_link { + struct margin_dev down_port; + struct margin_dev up_port; +}; + +/* Specification Revision 5.0 Table 8-11 */ +struct margin_params { + bool ind_error_sampler; + bool sample_report_method; + bool ind_left_right_tim; + bool ind_up_down_volt; + bool volt_support; + + u8 max_lanes; + + u8 timing_steps; + u8 timing_offset; + + u8 volt_steps; + u8 volt_offset; + + u8 sample_rate_v; + u8 sample_rate_t; +}; + +/* Step Margin Execution Status - Step command response */ +enum margin_step_exec_sts { + MARGIN_NAK = 0, // NAK/Set up for margin + MARGIN_LIM, // Too many errors (device limit) + MARGIN_THR // Test threshold has been reached +}; + +enum margin_dir { VOLT_UP = 0, VOLT_DOWN, TIM_LEFT, TIM_RIGHT }; + +/* Margining results of one lane of the receiver */ +struct margin_res_lane { + u8 lane; + u8 steps[4]; 
+ enum margin_step_exec_sts statuses[4]; +}; + +/* Reason not to run margining test on the Link/Receiver */ +enum margin_test_status { + MARGIN_TEST_OK = 0, + MARGIN_TEST_READY_BIT, + MARGIN_TEST_CAPS, + + // Couldn't run test + MARGIN_TEST_PREREQS, + MARGIN_TEST_ARGS_LANES, + MARGIN_TEST_ARGS_RECVS, + MARGIN_TEST_ASPM +}; + +/* All lanes Receiver results */ +struct margin_results { + u8 recvn; // Receiver Number + struct margin_params params; + bool lane_reversal; + u8 link_speed; + + enum margin_test_status test_status; + + /* Used to convert steps to physical quantity. + Calculated from MaxOffset and NumSteps */ + double tim_coef; + double volt_coef; + + bool tim_off_reported; + bool volt_off_reported; + + u8 lanes_n; + struct margin_res_lane *lanes; +}; + +/* pcilmr arguments */ +struct margin_args { + u8 steps_t; // 0 == use NumTimingSteps + u8 steps_v; // 0 == use NumVoltageSteps + u8 parallel_lanes; // [1; MaxLanes + 1] + u8 error_limit; // [0; 63] + u8 recvs[6]; // Receivers Numbers + u8 recvs_n; // 0 == margin all available receivers + u8 lanes[32]; // Lanes to Margin + u8 lanes_n; // 0 == margin all available lanes + bool run_margin; // Or print params only + u8 verbosity; // 0 - basic; + // 1 - add info about remaining time and lanes in progress during margining + + u64 *steps_utility; // For ETA logging +}; + +/* Receiver structure */ +struct margin_recv { + struct margin_dev *dev; + u8 recvn; // Receiver Number + bool lane_reversal; + struct margin_params *params; + + u8 parallel_lanes; + u8 error_limit; +}; + +struct margin_lanes_data { + struct margin_recv *recv; + + struct margin_res_lane *results; + u8 *lanes_numbers; + u8 lanes_n; + + bool ind; + enum margin_dir dir; + + u8 steps_lane_done; + u8 steps_lane_total; + u64 *steps_utility; + + u8 verbosity; +}; + +/* margin_hw */ + +/* Verify that devices form the link with 16 GT/s or 32 GT/s data rate */ +bool margin_verify_link(struct pci_dev *down_port, struct pci_dev *up_port); + +/* Check 
Margining Ready bit from Margining Port Status Register */ +bool margin_check_ready_bit(struct pci_dev *dev); + +/* Verify link and fill wrappers */ +bool margin_fill_link(struct pci_dev *down_port, struct pci_dev *up_port, + struct margin_link *wrappers); + +/* Disable ASPM, set Hardware Autonomous Speed/Width Disable bits */ +bool margin_prep_link(struct margin_link *link); + +/* Restore ASPM, Hardware Autonomous Speed/Width settings */ +void margin_restore_link(struct margin_link *link); + +/* margin */ + +/* Fill margin_params without calling other functions */ +bool margin_read_params(struct pci_access *pacc, struct pci_dev *dev, u8 recvn, + struct margin_params *params); + +enum margin_test_status margin_process_args(struct margin_dev *dev, struct margin_args *args); + +/* Awaits that args are prepared through process_args. + Returns number of margined Receivers through recvs_n */ +struct margin_results *margin_test_link(struct margin_link *link, struct margin_args *args, + u8 *recvs_n); + +void margin_free_results(struct margin_results *results, u8 results_n); + +/* margin_log */ + +extern bool margin_global_logging; +extern bool margin_print_domain; + +void margin_log(char *format, ...); + +/* b:d.f -> b:d.f */ +void margin_log_bdfs(struct pci_dev *down_port, struct pci_dev *up_port); + +/* Print Link header (bdfs, width, speed) */ +void margin_log_link(struct margin_link *link); + +void margin_log_params(struct margin_params *params); + +/* Print receiver number */ +void margin_log_recvn(struct margin_recv *recv); + +/* Print full info from Receiver struct */ +void margin_log_receiver(struct margin_recv *recv); + +/* Margining in progress log */ +void margin_log_margining(struct margin_lanes_data arg); + +void margin_log_hw_quirks(struct margin_recv *recv); + +/* margin_results */ + +void margin_results_print_brief(struct margin_results *results, u8 recvs_n); + +void margin_results_save_csv(struct margin_results *results, u8 recvs_n, char *dir, + struct 
pci_dev *up_port); + +#endif diff --git a/lmr/margin.c b/lmr/margin.c new file mode 100644 index 0000000..a8c6571 --- /dev/null +++ b/lmr/margin.c @@ -0,0 +1,588 @@ +/* + * The PCI Utilities -- Obtain the margin information of the Link + * + * Copyright (c) 2023 KNS Group LLC (YADRO) + * + * Can be freely distributed and used under the terms of the GNU GPL v2+. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <errno.h> +#include <stdlib.h> +#include <time.h> + +#include "lmr.h" + +#ifdef PCI_OS_DJGPP +#include <unistd.h> +#endif + +/* Macro helpers for Margining command parsing */ + +typedef u16 margin_cmd; + +/* Margining command parsing */ + +#define LMR_CMD_RECVN MASK(2, 0) +#define LMR_CMD_TYPE MASK(5, 3) +#define LMR_CMD_PAYLOAD MASK(15, 8) + +// Payload parsing + +// Report Capabilities +#define LMR_PLD_VOLT_SUPPORT BIT(8) +#define LMR_PLD_IND_U_D_VOLT BIT(9) +#define LMR_PLD_IND_L_R_TIM BIT(10) +#define LMR_PLD_SAMPLE_REPORT_METHOD BIT(11) +#define LMR_PLD_IND_ERR_SAMPLER BIT(12) + +#define LMR_PLD_MAX_T_STEPS MASK(13, 8) +#define LMR_PLD_MAX_V_STEPS MASK(14, 8) +#define LMR_PLD_MAX_OFFSET MASK(14, 8) +#define LMR_PLD_MAX_LANES MASK(12, 8) +#define LMR_PLD_SAMPLE_RATE MASK(13, 8) + +// Timing Step +#define LMR_PLD_MARGIN_T_STEPS MASK(13, 8) +#define LMR_PLD_T_GO_LEFT BIT(14) + +// Voltage Timing +#define LMR_PLD_MARGIN_V_STEPS MASK(14, 8) +#define LMR_PLD_V_GO_DOWN BIT(15) + +// Step Response +#define LMR_PLD_ERR_CNT MASK(13, 8) +#define LMR_PLD_MARGIN_STS MASK(15, 14) + +/* Address calc macro for Lanes Margining registers */ + +#define LMR_LANE_CTRL(lmr_cap_addr, lane) ((lmr_cap_addr) + 8 + 4 * (lane)) +#define LMR_LANE_STATUS(lmr_cap_addr, lane) ((lmr_cap_addr) + 10 + 4 * (lane)) + +/* Margining Commands */ + +#define MARG_TIM(go_left, step, recvn) margin_make_cmd(((go_left) << 6) | (step), 3, recvn) +#define MARG_VOLT(go_down, step, recvn) margin_make_cmd(((go_down) << 7) | (step), 4, recvn) + +// Report commands +#define 
REPORT_CAPS(recvn) margin_make_cmd(0x88, 1, recvn) +#define REPORT_VOL_STEPS(recvn) margin_make_cmd(0x89, 1, recvn) +#define REPORT_TIM_STEPS(recvn) margin_make_cmd(0x8A, 1, recvn) +#define REPORT_TIM_OFFSET(recvn) margin_make_cmd(0x8B, 1, recvn) +#define REPORT_VOL_OFFSET(recvn) margin_make_cmd(0x8C, 1, recvn) +#define REPORT_SAMPL_RATE_V(recvn) margin_make_cmd(0x8D, 1, recvn) +#define REPORT_SAMPL_RATE_T(recvn) margin_make_cmd(0x8E, 1, recvn) +#define REPORT_SAMPLE_CNT(recvn) margin_make_cmd(0x8F, 1, recvn) +#define REPORT_MAX_LANES(recvn) margin_make_cmd(0x90, 1, recvn) + +// Set commands +#define NO_COMMAND margin_make_cmd(0x9C, 7, 0) +#define CLEAR_ERROR_LOG(recvn) margin_make_cmd(0x55, 2, recvn) +#define GO_TO_NORMAL_SETTINGS(recvn) margin_make_cmd(0xF, 2, recvn) +#define SET_ERROR_LIMIT(error_limit, recvn) margin_make_cmd(0xC0 | (error_limit), 2, recvn) + +static int +msleep(long msec) +{ +#if defined(PCI_OS_WINDOWS) + Sleep(msec); + return 0; +#elif defined(PCI_OS_DJGPP) + if (msec * 1000 < 11264) + usleep(11264); + else + usleep(msec * 1000); + return 0; +#else + struct timespec ts; + int res; + + if (msec < 0) + { + errno = EINVAL; + return -1; + } + + ts.tv_sec = msec / 1000; + ts.tv_nsec = (msec % 1000) * 1000000; + + do + { + res = nanosleep(&ts, &ts); + } while (res && errno == EINTR); + + return res; +#endif +} + +static margin_cmd +margin_make_cmd(u8 payload, u8 type, u8 recvn) +{ + return SET_REG_MASK(0, LMR_CMD_PAYLOAD, payload) | SET_REG_MASK(0, LMR_CMD_TYPE, type) + | SET_REG_MASK(0, LMR_CMD_RECVN, recvn); +} + +static bool +margin_set_cmd(struct margin_dev *dev, u8 lane, margin_cmd cmd) +{ + pci_write_word(dev->dev, LMR_LANE_CTRL(dev->lmr_cap_addr, lane), cmd); + msleep(10); + return pci_read_word(dev->dev, LMR_LANE_STATUS(dev->lmr_cap_addr, lane)) == cmd; +} + +static bool +margin_report_cmd(struct margin_dev *dev, u8 lane, margin_cmd cmd, margin_cmd *result) +{ + pci_write_word(dev->dev, LMR_LANE_CTRL(dev->lmr_cap_addr, lane), cmd); + 
msleep(10); + *result = pci_read_word(dev->dev, LMR_LANE_STATUS(dev->lmr_cap_addr, lane)); + return GET_REG_MASK(*result, LMR_CMD_TYPE) == GET_REG_MASK(cmd, LMR_CMD_TYPE) + && GET_REG_MASK(*result, LMR_CMD_RECVN) == GET_REG_MASK(cmd, LMR_CMD_RECVN) + && margin_set_cmd(dev, lane, NO_COMMAND); +} + +static void +margin_apply_hw_quirks(struct margin_recv *recv) +{ + switch (recv->dev->hw) + { + case MARGIN_ICE_LAKE_RC: + if (recv->recvn == 1) + recv->params->volt_offset = 12; + break; + default: + break; + } +} + +static bool +read_params_internal(struct margin_dev *dev, u8 recvn, bool lane_reversal, + struct margin_params *params) +{ + margin_cmd resp; + u8 lane = lane_reversal ? dev->width - 1 : 0; + margin_set_cmd(dev, lane, NO_COMMAND); + bool status = margin_report_cmd(dev, lane, REPORT_CAPS(recvn), &resp); + if (status) + { + params->volt_support = GET_REG_MASK(resp, LMR_PLD_VOLT_SUPPORT); + params->ind_up_down_volt = GET_REG_MASK(resp, LMR_PLD_IND_U_D_VOLT); + params->ind_left_right_tim = GET_REG_MASK(resp, LMR_PLD_IND_L_R_TIM); + params->sample_report_method = GET_REG_MASK(resp, LMR_PLD_SAMPLE_REPORT_METHOD); + params->ind_error_sampler = GET_REG_MASK(resp, LMR_PLD_IND_ERR_SAMPLER); + status = margin_report_cmd(dev, lane, REPORT_VOL_STEPS(recvn), &resp); + } + if (status) + { + params->volt_steps = GET_REG_MASK(resp, LMR_PLD_MAX_V_STEPS); + status = margin_report_cmd(dev, lane, REPORT_TIM_STEPS(recvn), &resp); + } + if (status) + { + params->timing_steps = GET_REG_MASK(resp, LMR_PLD_MAX_T_STEPS); + status = margin_report_cmd(dev, lane, REPORT_TIM_OFFSET(recvn), &resp); + } + if (status) + { + params->timing_offset = GET_REG_MASK(resp, LMR_PLD_MAX_OFFSET); + status = margin_report_cmd(dev, lane, REPORT_VOL_OFFSET(recvn), &resp); + } + if (status) + { + params->volt_offset = GET_REG_MASK(resp, LMR_PLD_MAX_OFFSET); + status = margin_report_cmd(dev, lane, REPORT_SAMPL_RATE_V(recvn), &resp); + } + if (status) + { + params->sample_rate_v = GET_REG_MASK(resp, 
LMR_PLD_SAMPLE_RATE); + status = margin_report_cmd(dev, lane, REPORT_SAMPL_RATE_T(recvn), &resp); + } + if (status) + { + params->sample_rate_t = GET_REG_MASK(resp, LMR_PLD_SAMPLE_RATE); + status = margin_report_cmd(dev, lane, REPORT_MAX_LANES(recvn), &resp); + } + if (status) + params->max_lanes = GET_REG_MASK(resp, LMR_PLD_MAX_LANES); + return status; +} + +/* Margin all lanes_n lanes simultaneously */ +static void +margin_test_lanes(struct margin_lanes_data arg) +{ + u8 steps_done = 0; + margin_cmd lane_status; + u8 marg_type; + margin_cmd step_cmd; + bool timing = (arg.dir == TIM_LEFT || arg.dir == TIM_RIGHT); + + if (timing) + { + marg_type = 3; + step_cmd = MARG_TIM(arg.dir == TIM_LEFT, steps_done, arg.recv->recvn); + } + else + { + marg_type = 4; + step_cmd = MARG_VOLT(arg.dir == VOLT_DOWN, steps_done, arg.recv->recvn); + } + + bool failed_lanes[32] = { 0 }; + u8 alive_lanes = arg.lanes_n; + + for (int i = 0; i < arg.lanes_n; i++) + { + margin_set_cmd(arg.recv->dev, arg.results[i].lane, NO_COMMAND); + margin_set_cmd(arg.recv->dev, arg.results[i].lane, + SET_ERROR_LIMIT(arg.recv->error_limit, arg.recv->recvn)); + margin_set_cmd(arg.recv->dev, arg.results[i].lane, NO_COMMAND); + arg.results[i].steps[arg.dir] = arg.steps_lane_total; + arg.results[i].statuses[arg.dir] = MARGIN_THR; + } + + while (alive_lanes > 0 && steps_done < arg.steps_lane_total) + { + alive_lanes = 0; + steps_done++; + if (timing) + step_cmd = SET_REG_MASK(step_cmd, LMR_PLD_MARGIN_T_STEPS, steps_done); + else + step_cmd = SET_REG_MASK(step_cmd, LMR_PLD_MARGIN_V_STEPS, steps_done); + + for (int i = 0; i < arg.lanes_n; i++) + { + if (!failed_lanes[i]) + { + alive_lanes++; + int ctrl_addr = LMR_LANE_CTRL(arg.recv->dev->lmr_cap_addr, arg.results[i].lane); + pci_write_word(arg.recv->dev->dev, ctrl_addr, step_cmd); + } + } + msleep(MARGIN_STEP_MS); + + for (int i = 0; i < arg.lanes_n; i++) + { + if (!failed_lanes[i]) + { + int status_addr = LMR_LANE_STATUS(arg.recv->dev->lmr_cap_addr, 
arg.results[i].lane); + lane_status = pci_read_word(arg.recv->dev->dev, status_addr); + u8 step_status = GET_REG_MASK(lane_status, LMR_PLD_MARGIN_STS); + if (!(GET_REG_MASK(lane_status, LMR_CMD_TYPE) == marg_type + && GET_REG_MASK(lane_status, LMR_CMD_RECVN) == arg.recv->recvn + && step_status == 2 + && GET_REG_MASK(lane_status, LMR_PLD_ERR_CNT) <= arg.recv->error_limit + && margin_set_cmd(arg.recv->dev, arg.results[i].lane, NO_COMMAND))) + { + alive_lanes--; + failed_lanes[i] = true; + arg.results[i].steps[arg.dir] = steps_done - 1; + arg.results[i].statuses[arg.dir] + = (step_status == 3 || step_status == 1 ? MARGIN_NAK : MARGIN_LIM); + } + } + } + + arg.steps_lane_done = steps_done; + margin_log_margining(arg); + } + + for (int i = 0; i < arg.lanes_n; i++) + { + margin_set_cmd(arg.recv->dev, arg.results[i].lane, NO_COMMAND); + margin_set_cmd(arg.recv->dev, arg.results[i].lane, CLEAR_ERROR_LOG(arg.recv->recvn)); + margin_set_cmd(arg.recv->dev, arg.results[i].lane, NO_COMMAND); + margin_set_cmd(arg.recv->dev, arg.results[i].lane, GO_TO_NORMAL_SETTINGS(arg.recv->recvn)); + margin_set_cmd(arg.recv->dev, arg.results[i].lane, NO_COMMAND); + } +} + +/* Awaits that Receiver is prepared through prep_dev function */ +static bool +margin_test_receiver(struct margin_dev *dev, u8 recvn, struct margin_args *args, + struct margin_results *results) +{ + u8 *lanes_to_margin = args->lanes; + u8 lanes_n = args->lanes_n; + + struct margin_params params; + struct margin_recv recv = { .dev = dev, + .recvn = recvn, + .lane_reversal = false, + .params = ¶ms, + .parallel_lanes = args->parallel_lanes ? 
args->parallel_lanes : 1, + .error_limit = args->error_limit }; + + results->recvn = recvn; + results->lanes_n = lanes_n; + margin_log_recvn(&recv); + + if (!margin_check_ready_bit(dev->dev)) + { + margin_log("\nMargining Ready bit is Clear.\n"); + results->test_status = MARGIN_TEST_READY_BIT; + return false; + } + + if (!read_params_internal(dev, recvn, recv.lane_reversal, ¶ms)) + { + recv.lane_reversal = true; + if (!read_params_internal(dev, recvn, recv.lane_reversal, ¶ms)) + { + margin_log("\nError during caps reading.\n"); + results->test_status = MARGIN_TEST_CAPS; + return false; + } + } + + results->params = params; + + if (recv.parallel_lanes > params.max_lanes + 1) + recv.parallel_lanes = params.max_lanes + 1; + margin_apply_hw_quirks(&recv); + margin_log_hw_quirks(&recv); + + results->tim_off_reported = params.timing_offset != 0; + results->volt_off_reported = params.volt_offset != 0; + double tim_offset = results->tim_off_reported ? (double)params.timing_offset : 50.0; + double volt_offset = results->volt_off_reported ? (double)params.volt_offset : 50.0; + + results->tim_coef = tim_offset / (double)params.timing_steps; + results->volt_coef = volt_offset / (double)params.volt_steps * 10.0; + + results->lane_reversal = recv.lane_reversal; + results->link_speed = dev->link_speed; + results->test_status = MARGIN_TEST_OK; + + margin_log_receiver(&recv); + + results->lanes = xmalloc(sizeof(struct margin_res_lane) * lanes_n); + for (int i = 0; i < lanes_n; i++) + { + results->lanes[i].lane + = recv.lane_reversal ? dev->width - lanes_to_margin[i] - 1 : lanes_to_margin[i]; + } + + if (args->run_margin) + { + if (args->verbosity > 0) + margin_log("\n"); + struct margin_lanes_data lanes_data + = { .recv = &recv, .verbosity = args->verbosity, .steps_utility = args->steps_utility }; + + enum margin_dir dir[] = { TIM_LEFT, TIM_RIGHT, VOLT_UP, VOLT_DOWN }; + + u8 lanes_done = 0; + u8 use_lanes = 0; + u8 steps_t = args->steps_t ? 
args->steps_t : params.timing_steps; + u8 steps_v = args->steps_v ? args->steps_v : params.volt_steps; + + while (lanes_done != lanes_n) + { + use_lanes = (lanes_done + recv.parallel_lanes > lanes_n) ? lanes_n - lanes_done : + recv.parallel_lanes; + lanes_data.lanes_numbers = lanes_to_margin + lanes_done; + lanes_data.lanes_n = use_lanes; + lanes_data.results = results->lanes + lanes_done; + + for (int i = 0; i < 4; i++) + { + bool timing = dir[i] == TIM_LEFT || dir[i] == TIM_RIGHT; + if (!timing && !params.volt_support) + continue; + if (dir[i] == TIM_RIGHT && !params.ind_left_right_tim) + continue; + if (dir[i] == VOLT_DOWN && !params.ind_up_down_volt) + continue; + + lanes_data.ind = timing ? params.ind_left_right_tim : params.ind_up_down_volt; + lanes_data.dir = dir[i]; + lanes_data.steps_lane_total = timing ? steps_t : steps_v; + if (*args->steps_utility >= lanes_data.steps_lane_total) + *args->steps_utility -= lanes_data.steps_lane_total; + else + *args->steps_utility = 0; + margin_test_lanes(lanes_data); + } + lanes_done += use_lanes; + } + if (args->verbosity > 0) + margin_log("\n"); + if (recv.lane_reversal) + { + for (int i = 0; i < lanes_n; i++) + results->lanes[i].lane = lanes_to_margin[i]; + } + } + + return true; +} + +bool +margin_read_params(struct pci_access *pacc, struct pci_dev *dev, u8 recvn, + struct margin_params *params) +{ + struct pci_cap *cap = pci_find_cap(dev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); + if (!cap) + return false; + u8 dev_dir = GET_REG_MASK(pci_read_word(dev, cap->addr + PCI_EXP_FLAGS), PCI_EXP_FLAGS_TYPE); + + bool dev_down; + if (dev_dir == PCI_EXP_TYPE_ROOT_PORT || dev_dir == PCI_EXP_TYPE_DOWNSTREAM) + dev_down = true; + else + dev_down = false; + + if (recvn == 0) + { + if (dev_down) + recvn = 1; + else + recvn = 6; + } + + if (recvn > 6) + return false; + if (dev_down && recvn == 6) + return false; + if (!dev_down && recvn != 6) + return false; + + struct pci_dev *down = NULL; + struct pci_dev *up = NULL; + struct margin_link 
link; + + for (struct pci_dev *p = pacc->devices; p; p = p->next) + { + if (dev_down && pci_read_byte(dev, PCI_SECONDARY_BUS) == p->bus && dev->domain == p->domain + && p->func == 0) + { + down = dev; + up = p; + break; + } + else if (!dev_down && pci_read_byte(p, PCI_SECONDARY_BUS) == dev->bus + && dev->domain == p->domain) + { + down = p; + up = dev; + break; + } + } + + if (!down) + return false; + + if (!margin_fill_link(down, up, &link)) + return false; + + struct margin_dev *dut = (dev_down ? &link.down_port : &link.up_port); + if (!margin_check_ready_bit(dut->dev)) + return false; + + if (!margin_prep_link(&link)) + return false; + + bool status; + bool lane_reversal = false; + status = read_params_internal(dut, recvn, lane_reversal, params); + if (!status) + { + lane_reversal = true; + status = read_params_internal(dut, recvn, lane_reversal, params); + } + + margin_restore_link(&link); + + return status; +} + +enum margin_test_status +margin_process_args(struct margin_dev *dev, struct margin_args *args) +{ + u8 receivers_n = 2 + 2 * dev->retimers_n; + + if (!args->recvs_n) + { + for (int i = 1; i < receivers_n; i++) + args->recvs[i - 1] = i; + args->recvs[receivers_n - 1] = 6; + args->recvs_n = receivers_n; + } + else + { + for (int i = 0; i < args->recvs_n; i++) + { + u8 recvn = args->recvs[i]; + if (recvn < 1 || recvn > 6 || (recvn != 6 && recvn > receivers_n - 1)) + { + return MARGIN_TEST_ARGS_RECVS; + } + } + } + + if (!args->lanes_n) + { + args->lanes_n = dev->width; + for (int i = 0; i < args->lanes_n; i++) + args->lanes[i] = i; + } + else + { + for (int i = 0; i < args->lanes_n; i++) + { + if (args->lanes[i] >= dev->width) + { + return MARGIN_TEST_ARGS_LANES; + } + } + } + + return MARGIN_TEST_OK; +} + +struct margin_results * +margin_test_link(struct margin_link *link, struct margin_args *args, u8 *recvs_n) +{ + bool status = margin_prep_link(link); + + u8 receivers_n = status ? 
args->recvs_n : 1; + u8 *receivers = args->recvs; + + margin_log_link(link); + + struct margin_results *results = xmalloc(sizeof(*results) * receivers_n); + + if (!status) + { + results[0].test_status = MARGIN_TEST_ASPM; + margin_log("\nCouldn't disable ASPM on the given Link.\n"); + } + + if (status) + { + struct margin_dev *dut; + for (int i = 0; i < receivers_n; i++) + { + dut = receivers[i] == 6 ? &link->up_port : &link->down_port; + margin_test_receiver(dut, receivers[i], args, &results[i]); + } + + margin_restore_link(link); + } + + *recvs_n = receivers_n; + return results; +} + +void +margin_free_results(struct margin_results *results, u8 results_n) +{ + for (int i = 0; i < results_n; i++) + { + if (results[i].test_status == MARGIN_TEST_OK) + free(results[i].lanes); + } + free(results); +} diff --git a/lmr/margin_hw.c b/lmr/margin_hw.c new file mode 100644 index 0000000..fc427c8 --- /dev/null +++ b/lmr/margin_hw.c @@ -0,0 +1,160 @@ +/* + * The PCI Utilities -- Verify and prepare devices before margining + * + * Copyright (c) 2023 KNS Group LLC (YADRO) + * + * Can be freely distributed and used under the terms of the GNU GPL v2+. 
+ * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "lmr.h" + +static u16 special_hw[][4] = + // Vendor ID, Device ID, Revision ID, margin_hw + { { 0x8086, 0x347A, 0x4, MARGIN_ICE_LAKE_RC }, + { 0xFFFF, 0, 0, MARGIN_HW_DEFAULT } + }; + +static enum margin_hw +detect_unique_hw(struct pci_dev *dev) +{ + u16 vendor = pci_read_word(dev, PCI_VENDOR_ID); + u16 device = pci_read_word(dev, PCI_DEVICE_ID); + u8 revision = pci_read_byte(dev, PCI_REVISION_ID); + + for (int i = 0; special_hw[i][0] != 0xFFFF; i++) + { + if (vendor == special_hw[i][0] && device == special_hw[i][1] && revision == special_hw[i][2]) + return special_hw[i][3]; + } + return MARGIN_HW_DEFAULT; +} + +bool +margin_verify_link(struct pci_dev *down_port, struct pci_dev *up_port) +{ + struct pci_cap *cap = pci_find_cap(down_port, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); + if (!cap) + return false; + if ((pci_read_word(down_port, cap->addr + PCI_EXP_LNKSTA) & PCI_EXP_LNKSTA_SPEED) < 4) + return false; + if ((pci_read_word(down_port, cap->addr + PCI_EXP_LNKSTA) & PCI_EXP_LNKSTA_SPEED) > 5) + return false; + + u8 down_type = pci_read_byte(down_port, PCI_HEADER_TYPE) & 0x7F; + u8 down_sec = pci_read_byte(down_port, PCI_SECONDARY_BUS); + u8 down_dir + = GET_REG_MASK(pci_read_word(down_port, cap->addr + PCI_EXP_FLAGS), PCI_EXP_FLAGS_TYPE); + + // Verify that devices are linked, down_port is Root Port or Downstream Port of Switch, + // up_port is Function 0 of a Device + if (!(down_sec == up_port->bus && down_type == PCI_HEADER_TYPE_BRIDGE + && (down_dir == PCI_EXP_TYPE_ROOT_PORT || down_dir == PCI_EXP_TYPE_DOWNSTREAM) + && up_port->func == 0)) + return false; + + struct pci_cap *pm = pci_find_cap(up_port, PCI_CAP_ID_PM, PCI_CAP_NORMAL); + return pm && !(pci_read_word(up_port, pm->addr + PCI_PM_CTRL) & PCI_PM_CTRL_STATE_MASK); // D0 +} + +bool +margin_check_ready_bit(struct pci_dev *dev) +{ + struct pci_cap *lmr = pci_find_cap(dev, PCI_EXT_CAP_ID_LMR, PCI_CAP_EXTENDED); + return lmr && (pci_read_word(dev, 
lmr->addr + PCI_LMR_PORT_STS) & PCI_LMR_PORT_STS_READY); +} + +/* Awaits device at 16 GT/s or higher */ +static struct margin_dev +fill_dev_wrapper(struct pci_dev *dev) +{ + struct pci_cap *cap = pci_find_cap(dev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); + struct margin_dev res + = { .dev = dev, + .lmr_cap_addr = pci_find_cap(dev, PCI_EXT_CAP_ID_LMR, PCI_CAP_EXTENDED)->addr, + .width = GET_REG_MASK(pci_read_word(dev, cap->addr + PCI_EXP_LNKSTA), PCI_EXP_LNKSTA_WIDTH), + .retimers_n + = (!!(pci_read_word(dev, cap->addr + PCI_EXP_LNKSTA2) & PCI_EXP_LINKSTA2_RETIMER)) + + (!!(pci_read_word(dev, cap->addr + PCI_EXP_LNKSTA2) & PCI_EXP_LINKSTA2_2RETIMERS)), + .link_speed = (pci_read_word(dev, cap->addr + PCI_EXP_LNKSTA) & PCI_EXP_LNKSTA_SPEED), + .hw = detect_unique_hw(dev) }; + return res; +} + +bool +margin_fill_link(struct pci_dev *down_port, struct pci_dev *up_port, struct margin_link *wrappers) +{ + if (!margin_verify_link(down_port, up_port)) + return false; + wrappers->down_port = fill_dev_wrapper(down_port); + wrappers->up_port = fill_dev_wrapper(up_port); + return true; +} + +/* Disable ASPM, set Hardware Autonomous Speed/Width Disable bits */ +static bool +margin_prep_dev(struct margin_dev *dev) +{ + struct pci_cap *pcie = pci_find_cap(dev->dev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); + if (!pcie) + return false; + + u16 lnk_ctl = pci_read_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL); + dev->aspm = lnk_ctl & PCI_EXP_LNKCTL_ASPM; + dev->hawd = !!(lnk_ctl & PCI_EXP_LNKCTL_HWAUTWD); + lnk_ctl &= ~PCI_EXP_LNKCTL_ASPM; + pci_write_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL, lnk_ctl); + if (pci_read_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL) & PCI_EXP_LNKCTL_ASPM) + return false; + + lnk_ctl |= PCI_EXP_LNKCTL_HWAUTWD; + pci_write_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL, lnk_ctl); + + u16 lnk_ctl2 = pci_read_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL2); + dev->hasd = !!(lnk_ctl2 & PCI_EXP_LNKCTL2_SPEED_DIS); + lnk_ctl2 |= PCI_EXP_LNKCTL2_SPEED_DIS; + pci_write_word(dev->dev, 
pcie->addr + PCI_EXP_LNKCTL2, lnk_ctl2); + + return true; +} + +/* Restore Device ASPM, Hardware Autonomous Speed/Width settings */ +static void +margin_restore_dev(struct margin_dev *dev) +{ + struct pci_cap *pcie = pci_find_cap(dev->dev, PCI_CAP_ID_EXP, PCI_CAP_NORMAL); + if (!pcie) + return; + + u16 lnk_ctl = pci_read_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL); + lnk_ctl = SET_REG_MASK(lnk_ctl, PCI_EXP_LNKCAP_ASPM, dev->aspm); + lnk_ctl = SET_REG_MASK(lnk_ctl, PCI_EXP_LNKCTL_HWAUTWD, dev->hawd); + pci_write_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL, lnk_ctl); + + u16 lnk_ctl2 = pci_read_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL2); + lnk_ctl2 = SET_REG_MASK(lnk_ctl2, PCI_EXP_LNKCTL2_SPEED_DIS, dev->hasd); + pci_write_word(dev->dev, pcie->addr + PCI_EXP_LNKCTL2, lnk_ctl2); +} + +bool +margin_prep_link(struct margin_link *link) +{ + if (!link) + return false; + if (!margin_prep_dev(&link->down_port)) + return false; + if (!margin_prep_dev(&link->up_port)) + { + margin_restore_dev(&link->down_port); + return false; + } + return true; +} + +void +margin_restore_link(struct margin_link *link) +{ + margin_restore_dev(&link->down_port); + margin_restore_dev(&link->up_port); +} diff --git a/lmr/margin_log.c b/lmr/margin_log.c new file mode 100644 index 0000000..b3c4bd5 --- /dev/null +++ b/lmr/margin_log.c @@ -0,0 +1,158 @@ +/* + * The PCI Utilities -- Log margining process + * + * Copyright (c) 2023 KNS Group LLC (YADRO) + * + * Can be freely distributed and used under the terms of the GNU GPL v2+. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdarg.h> +#include <stdio.h> + +#include "lmr.h" + +bool margin_global_logging = false; +bool margin_print_domain = true; + +void +margin_log(char *format, ...) 
+{ + va_list arg; + va_start(arg, format); + if (margin_global_logging) + vprintf(format, arg); + va_end(arg); +} + +void +margin_log_bdfs(struct pci_dev *down, struct pci_dev *up) +{ + if (margin_print_domain) + margin_log("%x:%x:%x.%x -> %x:%x:%x.%x", down->domain, down->bus, down->dev, down->func, + up->domain, up->bus, up->dev, up->func); + else + margin_log("%x:%x.%x -> %x:%x.%x", down->bus, down->dev, down->func, up->bus, up->dev, + up->func); +} + +void +margin_log_link(struct margin_link *link) +{ + margin_log("Link "); + margin_log_bdfs(link->down_port.dev, link->up_port.dev); + margin_log("\nNegotiated Link Width: %d\n", link->down_port.width); + margin_log("Link Speed: %d.0 GT/s = Gen %d\n", (link->down_port.link_speed - 3) * 16, + link->down_port.link_speed); + margin_log("Available receivers: "); + int receivers_n = 2 + 2 * link->down_port.retimers_n; + for (int i = 1; i < receivers_n; i++) + margin_log("Rx(%X) - %d, ", 10 + i - 1, i); + margin_log("Rx(F) - 6\n"); +} + +void +margin_log_params(struct margin_params *params) +{ + margin_log("Independent Error Sampler: %d\n", params->ind_error_sampler); + margin_log("Sample Reporting Method: %d\n", params->sample_report_method); + margin_log("Independent Left and Right Timing Margining: %d\n", params->ind_left_right_tim); + margin_log("Voltage Margining Supported: %d\n", params->volt_support); + margin_log("Independent Up and Down Voltage Margining: %d\n", params->ind_up_down_volt); + margin_log("Number of Timing Steps: %d\n", params->timing_steps); + margin_log("Number of Voltage Steps: %d\n", params->volt_steps); + margin_log("Max Timing Offset: %d\n", params->timing_offset); + margin_log("Max Voltage Offset: %d\n", params->volt_offset); + margin_log("Max Lanes: %d\n", params->max_lanes); +} + +void +margin_log_recvn(struct margin_recv *recv) +{ + margin_log("\nReceiver = Rx(%X)\n", 10 + recv->recvn - 1); +} + +void +margin_log_receiver(struct margin_recv *recv) +{ + margin_log("\nError Count Limit = 
%d\n", recv->error_limit); + margin_log("Parallel Lanes: %d\n\n", recv->parallel_lanes); + + margin_log_params(recv->params); + + if (recv->lane_reversal) + { + margin_log("\nWarning: device uses Lane Reversal.\n"); + margin_log("However, utility uses logical lane numbers in arguments and for logging.\n"); + } + + if (recv->params->timing_offset == 0) + margin_log("\nWarning: Vendor chose not to report the Max Timing Offset.\n" + "Utility will use its max possible value - 50 (50%% UI).\n"); + if (recv->params->volt_support && recv->params->volt_offset == 0) + margin_log("\nWarning: Vendor chose not to report the Max Voltage Offset.\n" + "Utility will use its max possible value - 50 (500 mV).\n"); +} + +void +margin_log_margining(struct margin_lanes_data arg) +{ + char *ind_dirs[] = { "Up", "Down", "Left", "Right" }; + char *non_ind_dirs[] = { "Voltage", "", "Timing" }; + + if (arg.verbosity > 0) + { + margin_log("\033[2K\rMargining - "); + if (arg.ind) + margin_log("%s", ind_dirs[arg.dir]); + else + margin_log("%s", non_ind_dirs[arg.dir]); + + u8 lanes_counter = 0; + margin_log(" - Lanes "); + margin_log("[%d", arg.lanes_numbers[0]); + for (int i = 1; i < arg.lanes_n; i++) + { + if (arg.lanes_numbers[i] - 1 == arg.lanes_numbers[i - 1]) + { + lanes_counter++; + if (lanes_counter == 1) + margin_log("-"); + if (i + 1 == arg.lanes_n) + margin_log("%d", arg.lanes_numbers[i]); + } + else + { + if (lanes_counter > 0) + margin_log("%d", arg.lanes_numbers[i - 1]); + margin_log(",%d", arg.lanes_numbers[i]); + lanes_counter = 0; + } + } + margin_log("]"); + + u64 lane_eta_s = (arg.steps_lane_total - arg.steps_lane_done) * MARGIN_STEP_MS / 1000; + u64 total_eta_s = *arg.steps_utility * MARGIN_STEP_MS / 1000 + lane_eta_s; + margin_log(" - ETA: %3ds Steps: %3d Total ETA: %3dm %2ds", lane_eta_s, arg.steps_lane_done, + total_eta_s / 60, total_eta_s % 60); + + fflush(stdout); + } +} + +void +margin_log_hw_quirks(struct margin_recv *recv) +{ + switch (recv->dev->hw) + { + case 
MARGIN_ICE_LAKE_RC: + if (recv->recvn == 1) + margin_log("\nRx(A) is Intel Ice Lake RC port.\n" + "Applying next quirks for margining process:\n" + " - Set MaxVoltageOffset to 12 (120 mV).\n"); + break; + default: + break; + } +} diff --git a/lmr/margin_results.c b/lmr/margin_results.c new file mode 100644 index 0000000..4d28f04 --- /dev/null +++ b/lmr/margin_results.c @@ -0,0 +1,283 @@ +/* + * The PCI Utilities -- Display/save margining results + * + * Copyright (c) 2023 KNS Group LLC (YADRO) + * + * Can be freely distributed and used under the terms of the GNU GPL v2+. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> + +#include "lmr.h" + +enum lane_rating { + BAD = 0, + OKAY, + PERFECT, + WEIRD, + INIT, +}; + +static char *const grades[] = { "Bad", "Okay", "Perfect", "Weird" }; +static char *const sts_strings[] = { "NAK", "LIM", "THR" }; +static const double ui[] = { 62.5 / 100, 31.25 / 100 }; + +static enum lane_rating +rate_lane(double value, double min, double recommended, enum lane_rating cur_rate) +{ + enum lane_rating res = PERFECT; + if (value < recommended) + res = OKAY; + if (value < min) + res = BAD; + if (cur_rate == INIT) + return res; + if (res < cur_rate) + return res; + else + return cur_rate; +} + +static bool +check_recv_weird(struct margin_results *results, double tim_min, double volt_min) +{ + bool result = true; + + struct margin_res_lane *lane; + for (int i = 0; i < results->lanes_n && result; i++) + { + lane = &(results->lanes[i]); + if (lane->steps[TIM_LEFT] * results->tim_coef != tim_min) + result = false; + if (results->params.ind_left_right_tim + && lane->steps[TIM_RIGHT] * results->tim_coef != tim_min) + result = false; + if (results->params.volt_support) + { + if (lane->steps[VOLT_UP] * results->volt_coef != volt_min) + result = false; + if (results->params.ind_up_down_volt + && lane->steps[VOLT_DOWN] * results->volt_coef != volt_min) + result = 
false; + } + } + return result; +} + +void +margin_results_print_brief(struct margin_results *results, u8 recvs_n) +{ + struct margin_res_lane *lane; + struct margin_results *res; + struct margin_params params; + + enum lane_rating lane_rating; + + u8 link_speed; + + char *no_test_msgs[] = { "", + "Margining Ready bit is Clear", + "Error during caps reading", + "Margining prerequisites are not satisfied (16/32 GT/s, D0)", + "Invalid lanes specified with arguments", + "Invalid receivers specified with arguments", + "Couldn't disable ASPM" }; + + for (int i = 0; i < recvs_n; i++) + { + res = &(results[i]); + params = res->params; + link_speed = res->link_speed - 4; + + if (res->test_status != MARGIN_TEST_OK) + { + if (res->test_status < MARGIN_TEST_PREREQS) + printf("Rx(%X) -", 10 + res->recvn - 1); + printf(" Couldn't run test (%s)\n\n", no_test_msgs[res->test_status]); + continue; + } + + if (res->lane_reversal) + printf("Rx(%X) - Lane Reversal\n", 10 + res->recvn - 1); + + if (!res->tim_off_reported) + printf("Rx(%X) - Attention: Vendor chose not to report the Max Timing Offset.\n" + "Utility used its max possible value (50%% UI) for calculations of %% UI and ps.\n" + "Keep in mind that for timing results of this receiver only steps values are " + "reliable.\n\n", + 10 + res->recvn - 1); + if (params.volt_support && !res->volt_off_reported) + printf("Rx(%X) - Attention: Vendor chose not to report the Max Voltage Offset.\n" + "Utility used its max possible value (500 mV) for calculations of mV.\n" + "Keep in mind that for voltage results of this receiver only steps values are " + "reliable.\n\n", + 10 + res->recvn - 1); + + if (check_recv_weird(res, MARGIN_TIM_MIN, MARGIN_VOLT_MIN)) + lane_rating = WEIRD; + else + lane_rating = INIT; + + for (u8 j = 0; j < res->lanes_n; j++) + { + lane = &(res->lanes[j]); + double left_ui = lane->steps[TIM_LEFT] * res->tim_coef; + double right_ui = lane->steps[TIM_RIGHT] * res->tim_coef; + double up_volt = lane->steps[VOLT_UP] * 
res->volt_coef; + double down_volt = lane->steps[VOLT_DOWN] * res->volt_coef; + + if (lane_rating != WEIRD) + { + lane_rating = rate_lane(left_ui, MARGIN_TIM_MIN, MARGIN_TIM_RECOMMEND, INIT); + if (params.ind_left_right_tim) + lane_rating + = rate_lane(right_ui, MARGIN_TIM_MIN, MARGIN_TIM_RECOMMEND, lane_rating); + if (params.volt_support) + { + lane_rating = rate_lane(up_volt, MARGIN_VOLT_MIN, MARGIN_VOLT_MIN, lane_rating); + if (params.ind_up_down_volt) + lane_rating + = rate_lane(down_volt, MARGIN_VOLT_MIN, MARGIN_VOLT_MIN, lane_rating); + } + } + + printf("Rx(%X) Lane %2d - %s\t", 10 + res->recvn - 1, lane->lane, grades[lane_rating]); + if (params.ind_left_right_tim) + printf("L %4.1f%% UI - %5.2fps - %2dst %s, R %4.1f%% UI - %5.2fps - %2dst %s", left_ui, + left_ui * ui[link_speed], lane->steps[TIM_LEFT], + sts_strings[lane->statuses[TIM_LEFT]], right_ui, right_ui * ui[link_speed], + lane->steps[TIM_RIGHT], sts_strings[lane->statuses[TIM_RIGHT]]); + else + printf("T %4.1f%% UI - %5.2fps - %2dst %s", left_ui, left_ui * ui[link_speed], + lane->steps[TIM_LEFT], sts_strings[lane->statuses[TIM_LEFT]]); + if (params.volt_support) + { + if (params.ind_up_down_volt) + printf(", U %5.1f mV - %3dst %s, D %5.1f mV - %3dst %s", up_volt, + lane->steps[VOLT_UP], sts_strings[lane->statuses[VOLT_UP]], down_volt, + lane->steps[VOLT_DOWN], sts_strings[lane->statuses[VOLT_DOWN]]); + else + printf(", V %5.1f mV - %3dst %s", up_volt, lane->steps[VOLT_UP], + sts_strings[lane->statuses[VOLT_UP]]); + } + printf("\n"); + } + printf("\n"); + } +} + +void +margin_results_save_csv(struct margin_results *results, u8 recvs_n, char *dir, + struct pci_dev *up_port) +{ + char timestamp[64]; + time_t tim = time(NULL); + strftime(timestamp, sizeof(timestamp), "%Y-%m-%dT%H:%M:%S", gmtime(&tim)); + + size_t pathlen = strlen(dir) + 128; + char *path = xmalloc(pathlen); + FILE *csv; + + struct margin_res_lane *lane; + struct margin_results *res; + struct margin_params params; + + enum lane_rating 
lane_rating; + u8 link_speed; + + for (int i = 0; i < recvs_n; i++) + { + res = &(results[i]); + params = res->params; + link_speed = res->link_speed - 4; + + if (res->test_status != MARGIN_TEST_OK) + continue; + snprintf(path, pathlen, "%s/lmr_%0*x.%02x.%02x.%x_Rx%X_%s.csv", dir, + up_port->domain_16 == 0xffff ? 8 : 4, up_port->domain, up_port->bus, up_port->dev, + up_port->func, 10 + res->recvn - 1, timestamp); + csv = fopen(path, "w"); + if (!csv) + die("Error while saving %s\n", path); + + fprintf(csv, "Lane,Lane Status,Left %% UI,Left ps,Left Steps,Left Status," + "Right %% UI,Right ps,Right Steps,Right Status," + "Time %% UI,Time ps,Time Steps,Time Status," + "Up mV,Up Steps,Up Status,Down mV,Down Steps,Down Status," + "Voltage mV,Voltage Steps,Voltage Status\n"); + + if (check_recv_weird(res, MARGIN_TIM_MIN, MARGIN_VOLT_MIN)) + lane_rating = WEIRD; + else + lane_rating = INIT; + + for (int j = 0; j < res->lanes_n; j++) + { + lane = &(res->lanes[j]); + double left_ui = lane->steps[TIM_LEFT] * res->tim_coef; + double right_ui = lane->steps[TIM_RIGHT] * res->tim_coef; + double up_volt = lane->steps[VOLT_UP] * res->volt_coef; + double down_volt = lane->steps[VOLT_DOWN] * res->volt_coef; + + if (lane_rating != WEIRD) + { + lane_rating = rate_lane(left_ui, MARGIN_TIM_MIN, MARGIN_TIM_RECOMMEND, INIT); + if (params.ind_left_right_tim) + lane_rating + = rate_lane(right_ui, MARGIN_TIM_MIN, MARGIN_TIM_RECOMMEND, lane_rating); + if (params.volt_support) + { + lane_rating = rate_lane(up_volt, MARGIN_VOLT_MIN, MARGIN_VOLT_MIN, lane_rating); + if (params.ind_up_down_volt) + lane_rating + = rate_lane(down_volt, MARGIN_VOLT_MIN, MARGIN_VOLT_MIN, lane_rating); + } + } + + fprintf(csv, "%d,%s,", lane->lane, grades[lane_rating]); + if (params.ind_left_right_tim) + { + fprintf(csv, "%f,%f,%d,%s,%f,%f,%d,%s,NA,NA,NA,NA,", left_ui, + left_ui * ui[link_speed], lane->steps[TIM_LEFT], + sts_strings[lane->statuses[TIM_LEFT]], right_ui, right_ui * ui[link_speed], + 
lane->steps[TIM_RIGHT], sts_strings[lane->statuses[TIM_RIGHT]]); + } + else + { + for (int k = 0; k < 8; k++) + fprintf(csv, "NA,"); + fprintf(csv, "%f,%f,%d,%s,", left_ui, left_ui * ui[link_speed], lane->steps[TIM_LEFT], + sts_strings[lane->statuses[TIM_LEFT]]); + } + if (params.volt_support) + { + if (params.ind_up_down_volt) + { + fprintf(csv, "%f,%d,%s,%f,%d,%s,NA,NA,NA\n", up_volt, lane->steps[VOLT_UP], + sts_strings[lane->statuses[VOLT_UP]], down_volt, lane->steps[VOLT_DOWN], + sts_strings[lane->statuses[VOLT_DOWN]]); + } + else + { + for (int k = 0; k < 6; k++) + fprintf(csv, "NA,"); + fprintf(csv, "%f,%d,%s\n", up_volt, lane->steps[VOLT_UP], + sts_strings[lane->statuses[VOLT_UP]]); + } + } + else + { + for (int k = 0; k < 8; k++) + fprintf(csv, "NA,"); + fprintf(csv, "NA\n"); + } + } + fclose(csv); + } + free(path); +} @@ -692,6 +692,26 @@ cap_rcec(struct device *d, int where) } static void +cap_lmr(struct device *d, int where) +{ + printf("Lane Margining at the Receiver\n"); + + if (verbose < 2) + return; + + if (!config_fetch(d, where, 8)) + return; + + u16 port_caps = get_conf_word(d, where + PCI_LMR_CAPS); + u16 port_status = get_conf_word(d, where + PCI_LMR_PORT_STS); + + printf("\t\tPortCap: Uses Driver%c\n", FLAG(port_caps, PCI_LMR_CAPS_DRVR)); + printf("\t\tPortSta: MargReady%c MargSoftReady%c\n", + FLAG(port_status, PCI_LMR_PORT_STS_READY), + FLAG(port_status, PCI_LMR_PORT_STS_SOFT_READY)); +} + +static void cxl_range(u64 base, u64 size, int n) { u32 interleave[] = { 0, 256, 4096, 512, 1024, 2048, 8192, 16384 }; @@ -1607,7 +1627,7 @@ show_ext_caps(struct device *d, int type) printf("Physical Layer 16.0 GT/s <?>\n"); break; case PCI_EXT_CAP_ID_LMR: - printf("Lane Margining at the Receiver <?>\n"); + cap_lmr(d, where); break; case PCI_EXT_CAP_ID_HIER_ID: printf("Hierarchy ID <?>\n"); @@ -107,6 +107,7 @@ config_fetch(struct device *d, unsigned int pos, unsigned int len) d->config = xrealloc(d->config, d->config_bufsize); d->present = 
xrealloc(d->present, d->config_bufsize); memset(d->present + orig_size, 0, d->config_bufsize - orig_size); + pci_setup_cache(d->dev, d->config, d->dev->cache_len); } result = pci_read_block(d->dev, pos, d->config + pos, len); if (result) @@ -58,12 +58,6 @@ u32 get_conf_long(struct device *d, unsigned int pos); word get_conf_word(struct device *d, unsigned int pos); byte get_conf_byte(struct device *d, unsigned int pos); -/* Useful macros for decoding of bits and bit fields */ - -#define FLAG(x,y) ((x & y) ? '+' : '-') -#define BITS(x,at,width) (((x) >> (at)) & ((1 << (width)) - 1)) -#define TABLE(tab,x,buf) ((x) < sizeof(tab)/sizeof((tab)[0]) ? (tab)[x] : (sprintf((buf), "??%d", (x)), (buf))) - /* ls-vpd.c */ void cap_vpd(struct device *d); diff --git a/maint/push-to-public b/maint/push-to-public new file mode 100755 index 0000000..d1ab4f3 --- /dev/null +++ b/maint/push-to-public @@ -0,0 +1,4 @@ +#!/bin/sh +set -e +git push public +git push github @@ -237,7 +237,9 @@ Configuration Manager. DNS domain containing the ID database. .TP .B net.cache_name -Name of the file used for caching of resolved ID's. +Name of the file used for caching of resolved ID's. An initial +.B ~/ +is expanded to the user's home directory. .SS Parameters for resolving of ID's via UDEV's HWDB .TP diff --git a/pcilmr.c b/pcilmr.c new file mode 100644 index 0000000..cb8bd77 --- /dev/null +++ b/pcilmr.c @@ -0,0 +1,481 @@ +/* + * The PCI Utilities -- Margining utility main function + * + * Copyright (c) 2023 KNS Group LLC (YADRO) + * + * Can be freely distributed and used under the terms of the GNU GPL v2+. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include <memory.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "lmr/lmr.h" + +const char program_name[] = "pcilmr"; + +enum mode { MARGIN, FULL, SCAN }; + +static const char usage_msg[] + = "! Utility requires preliminary preparation of the system. 
Refer to the pcilmr man page !\n\n" + "Usage:\n" + "pcilmr [--margin] [<margining options>] <downstream component> ...\n" + "pcilmr --full [<margining options>]\n" + "pcilmr --scan\n\n" + "Device Specifier:\n" + "<device/component>:\t[<domain>:]<bus>:<dev>.<func>\n\n" + "Modes:\n" + "--margin\t\tMargin selected Links\n" + "--full\t\t\tMargin all ready for testing Links in the system (one by one)\n" + "--scan\t\t\tScan for Links available for margining\n\n" + "Margining options:\n\n" + "Margining Test settings:\n" + "-c\t\t\tPrint Device Lane Margining Capabilities only. Do not run margining.\n" + "-l <lane>[,<lane>...]\tSpecify lanes for margining. Default: all link lanes.\n" + "\t\t\tRemember that Device may use Lane Reversal for Lane numbering.\n" + "\t\t\tHowever, utility uses logical lane numbers in arguments and for logging.\n" + "\t\t\tUtility will automatically determine Lane Reversal and tune its calls.\n" + "-e <errors>\t\tSpecify Error Count Limit for margining. Default: 4.\n" + "-r <recvn>[,<recvn>...]\tSpecify Receivers to select margining targets.\n" + "\t\t\tDefault: all available Receivers (including Retimers).\n" + "-p <parallel_lanes>\tSpecify number of lanes to margin simultaneously.\n" + "\t\t\tDefault: 1.\n" + "\t\t\tAccording to spec it's possible for Receiver to margin up\n" + "\t\t\tto MaxLanes + 1 lanes simultaneously, but usually this works\n" + "\t\t\tbad, so this option is for experiments mostly.\n" + "-T\t\t\tTime Margining will continue until the Error Count is no more\n" + "\t\t\tthan an Error Count Limit. 
Use this option to find Link limit.\n" + "-V\t\t\tSame as -T option, but for Voltage.\n" + "-t <steps>\t\tSpecify maximum number of steps for Time Margining.\n" + "-v <steps>\t\tSpecify maximum number of steps for Voltage Margining.\n" + "Use only one of -T/-t options at the same time (same for -V/-v).\n" + "Without these options utility will use MaxSteps from Device\n" + "capabilities as test limit.\n\n" + "Margining Log settings:\n" + "-o <directory>\t\tSave margining results in csv form into the\n" + "\t\t\tspecified directory. Utility will generate file with the\n" + "\t\t\tname in form of 'lmr_<downstream component>_Rx#_<timestamp>.csv'\n" + "\t\t\tfor each successfully tested receiver.\n"; + +static struct pci_dev * +dev_for_filter(struct pci_access *pacc, char *filter) +{ + struct pci_filter pci_filter; + pci_filter_init(pacc, &pci_filter); + if (pci_filter_parse_slot(&pci_filter, filter)) + die("Invalid device ID: %s\n", filter); + + if (pci_filter.bus == -1 || pci_filter.slot == -1 || pci_filter.func == -1) + die("Invalid device ID: %s\n", filter); + + if (pci_filter.domain == -1) + pci_filter.domain = 0; + + for (struct pci_dev *p = pacc->devices; p; p = p->next) + { + if (pci_filter_match(&pci_filter, p)) + return p; + } + + die("No such PCI device: %s or you don't have enough privileges.\n", filter); +} + +static struct pci_dev * +find_down_port_for_up(struct pci_access *pacc, struct pci_dev *up) +{ + struct pci_dev *down = NULL; + for (struct pci_dev *p = pacc->devices; p; p = p->next) + { + if (pci_read_byte(p, PCI_SECONDARY_BUS) == up->bus && up->domain == p->domain) + { + down = p; + break; + } + } + return down; +} + +static u8 +parse_csv_arg(char *arg, u8 *vals) +{ + u8 cnt = 0; + char *token = strtok(arg, ","); + while (token) + { + vals[cnt] = atoi(token); + cnt++; + token = strtok(NULL, ","); + } + return cnt; +} + +static void +scan_links(struct pci_access *pacc, bool only_ready) +{ + if (only_ready) + printf("Links ready for margining:\n"); + 
else + printf("Links with Lane Margining at the Receiver capabilities:\n"); + bool flag = true; + for (struct pci_dev *up = pacc->devices; up; up = up->next) + { + if (pci_find_cap(up, PCI_EXT_CAP_ID_LMR, PCI_CAP_EXTENDED)) + { + struct pci_dev *down = find_down_port_for_up(pacc, up); + + if (down && margin_verify_link(down, up)) + { + margin_log_bdfs(down, up); + if (!only_ready && (margin_check_ready_bit(down) || margin_check_ready_bit(up))) + printf(" - Ready"); + printf("\n"); + flag = false; + } + } + } + if (flag) + printf("Links not found or you don't have enough privileges.\n"); + pci_cleanup(pacc); + exit(0); +} + +static u8 +find_ready_links(struct pci_access *pacc, struct pci_dev **down_ports, struct pci_dev **up_ports, + bool cnt_only) +{ + u8 cnt = 0; + for (struct pci_dev *up = pacc->devices; up; up = up->next) + { + if (pci_find_cap(up, PCI_EXT_CAP_ID_LMR, PCI_CAP_EXTENDED)) + { + struct pci_dev *down = find_down_port_for_up(pacc, up); + + if (down && margin_verify_link(down, up) + && (margin_check_ready_bit(down) || margin_check_ready_bit(up))) + { + if (!cnt_only) + { + up_ports[cnt] = up; + down_ports[cnt] = down; + } + cnt++; + } + } + } + return cnt; +} + +int +main(int argc, char **argv) +{ + struct pci_access *pacc; + + struct pci_dev **up_ports; + struct pci_dev **down_ports; + u8 ports_n = 0; + + struct margin_link *links; + bool *checks_status_ports; + + bool status = true; + enum mode mode; + + /* each link has several receivers -> several results */ + struct margin_results **results; + u8 *results_n; + + struct margin_args *args; + + u8 steps_t_arg = 0; + u8 steps_v_arg = 0; + u8 parallel_lanes_arg = 1; + u8 error_limit = 4; + u8 lanes_arg[32]; + u8 recvs_arg[6]; + + u8 lanes_n = 0; + u8 recvs_n = 0; + + bool run_margin = true; + + char *dir_for_csv = NULL; + bool save_csv = false; + + u64 total_steps = 0; + + pacc = pci_alloc(); + pci_init(pacc); + pci_scan_bus(pacc); + + margin_print_domain = false; + for (struct pci_dev *dev = 
pacc->devices; dev; dev = dev->next) + { + if (dev->domain != 0) + { + margin_print_domain = true; + break; + } + } + + margin_global_logging = true; + + struct option long_options[] + = { { .name = "margin", .has_arg = no_argument, .flag = NULL, .val = 0 }, + { .name = "scan", .has_arg = no_argument, .flag = NULL, .val = 1 }, + { .name = "full", .has_arg = no_argument, .flag = NULL, .val = 2 }, + { 0, 0, 0, 0 } }; + + int c; + c = getopt_long(argc, argv, ":", long_options, NULL); + + switch (c) + { + case -1: /* no options (strings like component are possible) */ + /* FALLTHROUGH */ + case 0: + mode = MARGIN; + break; + case 1: + mode = SCAN; + if (optind == argc) + scan_links(pacc, false); + optind--; + break; + case 2: + mode = FULL; + break; + default: /* unknown option symbol */ + mode = MARGIN; + optind--; + break; + } + + while ((c = getopt(argc, argv, ":r:e:l:cp:t:v:VTo:")) != -1) + { + switch (c) + { + case 't': + steps_t_arg = atoi(optarg); + break; + case 'T': + steps_t_arg = 63; + break; + case 'v': + steps_v_arg = atoi(optarg); + break; + case 'V': + steps_v_arg = 127; + break; + case 'p': + parallel_lanes_arg = atoi(optarg); + break; + case 'c': + run_margin = false; + break; + case 'l': + lanes_n = parse_csv_arg(optarg, lanes_arg); + break; + case 'e': + error_limit = atoi(optarg); + break; + case 'r': + recvs_n = parse_csv_arg(optarg, recvs_arg); + break; + case 'o': + dir_for_csv = optarg; + save_csv = true; + break; + default: + die("Invalid arguments\n\n%s", usage_msg); + } + } + + if (mode == FULL && optind != argc) + status = false; + if (mode == MARGIN && optind == argc) + status = false; + if (!status && argc > 1) + die("Invalid arguments\n\n%s", usage_msg); + if (!status) + { + printf("%s", usage_msg); + exit(0); + } + + if (mode == FULL) + { + ports_n = find_ready_links(pacc, NULL, NULL, true); + if (ports_n == 0) + { + die("Links not found or you don't have enough privileges.\n"); + } + else + { + up_ports = xmalloc(ports_n * 
sizeof(*up_ports)); + down_ports = xmalloc(ports_n * sizeof(*down_ports)); + find_ready_links(pacc, down_ports, up_ports, false); + } + } + else if (mode == MARGIN) + { + ports_n = argc - optind; + up_ports = xmalloc(ports_n * sizeof(*up_ports)); + down_ports = xmalloc(ports_n * sizeof(*down_ports)); + + u8 cnt = 0; + while (optind != argc) + { + up_ports[cnt] = dev_for_filter(pacc, argv[optind]); + down_ports[cnt] = find_down_port_for_up(pacc, up_ports[cnt]); + if (!down_ports[cnt]) + die("Cannot find Upstream Component for the specified device: %s\n", argv[optind]); + cnt++; + optind++; + } + } + else + die("Bug in the args parsing!\n"); + + if (!pci_find_cap(up_ports[0], PCI_CAP_ID_EXP, PCI_CAP_NORMAL)) + die("Looks like you don't have enough privileges to access " + "Device Configuration Space.\nTry to run utility as root.\n"); + + results = xmalloc(ports_n * sizeof(*results)); + results_n = xmalloc(ports_n * sizeof(*results_n)); + links = xmalloc(ports_n * sizeof(*links)); + checks_status_ports = xmalloc(ports_n * sizeof(*checks_status_ports)); + args = xmalloc(ports_n * sizeof(*args)); + + for (int i = 0; i < ports_n; i++) + { + args[i].error_limit = error_limit; + args[i].parallel_lanes = parallel_lanes_arg; + args[i].run_margin = run_margin; + args[i].verbosity = 1; + args[i].steps_t = steps_t_arg; + args[i].steps_v = steps_v_arg; + for (int j = 0; j < recvs_n; j++) + args[i].recvs[j] = recvs_arg[j]; + args[i].recvs_n = recvs_n; + for (int j = 0; j < lanes_n; j++) + args[i].lanes[j] = lanes_arg[j]; + args[i].lanes_n = lanes_n; + args[i].steps_utility = &total_steps; + + enum margin_test_status args_status; + + if (!margin_fill_link(down_ports[i], up_ports[i], &links[i])) + { + checks_status_ports[i] = false; + results[i] = xmalloc(sizeof(*results[i])); + results[i]->test_status = MARGIN_TEST_PREREQS; + continue; + } + + if ((args_status = margin_process_args(&links[i].down_port, &args[i])) != MARGIN_TEST_OK) + { + checks_status_ports[i] = false; + 
results[i] = xmalloc(sizeof(*results[i])); + results[i]->test_status = args_status; + continue; + } + + checks_status_ports[i] = true; + struct margin_params params; + + for (int j = 0; j < args[i].recvs_n; j++) + { + if (margin_read_params(pacc, args[i].recvs[j] == 6 ? up_ports[i] : down_ports[i], + args[i].recvs[j], &params)) + { + u8 steps_t = steps_t_arg ? steps_t_arg : params.timing_steps; + u8 steps_v = steps_v_arg ? steps_v_arg : params.volt_steps; + u8 parallel_recv = parallel_lanes_arg > params.max_lanes + 1 ? params.max_lanes + 1 : + parallel_lanes_arg; + + u8 step_multiplier + = args[i].lanes_n / parallel_recv + ((args[i].lanes_n % parallel_recv) > 0); + + total_steps += steps_t * step_multiplier; + if (params.ind_left_right_tim) + total_steps += steps_t * step_multiplier; + if (params.volt_support) + { + total_steps += steps_v * step_multiplier; + if (params.ind_up_down_volt) + total_steps += steps_v * step_multiplier; + } + } + } + } + + for (int i = 0; i < ports_n; i++) + { + if (checks_status_ports[i]) + results[i] = margin_test_link(&links[i], &args[i], &results_n[i]); + else + { + results_n[i] = 1; + if (results[i]->test_status == MARGIN_TEST_PREREQS) + { + printf("Link "); + margin_log_bdfs(down_ports[i], up_ports[i]); + printf(" is not ready for margining.\n" + "Link data rate must be 16 GT/s or 32 GT/s.\n" + "Downstream Component must be at D0 PM state.\n"); + } + else if (results[i]->test_status == MARGIN_TEST_ARGS_RECVS) + { + margin_log_link(&links[i]); + printf("\nInvalid RecNums specified.\n"); + } + else if (results[i]->test_status == MARGIN_TEST_ARGS_LANES) + { + margin_log_link(&links[i]); + printf("\nInvalid lanes specified.\n"); + } + } + printf("\n----\n\n"); + } + + if (run_margin) + { + printf("Results:\n"); + printf("\nPass/fail criteria:\nTiming:\n"); + printf("Minimum Offset (spec): %d %% UI\nRecommended Offset: %d %% UI\n", MARGIN_TIM_MIN, + MARGIN_TIM_RECOMMEND); + printf("\nVoltage:\nMinimum Offset (spec): %d mV\n\n", 
MARGIN_VOLT_MIN); + printf( + "Margining statuses:\nLIM -\tErrorCount exceeded Error Count Limit (found device limit)\n"); + printf("NAK -\tDevice didn't execute last command, \n\tso result may be less reliable\n"); + printf("THR -\tThe set (using the utility options) \n\tstep threshold has been reached\n\n"); + printf("Notations:\nst - steps\n\n"); + + for (int i = 0; i < ports_n; i++) + { + printf("Link "); + margin_log_bdfs(down_ports[i], up_ports[i]); + printf(":\n\n"); + margin_results_print_brief(results[i], results_n[i]); + if (save_csv) + margin_results_save_csv(results[i], results_n[i], dir_for_csv, up_ports[i]); + printf("\n"); + } + } + + for (int i = 0; i < ports_n; i++) + margin_free_results(results[i], results_n[i]); + free(results_n); + free(results); + free(up_ports); + free(down_ports); + free(links); + free(checks_status_ports); + free(args); + + pci_cleanup(pacc); + return 0; +} diff --git a/pcilmr.man b/pcilmr.man new file mode 100644 index 0000000..673262f --- /dev/null +++ b/pcilmr.man @@ -0,0 +1,182 @@ +.TH PCILMR 8 "@TODAY@" "@VERSION@" "The PCI Utilities" +.SH NAME +pcilmr \- margin PCIe Links +.SH SYNOPSIS +.B pcilmr +.RB [ "--margin" ] +.RI [ "<margining options>" ] " <downstream component> ..." +.br +.B pcilmr --full +.RI [ "<margining options>" ] +.br +.B pcilmr --scan +.SH CONFIGURATION +List of the requirements for links and system settings +to run the margining test. + +.B BIOS settings +(depends on the system, relevant for server baseboards +with Xeon CPUs): +.IP \[bu] 3 +Turn off PCIe Leaky Bucket Feature, Re-Equalization and Link Degradation; +.IP \[bu] +Set Error Thresholds to 0; +.IP \[bu] +Intel VMD for NVMe SSDs - in case of strange behavior of the +.BR pcilmr, +try to run it with the VMD turned off. 
+.PP +.B Device (link) requirements: +.IP +.I "Configured by the user before running the utility; the utility does not change them:" +.RS +.IP \[bu] 3 +The current Link data rate must be 16.0 GT/s or higher (right now the +utility supports 16 GT/s and 32 GT/s Links); +.IP \[bu] +Link Downstream Component must be at D0 Power Management State. +.RE +.IP +.I "Configured by the utility during operation; the utility restores them to their original " +.I "state after receiving the results:" +.RS +.IP \[bu] 3 +The ASPM must be disabled in both the Downstream Port and Upstream Port; +.IP \[bu] +The Hardware Autonomous Speed Disable bit of the Link Control 2 register must be Set in both the +Downstream Port and Upstream Port; +.IP \[bu] +The Hardware Autonomous Width Disable bit of the Link Control register must be Set in both the +Downstream Port and Upstream Port. +.SH DESCRIPTION +.B pcilmr +utility allows you to take advantage of the PCIe Lane Margining at the Receiver +capability which is mandatory for all Ports supporting a data rate of 16.0 GT/s or +higher, including Pseudo Ports (Retimers). Lane Margining at Receiver enables system +software to obtain the margin information of a given Receiver while the Link is in the +L0 state. The margin information includes both voltage and time, in either direction from +the current Receiver position. Margining support for timing is required, while support +for voltage is optional at 16.0 GT/s and required at 32.0 GT/s and higher data rates. Also, +independent time margining and independent voltage margining are optional. + +The utility makes it possible to obtain an approximation of the eye margin diagram in the form of a rhombus +(by four points). The Lane Margining at the Receiver capability enables users to margin PCIe +links without a hardware debugger and without the need to stop the target system. The utility +can be useful for debugging link issues due to receiver margins. 
+ +However, the utility results may not be particularly accurate and, as was found during +testing, specific devices provide rather dubious capability support and the reliability of +the information they provide is questionable. The PCIe specification provides reference values +for the eye diagram, which are also used by +.B pcilmr +to evaluate the results, but it seems that it makes sense to contact the +manufacturer of a particular device for references. + +The PCIe Base Specification Revision 5.0 sets the allowed range for Timing Margin from 20%\~UI to 50%\~UI and +for Voltage Margin from 50\~mV to 500\~mV. The utility uses 30%\~UI as the recommended +value for Timing - taken from an NVIDIA presentation ("PCIe 4.0 Mass Electrical Margins Data +Collection"). + +.B pcilmr +requires root privileges (to access Extended Configuration Space), but during our testing +there were no problems with the devices and they successfully returned to their normal initial +state after the end of testing. + +.SH OPTIONS +.SS Device Specifier +.B "<device/component>" \t +.RI [ "<domain>" :] <bus> : <dev> . <func> +(see +.BR lspci (8)) +.SS Utility Modes +.TP +.BI --margin " <downstream component> ..." +Margin selected Links. +.TP +.B --full +Margin all Links in the system that are ready for testing (in the same sense as for the +.B --scan +option), one by one. +.TP +.B --scan +Scan for Links with a negotiated speed of 16 GT/s or higher. Mark as "Ready" those +in which at least one of the Link sides has the Margining Ready bit set, meaning that +these Links are ready for testing and you can run the utility on them. +.SS Margining Test options +.TP +.B -c +Print Device Lane Margining Capabilities only. Do not run margining. +.TP +\fB\-l\fI <lane>\fP[\fI,<lane>...\fP] +Specify lanes for margining. +.br +Remember that a Device may use Lane Reversal for Lane numbering. However, the utility +uses logical lane numbers in arguments and for logging. 
Utility will automatically +determine Lane Reversal and tune its calls. +.br +Default: all link lanes. +.TP +.BI -e " <errors>" +Specify Error Count Limit for margining. +.br +Default: 4. +.TP +\fB-r\fI <recvn>\fP[\fI,<recvn>...\fP] +Specify Receivers to select margining targets. +.br +Default: all available Receivers (including Retimers). +.TP +.BI -p " <parallel_lanes>" +Specify number of lanes to margin simultaneously. +.br +According to spec it's possible for Receiver to margin up to MaxLanes + 1 +lanes simultaneously, but during testing, performing margining on several +lanes simultaneously led to results that were different from sequential +margining, so this feature requires additional verification and +.I -p +option right now is for experiments mostly. +.br +Default: 1. +.PP +.B "Use only one of -T/-t options at the same time (same for -V/-v)." +.br +.B "Without these options utility will use MaxSteps from Device" +.B "capabilities as test limit." +.TP +.B -T +Time Margining will continue until the Error Count is no more +than an Error Count Limit. Use this option to find Link limit. +.TP +.BI -t " <steps>" +Specify maximum number of steps for Time Margining. +.TP +.B -V +Same as +.I -T +option, but for Voltage. +.TP +.BI -v " <steps>" +Specify maximum number of steps for Voltage Margining. +.SS Margining Log options +.TP +.BI -o " <directory>" +Save margining results in csv form into the specified directory. Utility +will generate file with the name in form of +.RI "\[dq]lmr_" "<downstream component>" "_Rx" # _ <timestamp> ".csv\[dq]" +for each successfully tested receiver. + +.SH EXAMPLES +Utility syntax example: +.RS +.BI "pcilmr -l" " 0,1 " "-r" " 1,6 " "-TV" " ab:0.0 52:0.0" +.RE + +.UR https://gist.github.com/bombanya/f2b15263712757ffba1a11eea011c419 +Examples of collected results on different systems. 
+.UE + +.SH SEE ALSO +.nh +.BR lspci (8), +.B PCI Express Base Specification (Lane Margining at Receiver) +.hy @@ -10,6 +10,7 @@ #include "lib/pci.h" #include "lib/sysdep.h" +#include "bitops.h" /* * gcc predefines macro __MINGW32__ for all MinGW targets. @@ -23,11 +24,12 @@ /* * On Windows only MinGW 3.0 and higher versions provides <getopt.h> * header file. Older MinGW versions and MSVC do not have it. + * DJGPP does not provide <getopt.h>. */ -#if defined(PCI_OS_WINDOWS) && !(defined(__MINGW32_MAJOR_VERSION) && __MINGW32_MAJOR_VERSION >= 3) +#if defined(PCI_OS_DJGPP) || (defined(PCI_OS_WINDOWS) && !(defined(__MINGW32_MAJOR_VERSION) && __MINGW32_MAJOR_VERSION >= 3)) #include "compat/getopt.h" #else -#include <unistd.h> +#include <getopt.h> #endif #define PCIUTILS_VERSION PCILIB_VERSION |