.. SPDX-License-Identifier: GPL-2.0

VMBus
=====

VMBus is a software construct provided by Hyper-V to guest VMs. It
consists of a control path and common facilities used by synthetic
devices that Hyper-V presents to guest VMs. The control path is used
to offer synthetic devices to the guest VM and, in some cases, to
rescind those devices. The common facilities include software channels
for communicating between the device driver in the guest VM and the
synthetic device implementation that is part of Hyper-V, and signaling
primitives to allow Hyper-V and the guest to interrupt each other.

VMBus is modeled in Linux as a bus, with the expected /sys/bus/vmbus
entry in a running Linux guest. The VMBus driver (drivers/hv/vmbus_drv.c)
establishes the VMBus control path with the Hyper-V host, then
registers itself as a Linux bus driver. It implements the standard bus
functions for adding and removing devices to/from the bus.

Most synthetic devices offered by Hyper-V have a corresponding Linux
device driver. These devices include:

* SCSI controller
* NIC
* Graphics frame buffer
* Keyboard
* Mouse
* PCI device pass-thru
* Heartbeat
* Time Sync
* Shutdown
* Memory balloon
* Key/Value Pair (KVP) exchange with Hyper-V
* Hyper-V online backup (a.k.a. VSS)

Guest VMs may have multiple instances of the synthetic SCSI controller,
synthetic NIC, and PCI pass-thru devices. Other synthetic devices are
limited to a single instance per VM. Not listed above are a small
number of synthetic devices offered by Hyper-V that are used only by
Windows guests and for which Linux does not have a driver.

Hyper-V uses the terms "VSP" and "VSC" in describing synthetic devices.
"VSP" refers to the Hyper-V code that implements a particular synthetic
device, while "VSC" refers to the driver for the device in the guest
VM. For example, the Linux driver for the synthetic NIC is referred to
as "netvsc" and the Linux driver for the synthetic SCSI controller is
"storvsc". These drivers contain functions with names like
"storvsc_connect_to_vsp".

VMBus channels
---------------

An instance of a synthetic device uses VMBus channels to communicate
between the VSP and the VSC. Channels are bi-directional and used for
passing messages. Most synthetic devices use a single channel, but the
synthetic SCSI controller and synthetic NIC may use multiple channels
to achieve higher performance and greater parallelism.

Each channel consists of two ring buffers. These are classic ring
buffers from a university data structures textbook. If the read and
write pointers are equal, the ring buffer is considered to be empty,
so a full ring buffer always has at least one byte unused. The "in"
ring buffer is for messages from the Hyper-V host to the guest, and
the "out" ring buffer is for messages from the guest to the Hyper-V
host. In Linux, the "in" and "out" designations are as viewed by the
guest side. The ring buffers are memory that is shared between the
guest and the host, and they follow the standard paradigm where the
memory is allocated by the guest, with the list of GPAs that make up
the ring buffer communicated to the host. Each ring buffer consists of
a header page (4 Kbytes) with the read and write indices and some
control flags, followed by the memory for the actual ring. The size of
the ring is determined by the VSC in the guest and is specific to each
synthetic device. The list of GPAs making up the ring is communicated
to the Hyper-V host over the VMBus control path as a GPA Descriptor
List (GPADL). See function vmbus_establish_gpadl().
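
As an illustration of the classic ring buffer arithmetic described
above, the sketch below computes how many bytes are available to read
and to write from a pair of indices. The structure and field names are
illustrative only and are not the exact layout of the VMBus ring
buffer header::

    /*
     * Illustrative sketch of classic ring buffer index arithmetic.
     * "size" is the size of the data area of the ring, not counting
     * the header page.
     */
    struct ring_indices {
        unsigned int read_index;   /* next offset the reader will consume */
        unsigned int write_index;  /* next offset the writer will fill */
        unsigned int size;         /* total bytes in the ring data area */
    };

    /* Bytes of pending data: zero when read_index == write_index */
    static unsigned int ring_bytes_to_read(const struct ring_indices *r)
    {
        if (r->write_index >= r->read_index)
            return r->write_index - r->read_index;
        return r->size - (r->read_index - r->write_index);
    }

    /*
     * Bytes of free space. One byte is always left unused so that a
     * completely full ring is never confused with an empty one.
     */
    static unsigned int ring_bytes_to_write(const struct ring_indices *r)
    {
        return r->size - ring_bytes_to_read(r) - 1;
    }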

On arm64 with page sizes > 4 Kbytes, the header page must still be
passed to Hyper-V as a 4 Kbyte area. But the memory for the actual
ring must be aligned to PAGE_SIZE and have a size that is a multiple
of PAGE_SIZE so that the duplicate mapping trick can be done. Hence a
portion of the header page is unused and not communicated to Hyper-V.
This case is handled by vmbus_establish_gpadl().
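
To make the arm64 case concrete, the sketch below shows the arithmetic
for a guest using 64 Kbyte pages. The numbers are illustrative; the
real layout is computed inside vmbus_establish_gpadl()::

    /* Illustrative only: arm64 guest with 64 Kbyte pages */
    #define HV_PAGE_SIZE_EXAMPLE    4096UL   /* Hyper-V always uses 4K pages */
    #define GUEST_PAGE_SIZE_EXAMPLE 65536UL  /* PAGE_SIZE in this example    */

    /*
     * The header occupies one guest page, but only the first 4 Kbytes
     * are described to Hyper-V; the rest of the header page is unused.
     */
    unsigned long header_unused =
        GUEST_PAGE_SIZE_EXAMPLE - HV_PAGE_SIZE_EXAMPLE;     /* 61440 bytes */

    /*
     * The ring data area starts on the next PAGE_SIZE boundary and its
     * size must be a multiple of PAGE_SIZE, e.g. 2 * 64K = 128 Kbytes.
     */
    unsigned long ring_size = 2 * GUEST_PAGE_SIZE_EXAMPLE;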

Hyper-V enforces a limit on the aggregate amount of guest memory that
can be shared with the host via GPADLs. This limit ensures that a
rogue guest can't force the consumption of excessive host resources.
For Windows Server 2019 and later, this limit is approximately 1280
Mbytes. For versions prior to Windows Server 2019, the limit is
approximately 384 Mbytes.

VMBus channel messages
-----------------------

All messages sent in a VMBus channel have a standard header that
includes the message length, the offset of the message payload, some
flags, and a transactionID. The portion of the message after the
header is unique to each VSP/VSC pair.
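
In the Linux implementation, this standard header corresponds to a
small fixed-size descriptor at the start of each packet. The sketch
below is a simplified rendering of that layout; the field names follow
struct vmpacket_descriptor in include/linux/hyperv.h, which should be
consulted for the authoritative definition::

    /* Simplified sketch of the per-packet header on a VMBus channel */
    struct vmpacket_descriptor_sketch {
        u16 type;      /* packet type, e.g. inband data or GPA list */
        u16 offset8;   /* offset of the payload, in 8-byte units     */
        u16 len8;      /* total packet length, in 8-byte units       */
        u16 flags;     /* e.g. whether a completion is requested     */
        u64 trans_id;  /* transactionID echoed back in the response  */
    } __packed;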

Messages follow one of two patterns:

* Unidirectional: Either side sends a message and does not expect a
  response message
* Request/response: One side (usually the guest) sends a message and
  expects a response

The transactionID (a.k.a. "requestID") is for matching requests &
responses. Some synthetic devices allow multiple requests to be
in-flight simultaneously, so the guest specifies a transactionID when
sending a request. Hyper-V sends back the same transactionID in the
matching response.

Messages passed between the VSP and VSC are control messages. For
example, a message sent from the storvsc driver might be "execute this
SCSI command". If a message also implies some data transfer between
the guest and the Hyper-V host, the actual data to be transferred may
be embedded with the control message, or it may be specified as a
separate data buffer that the Hyper-V host will access as a DMA
operation. The former case is used when the size of the data is small
and the cost of copying the data to and from the ring buffer is
minimal. For example, time sync messages from the Hyper-V host to the
guest contain the actual time value. When the data is larger, a
separate data buffer is used. In this case, the control message
contains a list of GPAs that describe the data buffer. For example,
the storvsc driver uses this approach to specify the data buffers
to/from which disk I/O is done.

Three functions exist to send VMBus channel messages:

1. vmbus_sendpacket(): Control-only messages and messages with
   embedded data -- no GPAs (a usage sketch follows this list)
2. vmbus_sendpacket_pagebuffer(): Message with a list of GPAs
   identifying data to transfer. An offset and length are associated
   with each GPA so that multiple discontinuous areas of guest memory
   can be targeted.
3. vmbus_sendpacket_mpb_desc(): Message with a list of GPAs
   identifying data to transfer. A single offset and length is
   associated with the list of GPAs. The GPAs must describe a single
   logical area of guest memory to be targeted.
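
As a sketch of the first case, a VSC might send a small device-specific
control message with vmbus_sendpacket(). The message structure and the
requestID handling here are hypothetical; the function arguments follow
the prototype in include/linux/hyperv.h, which should be consulted for
the authoritative signature::

    /* Hypothetical device-specific control message */
    struct my_vsc_request {
        u32 operation;
        u32 argument;
    };

    static int my_vsc_send_request(struct vmbus_channel *chan, u32 op,
                                   u32 arg, u64 request_id)
    {
        struct my_vsc_request req = {
            .operation = op,
            .argument  = arg,
        };

        /*
         * The message is copied into the channel's "out" ring buffer.
         * request_id is the transactionID that the VSP echoes back in
         * its response so the VSC can match the two.
         */
        return vmbus_sendpacket(chan, &req, sizeof(req), request_id,
                                VM_PKT_DATA_INBAND,
                                VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
    }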

Historically, Linux guests have trusted Hyper-V to send well-formed
and valid messages, and Linux drivers for synthetic devices did not
fully validate messages. With the introduction of processor
technologies that fully encrypt guest memory and that allow the guest
to not trust the hypervisor (AMD SEV-SNP, Intel TDX), trusting the
Hyper-V host is no longer a valid assumption. The drivers for VMBus
synthetic devices are being updated to fully validate any values read
from memory that is shared with Hyper-V, which includes messages from
VMBus devices. To facilitate such validation, messages read by the
guest from the "in" ring buffer are copied to a temporary buffer that
is not shared with Hyper-V. Validation is performed in this temporary
buffer without the risk of Hyper-V maliciously modifying the message
after it is validated but before it is used.

Synthetic Interrupt Controller (synic)
---------------------------------------

Hyper-V provides each guest CPU with a synthetic interrupt controller
that is used by VMBus for host-guest communication. While each synic
defines 16 synthetic interrupts (SINT), Linux uses only one of the 16
(VMBUS_MESSAGE_SINT). All interrupts related to communication between
the Hyper-V host and a guest CPU use that SINT.

The SINT is mapped to a single per-CPU architectural interrupt (i.e.,
an 8-bit x86/x64 interrupt vector, or an arm64 PPI INTID). Because
each CPU in the guest has a synic and may receive VMBus interrupts,
they are best modeled in Linux as per-CPU interrupts. This model works
well on arm64 where a single per-CPU Linux IRQ is allocated for
VMBUS_MESSAGE_SINT. This IRQ appears in /proc/interrupts as an IRQ
labelled "Hyper-V VMbus". Since x86/x64 lacks support for per-CPU
IRQs, an x86 interrupt vector is statically allocated
(HYPERVISOR_CALLBACK_VECTOR) across all CPUs and explicitly coded to
call vmbus_isr(). In this case, there's no Linux IRQ, and the
interrupts are visible in aggregate in /proc/interrupts on the "HYP"
line.

The synic provides the means to demultiplex the architectural
interrupt into one or more logical interrupts and route the logical
interrupt to the proper VMBus handler in Linux. This demultiplexing
is done by vmbus_isr() and related functions that access synic data
structures.
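
The overall shape of that demultiplexing is outlined below. This is a
heavily simplified sketch, not the actual vmbus_isr() code; the helper
names are illustrative and the real implementation reads per-CPU synic
pages and handles many additional details::

    /* Illustrative stubs; the real work is done by the Linux VMBus code */
    static void handle_pending_channel_interrupts(void)
    {
        /*
         * vmbus_chan_sched(): scan the synic event flags for this CPU
         * and run the callback of each channel with a pending interrupt.
         */
    }

    static void handle_control_path_messages(void)
    {
        /*
         * Scan the synic message page for this CPU and process control
         * path messages such as device offers and rescinds.
         */
    }

    /* Simplified outline of the per-CPU demultiplexing in vmbus_isr() */
    static void vmbus_isr_outline(void)
    {
        handle_pending_channel_interrupts();   /* step 1: channels      */
        handle_control_path_messages();        /* step 2: control path  */
    }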

The synic is not modeled in Linux as an irq chip or irq domain, and
the demultiplexed logical interrupts are not Linux IRQs. As such, they
don't appear in /proc/interrupts or /proc/irq. The CPU affinity for
one of these logical interrupts is controlled via an entry under
/sys/bus/vmbus as described below.

VMBus interrupts
-----------------

VMBus provides a mechanism for the guest to interrupt the host when
the guest has queued new messages in a ring buffer. The host expects
that the guest will send an interrupt only when an "out" ring buffer
transitions from empty to non-empty. If the guest sends interrupts at
other times, the host deems such interrupts to be unnecessary. If a
guest sends an excessive number of unnecessary interrupts, the host
may throttle that guest by suspending its execution for a few seconds
to prevent a denial-of-service attack.
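
The empty-to-non-empty rule can be illustrated with the ring buffer
indices from the earlier sketch. The decision below is a conceptual
outline of the guest-side logic, not the exact Linux implementation,
which also accounts for interrupt masking by the host::

    /*
     * Conceptual sketch: decide whether to interrupt the host after
     * the guest has written a message into an "out" ring buffer.
     * old_write_index is the write index sampled before the message
     * was copied in; read_index is the host's read index.
     */
    static bool guest_should_signal_host(unsigned int old_write_index,
                                         unsigned int read_index)
    {
        /*
         * If the ring was empty before this write (read and write
         * indices were equal), the host may be idle and must be
         * interrupted. Otherwise the host is still draining earlier
         * messages and an interrupt would be unnecessary.
         */
        return old_write_index == read_index;
    }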

Similarly, the host will interrupt the guest via the synic when it
sends a new message on the VMBus control path, or when a VMBus channel
"in" ring buffer transitions from empty to non-empty due to the host
inserting a new VMBus channel message. The control message stream and
each VMBus channel "in" ring buffer are separate logical interrupts
that are demultiplexed by vmbus_isr(). It demultiplexes by first
checking for channel interrupts by calling vmbus_chan_sched(), which
looks at a synic bitmap to determine which channels have pending
interrupts on this CPU. If multiple channels have pending interrupts
for this CPU, they are processed sequentially. When all channel
interrupts have been processed, vmbus_isr() checks for and processes
any messages received on the VMBus control path.

The guest CPU that a VMBus channel will interrupt is selected by the
guest when the channel is created, and the host is informed of that
selection. VMBus devices are broadly grouped into two categories:

1. "Slow" devices that need only one VMBus channel. The devices (such
   as keyboard, mouse, heartbeat, and timesync) generate relatively
   few interrupts. Their VMBus channels are all assigned to interrupt
   the VMBUS_CONNECT_CPU, which is always CPU 0.

2. "High speed" devices that may use multiple VMBus channels for
   higher parallelism and performance. These devices include the
   synthetic SCSI controller and synthetic NIC. Their VMBus channel
   interrupts are assigned to CPUs that are spread out among the
   available CPUs in the VM so that interrupts on multiple channels
   can be processed in parallel.

The assignment of VMBus channel interrupts to CPUs is done in the
function init_vp_index(). This assignment is done outside of the
normal Linux interrupt affinity mechanism, so the interrupts are
neither "unmanaged" nor "managed" interrupts.
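
Conceptually, spreading channel interrupts across CPUs looks like the
round-robin sketch below. This is purely illustrative and is not the
actual policy in init_vp_index(), which is more sophisticated and
takes factors such as NUMA topology into account::

    /*
     * Illustrative only: assign each new high-speed channel to the
     * next online CPU in a simple round-robin fashion.
     */
    static unsigned int next_cpu;

    static unsigned int pick_target_cpu(unsigned int num_online)
    {
        unsigned int cpu = next_cpu;

        next_cpu = (next_cpu + 1) % num_online;
        return cpu;
    }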

The CPU that a VMBus channel will interrupt can be seen in
/sys/bus/vmbus/devices/<guid>/channels/<N>/cpu. When running on later
versions of Hyper-V, the CPU can be changed by writing a new value to
this sysfs entry. Because VMBus channel interrupts are not Linux IRQs,
there are no entries in /proc/interrupts or /proc/irq corresponding to
individual VMBus channel interrupts.

An online CPU in a Linux guest may not be taken offline if it has
VMBus channel interrupts assigned to it. Any such channel interrupts
must first be manually reassigned to another CPU as described above.
When no channel interrupts are assigned to the CPU, it can be taken
offline.

The VMBus channel interrupt handling code is designed to work
correctly even if an interrupt is received on a CPU other than the CPU
assigned to the channel. Specifically, the code does not use CPU-based
exclusion for correctness. In normal operation, Hyper-V will interrupt
the assigned CPU. But when the CPU assigned to a channel is being
changed via sysfs, the guest doesn't know exactly when Hyper-V will
make the transition. The code must work correctly even if there is a
time lag before Hyper-V starts interrupting the new CPU. See comments
in target_cpu_store().

VMBus device creation/deletion
-------------------------------

Hyper-V and the Linux guest have a separate message-passing path that
is used for synthetic device creation and deletion. This path does not
use a VMBus channel. See vmbus_post_msg() and vmbus_on_msg_dpc().

The first step is for the guest to connect to the generic Hyper-V
VMBus mechanism. As part of establishing this connection, the guest
and Hyper-V agree on a VMBus protocol version they will use. This
negotiation allows newer Linux kernels to run on older Hyper-V
versions, and vice versa.

The guest then tells Hyper-V to "send offers". Hyper-V sends an offer
message to the guest for each synthetic device that the VM is
configured to have. Each VMBus device type has a fixed GUID known as
the "class ID", and each VMBus device instance is also identified by a
GUID. The offer message from Hyper-V contains both GUIDs to uniquely
(within the VM) identify the device. There is one offer message for
each device instance, so a VM with two synthetic NICs will get two
offer messages with the NIC class ID. The ordering of offer messages
can vary from boot-to-boot and must not be assumed to be consistent in
Linux code. Offer messages may also arrive long after Linux has
initially booted because Hyper-V supports adding devices, such as
synthetic NICs, to running VMs. A new offer message is processed by
vmbus_process_offer(), which indirectly invokes
vmbus_add_channel_work().

Upon receipt of an offer message, the guest identifies the device type
based on the class ID, and invokes the correct driver to set up the
device. Driver/device matching is performed using the standard Linux
mechanism.
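
The sketch below shows how a VSC plugs into that standard Linux
driver/device matching. The driver name, GUID value, and callback
bodies are hypothetical; the structures and registration call follow
include/linux/hyperv.h, which should be consulted for the exact
prototypes on a given kernel version::

    /* Hypothetical class ID of the synthetic device this VSC handles */
    static const struct hv_vmbus_device_id my_vsc_id_table[] = {
        { .guid = GUID_INIT(0x12345678, 0x1111, 0x2222, 0x33, 0x44,
                            0x55, 0x66, 0x77, 0x88, 0x99, 0xaa) },
        { },
    };
    MODULE_DEVICE_TABLE(vmbus, my_vsc_id_table);

    static int my_vsc_probe(struct hv_device *dev,
                            const struct hv_vmbus_device_id *dev_id)
    {
        /* open the primary channel, negotiate with the VSP, etc. */
        return 0;
    }

    static void my_vsc_remove(struct hv_device *dev)
    {
        /* close channels and release resources */
    }

    static struct hv_driver my_vsc_drv = {
        .name     = "my_vsc",
        .id_table = my_vsc_id_table,
        .probe    = my_vsc_probe,
        .remove   = my_vsc_remove,
    };

    /* Typically called from the module init function */
    static int __init my_vsc_init(void)
    {
        return vmbus_driver_register(&my_vsc_drv);
    }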

The device driver probe function opens the primary VMBus channel to
the corresponding VSP. It allocates guest memory for the channel ring
buffers and shares the ring buffer with the Hyper-V host by giving the
host a list of GPAs for the ring buffer memory. See
vmbus_establish_gpadl().
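
A typical probe sequence is sketched below, filling in the
my_vsc_probe() stub from the previous sketch. The ring buffer size and
the callback are hypothetical and device-specific; vmbus_open()
performs the ring buffer setup, including establishing the GPADL, on
behalf of the driver. Check include/linux/hyperv.h for the exact
vmbus_open() prototype on a given kernel::

    /* Hypothetical per-device ring size; each VSC chooses its own */
    #define MY_VSC_RING_SIZE (64 * 1024)

    /* Called when the channel's "in" ring buffer has new messages */
    static void my_vsc_on_channel_callback(void *context)
    {
        struct hv_device *dev = context;

        /* read and process packets from the "in" ring buffer of dev */
    }

    static int my_vsc_probe(struct hv_device *dev,
                            const struct hv_vmbus_device_id *dev_id)
    {
        int ret;

        /*
         * Open the primary channel: allocate the "out" and "in" ring
         * buffers and share them with Hyper-V via a GPADL.
         */
        ret = vmbus_open(dev->channel, MY_VSC_RING_SIZE, MY_VSC_RING_SIZE,
                         NULL, 0,                     /* no userdata        */
                         my_vsc_on_channel_callback,  /* interrupt callback */
                         dev);
        if (ret)
            return ret;

        /* device-specific setup messages to the VSP would follow here */
        return 0;
    }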

Once the ring buffer is set up, the device driver and VSP exchange
setup messages via the primary channel. These messages may include
negotiating the device protocol version to be used between the Linux
VSC and the VSP on the Hyper-V host. The setup messages may also
include creating additional VMBus channels, which are somewhat
mis-named as "sub-channels" since they are functionally equivalent to
the primary channel once they are created.

Finally, the device driver may create entries in /dev as with any
device driver.

The Hyper-V host can send a "rescind" message to the guest to remove a
device that was previously offered. Linux drivers must handle such a
rescind message at any time. Rescinding a device invokes the device
driver "remove" function to cleanly shut down the device and remove
it. Once a synthetic device is rescinded, neither Hyper-V nor Linux
retains any state about its previous existence. Such a device might be
re-added later, in which case it is treated as an entirely new device.
See vmbus_onoffer_rescind().
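
When the device is rescinded (or the driver module is unloaded), the
"remove" callback undoes what probe did. A minimal sketch, again using
the hypothetical my_vsc driver and assuming vmbus_close() as declared
in include/linux/hyperv.h::

    static void my_vsc_remove(struct hv_device *dev)
    {
        /*
         * Quiesce the device, then close the channel. Closing tears
         * down the ring buffers and the GPADL that probe established.
         * After this returns, no guest state about the device remains.
         */
        vmbus_close(dev->channel);
    }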