Skip to content

Instantly share code, notes, and snippets.

@CRTified
Last active May 5, 2024 15:57
Show Gist options
  • Save CRTified/43b7ce84cd238673f7f24652c85980b3 to your computer and use it in GitHub Desktop.
Save CRTified/43b7ce84cd238673f7f24652c85980b3 to your computer and use it in GitHub Desktop.
VFIO Passthrough on NixOS

VFIO Setup on NixOS

Disclaimer: Nobody else tested my setup so far, so this is a "works on my machine" scenario. I am not responsible for anything you break on your machine (although I'd not expect much harm).

Hardware

My system has the following hardware:

  • Board: ASRock X570 Pro4
  • Processor: AMD Ryzen 7 3700x
  • GPU (Primary): Palit GeForce GTX1080 Dual OC (at 0000:01:00.*, IDs are 10de:1b80 and 10de:10f0)
  • GPU (Secondary): AMD Radeon Pro WX3100 (at 0000:02:00.*, IDs are not relevant)

Files

  • usage.nix contains the relevant snippet of my configuration where I use preexisting modules and my custom ones.
  • win_vm.xml contains the libvirtd xml dump of my Windows 10 guest
    • It is customized to my setup, I only use this one for gaming
  • virtualisation.nix is a module that extends the virtualisation option subtree with the ability to create shared memory files (and to configure huge pages). This is required for looking-glass and pulseaudio-scream (or pulseaudio-ivshmem)
    • Audio integration is not tested by me. Last time I checked, neither pa-scream nor pa-ivshmem were available and it did not annoy me enough to play around with it
  • vfio.nix contains the biggest part of the config. It allows setting a few properties like:
    • IOMMUType, either intel or amd, sets the appropriate kernel parameters for IOMMU
    • devices, which is a list of PCI IDs that shall be bound to vfio-pci
    • disableEFIfb disables the EFI framebuffer. I pass through my primary GPU, so I need to prevent the kernel from touching it
    • blacklistNvidia additionally blacklists nvidia and nouveau kernel modules
    • ignoreMSRs toggles kvm.ignore_msrs as a kernel parameter
    • applyACSpatch applies the well-known ACS override patch to weaken the IOMMU grouping. IMPORTANT: This results in a kernel compilation in most cases.
  • libvirt.nix adds two virtualisation.libvirtd options
    • deviceACL adds devices to the cgroup_device_acl option, which is often required to access the devices from qemu
    • clearEmulationCapabilities toggles the clear_emulation_capabilities setting for qemu

TODO

  • Bake the libvirt xml file into the system configuration
# libvirt.nix: adds two options below virtualisation.libvirtd and renders them
# into qemu.conf through qemuVerbatimConfig.
{ lib, pkgs, config, ... }:
with lib;
let
  cfg = config.virtualisation.libvirtd;

  # qemu.conf is written with 0/1 here instead of Nix booleans.
  boolToZeroOne = x: if x then "1" else "0";

  # Every ACL entry as a quoted string, separated by a comma and a newline.
  aclString =
    concatMapStringsSep ",\n" strings.escapeNixString cfg.deviceACL;
in {
  options.virtualisation.libvirtd = {
    deviceACL = mkOption {
      type = types.listOf types.str;
      default = [ ];
      example = [ "/dev/kvm" "/dev/vfio/vfio" ];
      description = ''
        Device nodes written to cgroup_device_acl in qemu.conf.
        When the list is empty the setting is not written at all.
      '';
    };
    clearEmulationCapabilities = mkOption {
      type = types.bool;
      default = true;
      description = "Value of clear_emulation_capabilities in qemu.conf.";
    };
  };

  # Only touch the system when libvirtd itself is enabled; otherwise importing
  # this module would define a qemu-libvirtd user on hosts without libvirtd.
  config = mkIf cfg.enable {
    # Add qemu-libvirtd to the input group if required
    users.users."qemu-libvirtd" = {
      extraGroups = optionals (!cfg.qemuRunAsRoot) [ "kvm" "input" ];
      isSystemUser = true;
    };

    virtualisation.libvirtd.qemuVerbatimConfig = ''
      clear_emulation_capabilities = ${boolToZeroOne cfg.clearEmulationCapabilities}
    ''
    # An empty list would otherwise be written out as an empty
    # cgroup_device_acl, so the setting is skipped in that case.
    + optionalString (cfg.deviceACL != [ ]) ''
      cgroup_device_acl = [
      ${aclString}
      ]
    '';
  };
}
# usage.nix: excerpt of the host configuration that consumes the custom
# modules (sharedMemoryFiles, hugepages, vfio, libvirtd.deviceACL,
# libvirtd.clearEmulationCapabilities). User name, device paths and PCI IDs
# are specific to the author's machine.
#
# The function header is needed because the body refers to pkgs.
{ pkgs, ... }: {
  # TODO: Use a hook so that it starts only *after* the shmem device is initialized
  systemd.user.services.scream-ivshmem = {
    enable = true;
    description = "Scream IVSHMEM";
    serviceConfig = {
      ExecStart = "${pkgs.scream}/bin/scream-ivshmem-pulse /dev/shm/scream";
      Restart = "always";
    };
    # This is a *user* unit. multi-user.target only exists in the system
    # manager, so the unit has to hook into the user manager's default.target
    # to be started automatically.
    wantedBy = [ "default.target" ];
    requires = [ "pulseaudio.service" ];
    # requires alone does not order the units; start after pulseaudio.
    after = [ "pulseaudio.service" ];
  };

  virtualisation = {
    # Files created below /dev/shm by the sharedMemoryFiles module.
    # NOTE(review): mode 666 makes both files world-writable; confirm that
    # this is needed in addition to the user/group ownership.
    sharedMemoryFiles = {
      scream = {
        user = "richard";
        group = "qemu-libvirtd";
        mode = "666";
      };
      looking-glass = {
        user = "richard";
        group = "qemu-libvirtd";
        mode = "666";
      };
    };

    libvirtd = {
      enable = true;
      qemuOvmf = true;
      qemuRunAsRoot = false;
      onBoot = "ignore";
      onShutdown = "shutdown";
      clearEmulationCapabilities = false;
      # Device nodes qemu may access (rendered into cgroup_device_acl).
      deviceACL = [
        "/dev/input/by-path/pci-0000:0b:00.3-usb-0:2.2.4:1.0-event-mouse" # Trackball
        "/dev/input/by-path/pci-0000:0b:00.3-usb-0:2.2.3:1.0-event-kbd" # Keyboard
        "/dev/input/by-path/pci-0000:0b:00.3-usb-0:2.2.3:1.1-event-mouse" # Keyboard
        "/dev/input/by-path/pci-0000:0b:00.3-usb-0:2.2.3:1.1-mouse" # Keyboard
        "/dev/vfio/vfio"
        "/dev/vfio/2"
        "/dev/vfio/6"
        "/dev/kvm"
        "/dev/shm/scream"
        "/dev/shm/looking-glass"
      ];
    };

    vfio = {
      enable = true;
      IOMMUType = "amd";
      devices = [ "10de:1b80" "10de:10f0" ];
      blacklistNvidia = true;
      disableEFIfb = false;
      ignoreMSRs = true;
      applyACSpatch = false;
    };

    # 16 pages of 1G each.
    hugepages = {
      enable = true;
      defaultPageSize = "1G";
      pageSize = "1G";
      numPages = 16;
    };
  };
}
# vfio.nix: options below virtualisation.vfio that set up kernel parameters,
# kernel modules, module blacklisting and (optionally) kernel patches for
# binding PCI devices to vfio-pci.
{ lib, pkgs, config, ... }:
with lib;
let
  cfg = config.virtualisation.vfio;

  # vfio_virqfd was merged into the vfio module in Linux 6.2. Requesting it on
  # newer kernels fails because the module no longer exists, so it is only
  # listed for older kernels.
  vfioModules =
    (optional (versionOlder config.boot.kernelPackages.kernel.version "6.2")
      "vfio_virqfd") ++ [ "vfio_pci" "vfio_iommu_type1" "vfio" ];

  # Both patches come from the v5.5.4-arch1 tag of slowbro/linux-vfio.
  # Alternative source (not used): the AUR linux-vfio package at commit
  # 1ec4cb0753488353e111496a90bdfbe2a074827e, e.g.
  # https://aur.archlinux.org/cgit/aur.git/plain/add-acs-overrides.patch?h=linux-vfio&id=<commit>
  patchBase =
    "https://raw.githubusercontent.com/slowbro/linux-vfio/v5.5.4-arch1";

  # Builds one boot.kernelPatches entry for <name>.patch below patchBase.
  kernelPatch = name: sha256: {
    inherit name;
    patch = pkgs.fetchurl {
      name = "${name}.patch";
      url = "${patchBase}/${name}.patch";
      inherit sha256;
    };
  };
in {
  options.virtualisation.vfio = {
    enable = mkEnableOption "VFIO Configuration";
    IOMMUType = mkOption {
      type = types.enum [ "intel" "amd" ];
      example = "intel";
      description = "Type of the IOMMU used";
    };
    devices = mkOption {
      type = types.listOf (types.strMatching "[0-9a-f]{4}:[0-9a-f]{4}");
      default = [ ];
      example = [ "10de:1b80" "10de:10f0" ];
      description = "PCI IDs of devices to bind to vfio-pci";
    };
    disableEFIfb = mkOption {
      type = types.bool;
      default = false;
      example = true;
      description = "Disables the usage of the EFI framebuffer on boot.";
    };
    blacklistNvidia = mkOption {
      type = types.bool;
      default = false;
      description = "Add Nvidia GPU modules to blacklist";
    };
    ignoreMSRs = mkOption {
      type = types.bool;
      default = false;
      example = true;
      description = ''
        Adds kvm.ignore_msrs=1 and kvm.report_ignored_msrs=0 to the kernel
        command line, so that KVM ignores guest accesses to unhandled
        model-specific registers without reporting them.
      '';
    };
    applyACSpatch = mkOption {
      type = types.bool;
      default = false;
      description = ''
        If set, the following things will happen:
        - The ACS override patch is applied
        - Applies the i915-vga-arbiter patch
        - Adds pcie_acs_override=downstream,multifunction and pci=nomsi
          to the command line
      '';
    };
  };

  config = mkIf cfg.enable {
    services.udev.extraRules = ''
      SUBSYSTEM=="vfio", OWNER="root", GROUP="kvm"
    '';

    boot.kernelParams =
      (if cfg.IOMMUType == "intel" then
        [ "intel_iommu=on" "intel_iommu=igfx_off" ]
      else
        [ "amd_iommu=on" ])
      # Only emit vfio-pci.ids when there is at least one device.
      ++ optional (cfg.devices != [ ])
        ("vfio-pci.ids=" + concatStringsSep "," cfg.devices)
      ++ optionals cfg.applyACSpatch [
        "pcie_acs_override=downstream,multifunction"
        "pci=nomsi"
      ]
      ++ optional cfg.disableEFIfb "video=efifb:off"
      ++ optionals cfg.ignoreMSRs [
        "kvm.ignore_msrs=1"
        "kvm.report_ignored_msrs=0"
      ];

    # Same module list for the running system and the initrd.
    boot.kernelModules = vfioModules;
    boot.initrd.kernelModules = vfioModules;

    boot.blacklistedKernelModules =
      optionals cfg.blacklistNvidia [ "nvidia" "nouveau" ];

    boot.kernelPatches = optionals cfg.applyACSpatch [
      (kernelPatch "add-acs-overrides"
        "0nbmc5bwv7pl84l1mfhacvyp8vnzwhar0ahqgckvmzlhgf1n1bii")
      (kernelPatch "i915-vga-arbiter"
        "1m5nn9pfkf685g31y31ip70jv61sblvxgskqn8a0ca60mmr38krk")
    ];
  };
}
# virtualisation.nix: adds virtualisation.sharedMemoryFiles (files created
# below /dev/shm through systemd-tmpfiles) and virtualisation.hugepages
# (huge page kernel parameters).
{ lib, pkgs, config, ... }:
with lib;
let
  cfg = config.virtualisation;

  # A number followed by a K, M or G suffix. At least one digit is required so
  # that a bare suffix such as "G" is rejected at evaluation time.
  pageSizeType = types.strMatching "[0-9]+[kKmMgG]";

  # One tmpfiles.d line of type "f" for /dev/shm/<attribute name>.
  tmpfileEntry = name: f:
    "f /dev/shm/${name} ${f.mode} ${f.user} ${f.group} -";
in {
  options.virtualisation = {
    sharedMemoryFiles = mkOption {
      type = types.attrsOf (types.submodule ({ name, ... }: {
        options = {
          name = mkOption {
            visible = false;
            default = name;
            type = types.str;
          };
          user = mkOption {
            type = types.str;
            default = "root";
            description = "Owner of the memory file";
          };
          group = mkOption {
            type = types.str;
            default = "root";
            description = "Group of the memory file";
          };
          mode = mkOption {
            type = types.str;
            default = "0600";
            description = "Mode of the memory file";
          };
        };
      }));
      default = { };
      example = {
        looking-glass = {
          user = "root";
          group = "qemu-libvirtd";
          mode = "0660";
        };
      };
      description = ''
        Files to create below /dev/shm, keyed by file name. Each entry is
        turned into one systemd-tmpfiles rule.
      '';
    };

    hugepages = {
      enable = mkEnableOption "Hugepages";
      # NOTE(review): the "1M" defaults are kept for compatibility. Confirm
      # that this size is supported on the target architecture; the usage
      # snippet sets both sizes explicitly to "1G".
      defaultPageSize = mkOption {
        type = pageSizeType;
        default = "1M";
        description =
          "Default size of huge pages. You can use suffixes K, M, and G to specify KB, MB, and GB.";
      };
      pageSize = mkOption {
        type = pageSizeType;
        default = "1M";
        description =
          "Size of huge pages that are allocated at boot. You can use suffixes K, M, and G to specify KB, MB, and GB.";
      };
      numPages = mkOption {
        type = types.ints.positive;
        default = 1;
        description = "Number of huge pages to allocate at boot.";
      };
    };
  };

  config.systemd.tmpfiles.rules =
    mapAttrsToList tmpfileEntry cfg.sharedMemoryFiles;

  config.boot.kernelParams = optionals cfg.hugepages.enable [
    "default_hugepagesz=${cfg.hugepages.defaultPageSize}"
    "hugepagesz=${cfg.hugepages.pageSize}"
    "hugepages=${toString cfg.hugepages.numPages}"
  ];
}
<domain type='kvm' xmlns:qemu='http://libvirt.org/schemas/domain/qemu/1.0'>
<!-- libvirt domain definition of the guest "vm_gaming". UUID, MAC address, PCI addresses and device paths are specific to the author's host and have to be adapted before reuse. -->
<name>vm_gaming</name>
<uuid>fd888352-4004-4047-a6e9-1aa45d6cd461</uuid>
<!-- 16777216 KiB = 16 GiB of guest memory, backed by huge pages. -->
<memory unit='KiB'>16777216</memory>
<currentMemory unit='KiB'>16777216</currentMemory>
<memoryBacking>
<hugepages/>
</memoryBacking>
<!-- 8 vCPUs; the cpu element further down presents them as 1 socket x 4 cores x 2 threads. -->
<vcpu placement='static'>8</vcpu>
<iothreads>1</iothreads>
<!-- Only vCPUs 0 to 3 have a pin entry (host CPUs 4 to 7); vCPUs 4 to 7 have none. The emulator and the I/O thread are kept on host CPUs 0 to 3. NOTE(review): confirm whether leaving vCPUs 4 to 7 unpinned is intended. -->
<cputune>
<vcpupin vcpu='0' cpuset='4'/>
<vcpupin vcpu='1' cpuset='5'/>
<vcpupin vcpu='2' cpuset='6'/>
<vcpupin vcpu='3' cpuset='7'/>
<emulatorpin cpuset='0-3'/>
<iothreadpin iothread='1' cpuset='0-3'/>
</cputune>
<!-- Q35 machine type, booting from a read-only pflash loader image with a per-VM NVRAM file created from the VARS template. -->
<os>
<type arch='x86_64' machine='pc-q35-3.0'>hvm</type>
<loader readonly='yes' type='pflash'>/run/libvirt/nix-ovmf/OVMF_CODE.fd</loader>
<nvram template='/run/libvirt/nix-ovmf/OVMF_VARS.fd'>/var/lib/libvirt/VARS_vm_gaming.fd</nvram>
<boot dev='hd'/>
</os>
<features>
<acpi/>
<apic/>
<!-- Hyper-V enlightenments with a custom vendor_id. Together with the kvm hidden flag below this presumably hides the hypervisor from the guest GPU driver (TODO confirm). -->
<hyperv>
<relaxed state='on'/>
<vapic state='on'/>
<spinlocks state='on' retries='8191'/>
<vpindex state='on'/>
<runtime state='on'/>
<synic state='on'/>
<stimer state='on'/>
<reset state='on'/>
<vendor_id state='on' value='whatever'/>
</hyperv>
<kvm>
<hidden state='on'/>
</kvm>
<vmport state='off'/>
</features>
<!-- Fixed CPU model (EPYC) instead of host passthrough; the topology multiplies out to the 8 vCPUs declared above. -->
<cpu mode='custom' match='exact' check='none'>
<model fallback='allow'>EPYC</model>
<topology sockets='1' dies='1' cores='4' threads='2'/>
</cpu>
<clock offset='localtime'>
<timer name='rtc' tickpolicy='catchup' track='guest'/>
<timer name='pit' tickpolicy='delay'/>
<timer name='hpet' present='no'/>
<timer name='hypervclock' present='yes'/>
</clock>
<on_poweroff>destroy</on_poweroff>
<on_reboot>restart</on_reboot>
<on_crash>restart</on_crash>
<pm>
<suspend-to-mem enabled='no'/>
<suspend-to-disk enabled='no'/>
</pm>
<devices>
<emulator>/run/libvirt/nix-emulators/qemu-kvm</emulator>
<!-- Guest disk: raw host block device attached on the SCSI bus. SCSI controller 0 is the virtio-scsi controller defined further down, which uses iothread 1. -->
<disk type='block' device='disk'>
<driver name='qemu' type='raw' cache='writeback' io='threads' discard='unmap' detect_zeroes='on'/>
<source dev='/dev/nvme_vg/vm_windows'/>
<target dev='sda' bus='scsi'/>
<address type='drive' controller='0' bus='0' target='0' unit='0'/>
</disk>
<!-- Guest PCI topology: the PCIe root, root ports (index 1 to 5 and 12), one PCIe to PCI bridge (index 6) and five PCI bridges behind it (index 7 to 11). The bus numbers used in device addresses below refer to these controller indexes. -->
<controller type='pci' index='0' model='pcie-root'/>
<controller type='pci' index='1' model='pcie-root-port'>
<model name='pcie-root-port'/>
<target chassis='1' port='0x8'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x0' multifunction='on'/>
</controller>
<controller type='pci' index='2' model='pcie-root-port'>
<model name='pcie-root-port'/>
<target chassis='2' port='0x9'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x1'/>
</controller>
<controller type='pci' index='3' model='pcie-root-port'>
<model name='ioh3420'/>
<target chassis='3' port='0xa'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x2'/>
</controller>
<controller type='pci' index='4' model='pcie-root-port'>
<model name='pcie-root-port'/>
<target chassis='4' port='0xb'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x3'/>
</controller>
<controller type='pci' index='5' model='pcie-root-port'>
<model name='pcie-root-port'/>
<target chassis='5' port='0xc'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x4'/>
</controller>
<controller type='pci' index='6' model='pcie-to-pci-bridge'>
<model name='pcie-pci-bridge'/>
<address type='pci' domain='0x0000' bus='0x04' slot='0x00' function='0x0'/>
</controller>
<controller type='pci' index='7' model='pci-bridge'>
<model name='pci-bridge'/>
<target chassisNr='7'/>
<address type='pci' domain='0x0000' bus='0x06' slot='0x01' function='0x0'/>
</controller>
<controller type='pci' index='8' model='pci-bridge'>
<model name='pci-bridge'/>
<target chassisNr='8'/>
<address type='pci' domain='0x0000' bus='0x06' slot='0x02' function='0x0'/>
</controller>
<controller type='pci' index='9' model='pci-bridge'>
<model name='pci-bridge'/>
<target chassisNr='9'/>
<address type='pci' domain='0x0000' bus='0x06' slot='0x03' function='0x0'/>
</controller>
<controller type='pci' index='10' model='pci-bridge'>
<model name='pci-bridge'/>
<target chassisNr='10'/>
<address type='pci' domain='0x0000' bus='0x06' slot='0x04' function='0x0'/>
</controller>
<controller type='pci' index='11' model='pci-bridge'>
<model name='pci-bridge'/>
<target chassisNr='11'/>
<address type='pci' domain='0x0000' bus='0x06' slot='0x05' function='0x0'/>
</controller>
<controller type='pci' index='12' model='pcie-root-port'>
<model name='pcie-root-port'/>
<target chassis='12' port='0xd'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x01' function='0x5'/>
</controller>
<controller type='virtio-serial' index='0'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x06' function='0x0'/>
</controller>
<controller type='usb' index='0' model='nec-xhci'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x05' function='0x0'/>
</controller>
<controller type='scsi' index='0' model='virtio-scsi'>
<driver queues='8' iothread='1'/>
<address type='pci' domain='0x0000' bus='0x02' slot='0x00' function='0x0'/>
</controller>
<controller type='sata' index='0'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x1f' function='0x2'/>
</controller>
<!-- virtio network interface attached to the host bridge br0. -->
<interface type='bridge'>
<mac address='52:54:00:b7:07:69'/>
<source bridge='br0'/>
<model type='virtio'/>
<address type='pci' domain='0x0000' bus='0x00' slot='0x03' function='0x0'/>
</interface>
<channel type='spicevmc'>
<target type='virtio' name='com.redhat.spice.0'/>
<address type='virtio-serial' controller='0' bus='0' port='2'/>
</channel>
<input type='keyboard' bus='virtio'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x0a' function='0x0'/>
</input>
<input type='mouse' bus='virtio'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x0b' function='0x0'/>
</input>
<input type='mouse' bus='ps2'/>
<input type='keyboard' bus='ps2'/>
<input type='tablet' bus='virtio'>
<address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
</input>
<!-- SPICE display on a fixed port, listening on 127.0.0.1 only. NOTE(review): gl is disabled here while the video model below requests accel3d; confirm which one is intended. -->
<graphics type='spice' port='5900' autoport='no' listen='127.0.0.1'>
<listen type='address' address='127.0.0.1'/>
<image compression='off'/>
<gl enable='no' rendernode='/dev/dri/by-path/pci-0000:04:00.0-render'/>
</graphics>
<video>
<model type='virtio' heads='1' primary='yes'>
<acceleration accel3d='yes'/>
</model>
<address type='pci' domain='0x0000' bus='0x05' slot='0x00' function='0x0'/>
</video>
<!-- PCI passthrough of host device 0000:09:00.0, function 0x0 only, with a ROM image loaded from a file. NOTE(review): the ROM file name suggests this is the GPU with ID 10de:1b80; verify the host address against the current hardware. -->
<hostdev mode='subsystem' type='pci' managed='yes'>
<source>
<address domain='0x0000' bus='0x09' slot='0x00' function='0x0'/>
</source>
<rom file='/etc/nixos/misc/10de:1b80.rom'/>
<address type='pci' domain='0x0000' bus='0x03' slot='0x00' function='0x0'/>
</hostdev>
<redirdev bus='usb' type='spicevmc'>
<address type='usb' bus='0' port='2'/>
</redirdev>
<redirdev bus='usb' type='spicevmc'>
<address type='usb' bus='0' port='4'/>
</redirdev>
<memballoon model='virtio'>
<address type='pci' domain='0x0000' bus='0x00' slot='0x08' function='0x0'/>
</memballoon>
<!-- Two ivshmem-plain shared memory devices: scream (2 M) and looking-glass (32 M). The names match the sharedMemoryFiles entries of the usage snippet. -->
<shmem name='scream'>
<model type='ivshmem-plain'/>
<size unit='M'>2</size>
<address type='pci' domain='0x0000' bus='0x06' slot='0x06' function='0x0'/>
</shmem>
<shmem name='looking-glass'>
<model type='ivshmem-plain'/>
<size unit='M'>32</size>
<address type='pci' domain='0x0000' bus='0x00' slot='0x10' function='0x0'/>
</shmem>
</devices>
<!-- Extra QEMU arguments: three input-linux objects that hand host evdev devices to the guest. The same device paths are listed in deviceACL in the usage snippet. -->
<qemu:commandline>
<qemu:arg value='-object'/>
<qemu:arg value='input-linux,id=mouse0,evdev=/dev/input/by-path/pci-0000:0b:00.3-usb-0:2.2.4:1.0-event-mouse'/>
<qemu:arg value='-object'/>
<qemu:arg value='input-linux,id=keyboard0,evdev=/dev/input/by-path/pci-0000:0b:00.3-usb-0:2.2.3:1.0-event-kbd,grab_all=on,repeat=on'/>
<qemu:arg value='-object'/>
<qemu:arg value='input-linux,id=mouse1,evdev=/dev/input/by-path/pci-0000:0b:00.3-usb-0:2.2.3:1.1-event-mouse'/>
</qemu:commandline>
</domain>
@CRTified
Copy link
Author

CRTified commented Dec 6, 2019

If you want to reach me in the #nixos channel on the freenode IRC server: I'm CRTified there.

@CRTified
Copy link
Author

CRTified commented Mar 3, 2020

By the way, I switched to pulse-scream for audio. Still have to update this gist.

@adamlwgriffiths
Copy link

Have you had any issues with 21.05?
My Windows VM works with single gpu-passthrough on 20.09, but updating to 21.05 breaks it. I can't get the GPU to detach from the host.

@CRTified
Copy link
Author

@adamlwgriffiths I'm using dual GPU passthrough, passing the first GPU to the VM. I'm on unstable and had problems at one time, which I was able to fix by using the latest kernel. I've also switched from Intel to AMD (as the 7700k failed and replacing it was not reasonable), using the X570 chipset with a Ryzen 7 3700x.

@CRTified
Copy link
Author

The modules are now available in my NUR repository

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment