OS-6546 Use PCID if KPTI is enabled


 * stacks, since they can interrupt another ISR at any time. These stacks are
 * full-sized, however, and not a little kpti_frame struct. We only set %cr3 in
 * their trampolines (and do it unconditionally), and don't bother pivoting
 * away. We're either going into the panic() path, or we're going to return
 * straight away without rescheduling, so it's fine to not be on our real
 * kthread stack (and some of the state we want to go find it with might be
 * corrupt!)
 *
 * Finally, for these "special" interrupts (NMI/MCE/double fault) we use a
 * special %cr3 value we stash here in the text (kpti_safe_cr3). We set this to
 * point at the PML4 for kas early in boot and never touch it again. Hopefully
 * it survives whatever corruption brings down the rest of the kernel! (This
 * path is sketched just after this comment.)
 *
 * Syscalls are different to interrupts (at least in the SYSENTER/SYSCALL64
 * cases) in that they do not push an interrupt frame (and also have some other
 * effects). In the syscall trampolines, we assume that we can only be taking
 * the call from userland and use SWAPGS and an unconditional overwrite of %cr3.
 * We do not do any stack pivoting for syscalls (and we leave SYSENTER's
 * existing %rsp pivot untouched) -- instead we spill registers into
 * %gs:CPU_KPTI_* as we need to (also sketched below).
 *
 * Note that the normal %cr3 values do not cause invalidations with PCIDE - see
 * hat_switch(), and the sketch after tr_mmu_flush_user_range() below.
 */
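
To ground the NMI/MCE/double-fault discussion above, here is a minimal, hedged
sketch of the shape such a trampoline takes. It is illustrative rather than
the verbatim source: the tr_nmi_sketch label is invented, and nmiint is only
assumed to be the eventual handler.

	/*
	 * Illustrative sketch: a "special" trampoline loads the stashed,
	 * known-good %cr3 unconditionally and does not pivot stacks.
	 */
	ENTRY_NP(tr_nmi_sketch)		/* hypothetical label */
	pushq	%r13			/* scratch slot on the handler stack */
	movq	kpti_safe_cr3, %r13	/* the kas PML4 stashed early in boot */
	movq	%r13, %cr3		/* unconditional; may already be kernel */
	popq	%r13
	jmp	nmiint			/* assumed name of the real handler */
	SET_SIZE(tr_nmi_sketch)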

/*
 * The macros here mostly line up with what's in kdi_idthdl.s, too, so if you
 * fix bugs here check to see if they should be fixed there as well.
 */
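
Similarly, as a companion to the syscall paragraph in the big comment above,
this is a hedged sketch of the %gs:CPU_KPTI_* spill pattern for a
SYSCALL64-style entry. The tr_syscall_sketch label is invented and sys_syscall
is an assumed target; CPU_KPTI_R13 and CPU_KPTI_KCR3 are taken to be the
per-CPU spill slot and kernel %cr3 slot.

	/*
	 * Illustrative sketch: a syscall trampoline can only be entered from
	 * userland, so SWAPGS and the %cr3 overwrite are unconditional, and
	 * there is no stack pivot -- registers spill into %gs:CPU_KPTI_*.
	 */
	ENTRY_NP(tr_syscall_sketch)		/* hypothetical label */
	swapgs					/* from userland: kernel %gs now */
	movq	%r13, %gs:CPU_KPTI_R13		/* spill one scratch register */
	movq	%gs:CPU_KPTI_KCR3, %r13		/* this CPU's kernel %cr3 */
	movq	%r13, %cr3			/* unconditional overwrite */
	movq	%gs:CPU_KPTI_R13, %r13		/* unspill */
	jmp	sys_syscall			/* assumed handler name */
	SET_SIZE(tr_syscall_sketch)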

#include <sys/asm_linkage.h>
#include <sys/asm_misc.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/psw.h>
#include <sys/machbrand.h>
#include <sys/param.h>

#if defined(__lint)

#include <sys/types.h>
#include <sys/thread.h>
#include <sys/systm.h>


	MKIVCT(176);	MKIVCT(177);	MKIVCT(178);	MKIVCT(179);
	MKIVCT(180);	MKIVCT(181);	MKIVCT(182);	MKIVCT(183);
	MKIVCT(184);	MKIVCT(185);	MKIVCT(186);	MKIVCT(187);
	MKIVCT(188);	MKIVCT(189);	MKIVCT(190);	MKIVCT(191);
	MKIVCT(192);	MKIVCT(193);	MKIVCT(194);	MKIVCT(195);
	MKIVCT(196);	MKIVCT(197);	MKIVCT(198);	MKIVCT(199);
	MKIVCT(200);	MKIVCT(201);	MKIVCT(202);	MKIVCT(203);
	MKIVCT(204);	MKIVCT(205);	MKIVCT(206);	MKIVCT(207);
	MKIVCT(208);	MKIVCT(209);	MKIVCT(210);	MKIVCT(211);
	MKIVCT(212);	MKIVCT(213);	MKIVCT(214);	MKIVCT(215);
	MKIVCT(216);	MKIVCT(217);	MKIVCT(218);	MKIVCT(219);
	MKIVCT(220);	MKIVCT(221);	MKIVCT(222);	MKIVCT(223);
	MKIVCT(224);	MKIVCT(225);	MKIVCT(226);	MKIVCT(227);
	MKIVCT(228);	MKIVCT(229);	MKIVCT(230);	MKIVCT(231);
	MKIVCT(232);	MKIVCT(233);	MKIVCT(234);	MKIVCT(235);
	MKIVCT(236);	MKIVCT(237);	MKIVCT(238);	MKIVCT(239);
	MKIVCT(240);	MKIVCT(241);	MKIVCT(242);	MKIVCT(243);
	MKIVCT(244);	MKIVCT(245);	MKIVCT(246);	MKIVCT(247);
	MKIVCT(248);	MKIVCT(249);	MKIVCT(250);	MKIVCT(251);
	MKIVCT(252);	MKIVCT(253);	MKIVCT(254);	MKIVCT(255);

	/*
	 * We're PCIDE, but we don't have INVPCID.  The only way to invalidate a
	 * PCID other than the current one, then, is to load its cr3 then
	 * invlpg.  But loading kf_user_cr3 means we can no longer access our
	 * caller's text mapping (or indeed, its stack).  So this little helper
	 * has to live within our trampoline text region.
	 *
	 * Called as tr_mmu_flush_user_range(addr, len, pgsz, cr3)
	 */
	ENTRY_NP(tr_mmu_flush_user_range)
	push	%rbx
	pushf
	cli				/* no interrupts while on the user %cr3 */
	/* When we read cr3, it never has the NOINVL bit set. */
	mov	%cr3, %rax
	movq	$CR3_NOINVL_BIT, %rbx
	orq	%rbx, %rax		/* restore value: don't flush our PCID */

	mov	%rcx, %cr3		/* switch to the caller-supplied cr3 */
	add	%rdi, %rsi		/* %rsi = addr + len, the range end */
.align	ASM_ENTRY_ALIGN
1:
	invlpg	(%rdi)			/* invalidate one page... */
	add	%rdx, %rdi		/* ...advance by pgsz... */
	cmp	%rsi, %rdi
	jb	1b			/* ...until the whole range is done */
	mov	%rax, %cr3		/* back to the original %cr3, NOINVL set */
	popf				/* restores the saved interrupt flag */
	pop	%rbx
	retq
	SET_SIZE(tr_mmu_flush_user_range)
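
Callers reach tr_mmu_flush_user_range() via the SysV AMD64 calling convention,
so the addr, len, pgsz and cr3 of the comment above arrive in %rdi, %rsi, %rdx
and %rcx respectively. The NOINVL handling on the restore write is the
property the big comment credits to hat_switch(): with CR4.PCIDE set, a %cr3
write whose bit 63 (CR3_NOINVL_BIT) is set switches page tables without
invalidating the incoming PCID's TLB entries. A minimal, hedged sketch of that
idiom on its own (the next_cr3 operand is hypothetical):

	/*
	 * Illustrative sketch: switch %cr3 under PCIDE without flushing the
	 * target PCID's TLB entries.
	 */
	movq	next_cr3, %rax		/* hypothetical: the next %cr3 value */
	movq	$CR3_NOINVL_BIT, %rcx
	orq	%rcx, %rax		/* bit 63 set: don't invalidate */
	movq	%rax, %cr3		/* cached translations survive */

This is why the function above ORs CR3_NOINVL_BIT into the saved %cr3 before
restoring it: returning to the original page tables should not discard the
kernel PCID's TLB entries.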

.align MMU_PAGESIZE
.global kpti_tramp_end
kpti_tramp_end:
	nop

#endif	/* __lint */