0183-x86-cpu_entry_area-Move-it-to-a-separate-unit.patch

From c03a5cb44d69723a8a2aa0b3b4808d28ea749431 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <[email protected]>
Date: Wed, 20 Dec 2017 18:28:54 +0100
Subject: [PATCH 183/232] x86/cpu_entry_area: Move it to a separate unit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CVE-2017-5754

Separate the cpu_entry_area code out of cpu/common.c and the fixmap.

Signed-off-by: Thomas Gleixner <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Josh Poimboeuf <[email protected]>
Cc: Juergen Gross <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Signed-off-by: Ingo Molnar <[email protected]>
(cherry picked from commit ed1bbc40a0d10e0c5c74fe7bdc6298295cf40255)
Signed-off-by: Andy Whitcroft <[email protected]>
Signed-off-by: Kleber Sacilotto de Souza <[email protected]>
(cherry picked from commit 0fa11d2cd3d67af676aa2762ade282ba6d09cbe5)
Signed-off-by: Fabian Grünbichler <[email protected]>
---
 arch/x86/mm/Makefile                  |   2 +-
 arch/x86/include/asm/cpu_entry_area.h |  52 +++++++++++++++++
 arch/x86/include/asm/fixmap.h         |  41 +-------------
 arch/x86/kernel/cpu/common.c          |  94 ------------------------------
 arch/x86/kernel/traps.c               |   1 +
 arch/x86/mm/cpu_entry_area.c          | 104 ++++++++++++++++++++++++++++++++++
 6 files changed, 159 insertions(+), 135 deletions(-)
 create mode 100644 arch/x86/include/asm/cpu_entry_area.h
 create mode 100644 arch/x86/mm/cpu_entry_area.c
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index 0fbdcb64f9f8..76f5399a8356 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -2,7 +2,7 @@
 KCOV_INSTRUMENT_tlb.o	:= n
 
 obj-y	:=	init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
-		pat.o pgtable.o physaddr.o setup_nx.o tlb.o
+		pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o
 
 # Make sure __phys_addr has no stackprotector
 nostackp := $(call cc-option, -fno-stack-protector)
diff --git a/arch/x86/include/asm/cpu_entry_area.h b/arch/x86/include/asm/cpu_entry_area.h
new file mode 100644
index 000000000000..5471826803af
--- /dev/null
+++ b/arch/x86/include/asm/cpu_entry_area.h
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef _ASM_X86_CPU_ENTRY_AREA_H
+#define _ASM_X86_CPU_ENTRY_AREA_H
+
+#include <linux/percpu-defs.h>
+#include <asm/processor.h>
+
+/*
+ * cpu_entry_area is a percpu region that contains things needed by the CPU
+ * and early entry/exit code. Real types aren't used for all fields here
+ * to avoid circular header dependencies.
+ *
+ * Every field is a virtual alias of some other allocated backing store.
+ * There is no direct allocation of a struct cpu_entry_area.
+ */
+struct cpu_entry_area {
+	char gdt[PAGE_SIZE];
+
+	/*
+	 * The GDT is just below entry_stack and thus serves (on x86_64) as
+	 * a a read-only guard page.
+	 */
+	struct entry_stack_page entry_stack_page;
+
+	/*
+	 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
+	 * we need task switches to work, and task switches write to the TSS.
+	 */
+	struct tss_struct tss;
+
+	char entry_trampoline[PAGE_SIZE];
+
+#ifdef CONFIG_X86_64
+	/*
+	 * Exception stacks used for IST entries.
+	 *
+	 * In the future, this should have a separate slot for each stack
+	 * with guard pages between them.
+	 */
+	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
+#endif
+};
+
+#define CPU_ENTRY_AREA_SIZE	(sizeof(struct cpu_entry_area))
+#define CPU_ENTRY_AREA_PAGES	(CPU_ENTRY_AREA_SIZE / PAGE_SIZE)
+
+DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
+
+extern void setup_cpu_entry_areas(void);
+
+#endif
diff --git a/arch/x86/include/asm/fixmap.h b/arch/x86/include/asm/fixmap.h
index a7fb137ad964..1b2521473480 100644
--- a/arch/x86/include/asm/fixmap.h
+++ b/arch/x86/include/asm/fixmap.h
@@ -25,6 +25,7 @@
 #else
 #include <uapi/asm/vsyscall.h>
 #endif
+#include <asm/cpu_entry_area.h>
 
 /*
  * We can't declare FIXADDR_TOP as variable for x86_64 because vsyscall
@@ -44,46 +45,6 @@ extern unsigned long __FIXADDR_TOP;
 			 PAGE_SIZE)
 #endif
 
-/*
- * cpu_entry_area is a percpu region in the fixmap that contains things
- * needed by the CPU and early entry/exit code. Real types aren't used
- * for all fields here to avoid circular header dependencies.
- *
- * Every field is a virtual alias of some other allocated backing store.
- * There is no direct allocation of a struct cpu_entry_area.
- */
-struct cpu_entry_area {
-	char gdt[PAGE_SIZE];
-
-	/*
-	 * The GDT is just below entry_stack and thus serves (on x86_64) as
-	 * a a read-only guard page.
-	 */
-	struct entry_stack_page entry_stack_page;
-
-	/*
-	 * On x86_64, the TSS is mapped RO. On x86_32, it's mapped RW because
-	 * we need task switches to work, and task switches write to the TSS.
-	 */
-	struct tss_struct tss;
-
-	char entry_trampoline[PAGE_SIZE];
-
-#ifdef CONFIG_X86_64
-	/*
-	 * Exception stacks used for IST entries.
-	 *
-	 * In the future, this should have a separate slot for each stack
-	 * with guard pages between them.
-	 */
-	char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
-#endif
-};
-
-#define CPU_ENTRY_AREA_PAGES (sizeof(struct cpu_entry_area) / PAGE_SIZE)
-
-extern void setup_cpu_entry_areas(void);
-
 /*
  * Here we define all the compile-time 'special' virtual
  * addresses. The point is to have a constant address at
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 7a8a5d436566..96171ce46d61 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -482,102 +482,8 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
 	  [0 ... N_EXCEPTION_STACKS - 1]	= EXCEPTION_STKSZ,
 	  [DEBUG_STACK - 1]			= DEBUG_STKSZ
 };
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
-	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
-#endif
-
-static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page,
-				   entry_stack_storage);
-
-static void __init
-set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
-{
-	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
-		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
-}
-
-/* Setup the fixmap mappings only once per-processor */
-static void __init setup_cpu_entry_area(int cpu)
-{
-#ifdef CONFIG_X86_64
-	extern char _entry_trampoline[];
-
-	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
-	pgprot_t gdt_prot = PAGE_KERNEL_RO;
-	pgprot_t tss_prot = PAGE_KERNEL_RO;
-#else
-	/*
-	 * On native 32-bit systems, the GDT cannot be read-only because
-	 * our double fault handler uses a task gate, and entering through
-	 * a task gate needs to change an available TSS to busy. If the
-	 * GDT is read-only, that will triple fault. The TSS cannot be
-	 * read-only because the CPU writes to it on task switches.
-	 *
-	 * On Xen PV, the GDT must be read-only because the hypervisor
-	 * requires it.
-	 */
-	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
-		PAGE_KERNEL_RO : PAGE_KERNEL;
-	pgprot_t tss_prot = PAGE_KERNEL;
-#endif
-
-	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
-				per_cpu_ptr(&entry_stack_storage, cpu), 1,
-				PAGE_KERNEL);
-
-	/*
-	 * The Intel SDM says (Volume 3, 7.2.1):
-	 *
-	 *  Avoid placing a page boundary in the part of the TSS that the
-	 *  processor reads during a task switch (the first 104 bytes). The
-	 *  processor may not correctly perform address translations if a
-	 *  boundary occurs in this area. During a task switch, the processor
-	 *  reads and writes into the first 104 bytes of each TSS (using
-	 *  contiguous physical addresses beginning with the physical address
-	 *  of the first byte of the TSS). So, after TSS access begins, if
-	 *  part of the 104 bytes is not physically contiguous, the processor
-	 *  will access incorrect information without generating a page-fault
-	 *  exception.
-	 *
-	 * There are also a lot of errata involving the TSS spanning a page
-	 * boundary. Assert that we're not doing that.
-	 */
-	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
-		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
-	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
-				&per_cpu(cpu_tss_rw, cpu),
-				sizeof(struct tss_struct) / PAGE_SIZE,
-				tss_prot);
-
-#ifdef CONFIG_X86_32
-	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
 #endif
 
-#ifdef CONFIG_X86_64
-	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
-	BUILD_BUG_ON(sizeof(exception_stacks) !=
-		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
-	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
-				&per_cpu(exception_stacks, cpu),
-				sizeof(exception_stacks) / PAGE_SIZE,
-				PAGE_KERNEL);
-
-	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
-		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
-#endif
-}
-
-void __init setup_cpu_entry_areas(void)
-{
-	unsigned int cpu;
-
-	for_each_possible_cpu(cpu)
-		setup_cpu_entry_area(cpu);
-}
-
 /* Load the original GDT from the per-cpu structure */
 void load_direct_gdt(int cpu)
 {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 14b462eefa17..ef2d1b8a0516 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -57,6 +57,7 @@
 #include <asm/traps.h>
 #include <asm/desc.h>
 #include <asm/fpu/internal.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/mce.h>
 #include <asm/fixmap.h>
 #include <asm/mach_traps.h>
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
new file mode 100644
index 000000000000..235ff9cfaaf4
--- /dev/null
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+#include <asm/cpu_entry_area.h>
+#include <asm/pgtable.h>
+#include <asm/fixmap.h>
+#include <asm/desc.h>
+
+static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
+
+#ifdef CONFIG_X86_64
+static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
+	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+#endif
+
+static void __init
+set_percpu_fixmap_pages(int idx, void *ptr, int pages, pgprot_t prot)
+{
+	for ( ; pages; pages--, idx--, ptr += PAGE_SIZE)
+		__set_fixmap(idx, per_cpu_ptr_to_phys(ptr), prot);
+}
+
+/* Setup the fixmap mappings only once per-processor */
+static void __init setup_cpu_entry_area(int cpu)
+{
+#ifdef CONFIG_X86_64
+	extern char _entry_trampoline[];
+
+	/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
+	pgprot_t gdt_prot = PAGE_KERNEL_RO;
+	pgprot_t tss_prot = PAGE_KERNEL_RO;
+#else
+	/*
+	 * On native 32-bit systems, the GDT cannot be read-only because
+	 * our double fault handler uses a task gate, and entering through
+	 * a task gate needs to change an available TSS to busy. If the
+	 * GDT is read-only, that will triple fault. The TSS cannot be
+	 * read-only because the CPU writes to it on task switches.
+	 *
+	 * On Xen PV, the GDT must be read-only because the hypervisor
+	 * requires it.
+	 */
+	pgprot_t gdt_prot = boot_cpu_has(X86_FEATURE_XENPV) ?
+		PAGE_KERNEL_RO : PAGE_KERNEL;
+	pgprot_t tss_prot = PAGE_KERNEL;
+#endif
+
+	__set_fixmap(get_cpu_entry_area_index(cpu, gdt), get_cpu_gdt_paddr(cpu), gdt_prot);
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, entry_stack_page),
+				per_cpu_ptr(&entry_stack_storage, cpu), 1,
+				PAGE_KERNEL);
+
+	/*
+	 * The Intel SDM says (Volume 3, 7.2.1):
+	 *
+	 *  Avoid placing a page boundary in the part of the TSS that the
+	 *  processor reads during a task switch (the first 104 bytes). The
+	 *  processor may not correctly perform address translations if a
+	 *  boundary occurs in this area. During a task switch, the processor
+	 *  reads and writes into the first 104 bytes of each TSS (using
+	 *  contiguous physical addresses beginning with the physical address
+	 *  of the first byte of the TSS). So, after TSS access begins, if
+	 *  part of the 104 bytes is not physically contiguous, the processor
+	 *  will access incorrect information without generating a page-fault
+	 *  exception.
+	 *
+	 * There are also a lot of errata involving the TSS spanning a page
+	 * boundary. Assert that we're not doing that.
+	 */
+	BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
+		      offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
+	BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, tss),
+				&per_cpu(cpu_tss_rw, cpu),
+				sizeof(struct tss_struct) / PAGE_SIZE,
+				tss_prot);
+
+#ifdef CONFIG_X86_32
+	per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
+#endif
+
+#ifdef CONFIG_X86_64
+	BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
+	BUILD_BUG_ON(sizeof(exception_stacks) !=
+		     sizeof(((struct cpu_entry_area *)0)->exception_stacks));
+	set_percpu_fixmap_pages(get_cpu_entry_area_index(cpu, exception_stacks),
+				&per_cpu(exception_stacks, cpu),
+				sizeof(exception_stacks) / PAGE_SIZE,
+				PAGE_KERNEL);
+
+	__set_fixmap(get_cpu_entry_area_index(cpu, entry_trampoline),
+		     __pa_symbol(_entry_trampoline), PAGE_KERNEL_RX);
+#endif
+}
+
+void __init setup_cpu_entry_areas(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu)
+		setup_cpu_entry_area(cpu);
+}
-- 
2.14.2
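
Two details in the code above are easy to miss. First, set_percpu_fixmap_pages() decrements idx as ptr advances because x86 fixmap slots grow downward in virtual address space, so a multi-page buffer mapped at ascending virtual addresses consumes descending fixmap indices. Second, as the header comment says, a struct cpu_entry_area is never allocated as such: every field is a fixmap-provided virtual alias of separately allocated backing store, mapped with a per-field protection (read-only GDT and TSS on x86_64, writable TSS on x86_32). The following is a minimal userspace sketch of that aliasing technique, using memfd_create() and two mmap() views of one page; it is an illustration only, not kernel code, and every name in it is invented for the example (requires Linux with glibc >= 2.27):

/*
 * Sketch: one physical page, two virtual mappings. The RW view stands in
 * for the percpu backing store; the RO view stands in for the fixmap alias
 * that the entry code would use.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);

	/* Anonymous "backing store", like the percpu storage above. */
	int fd = memfd_create("backing", 0);
	if (fd < 0 || ftruncate(fd, page) < 0)
		return 1;

	/* Writable view of the page. */
	char *rw = mmap(NULL, page, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	/* Read-only alias of the very same page. */
	char *ro = mmap(NULL, page, PROT_READ, MAP_SHARED, fd, 0);
	if (rw == MAP_FAILED || ro == MAP_FAILED)
		return 1;

	strcpy(rw, "written through the RW view");
	printf("RO alias sees: %s\n", ro);	/* same physical page */

	/*
	 * A store through 'ro' would SIGSEGV here, just as a stray write
	 * through the kernel's read-only GDT alias faults instead of
	 * silently corrupting the descriptors.
	 */
	return 0;
}

The kernel version additionally places the read-only alias at a fixed per-CPU virtual address (its fixmap slot), which is what lets early entry code locate the GDT, TSS and entry stack at a known address.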