From 08672050a0304effa4299b6a472cb23a634ee88e Mon Sep 17 00:00:00 2001
From: Salil Mehta
Date: Wed, 1 Dec 2021 14:58:33 +0800
Subject: [PATCH 1/5] arm64: kernel: Handle disabled[(+)present] cpus in
 MADT/GICC during init

With ACPI enabled, cpus are identified by the presence of a GICC entry in
the MADT. Each GICC entry marks its cpu as enabled or disabled. As of now,
the disabled cpus are skipped, since physical cpu hotplug is not supported.
They remain disabled even after the kernel has booted.

To support virtual cpu hotplug (in which case disabled vcpus can be
hotplugged even after the kernel has booted), QEMU will populate the MADT
with a GICC entry for each possible (present+disabled) vcpu. Vcpus can then
be identified as present or disabled at init time.

To achieve this, the following changes are made to the present/possible
vcpu handling, along with the reasoning:

1. Identify all possible (present+disabled) vcpus at boot/init time and set
   their present and possible masks. In the existing code, cpus are marked
   present quite late, within smp_prepare_cpus(), which is called in the
   context of a kernel thread. Since cpu hotplug is not supported, present
   cpus are always equal to possible cpus. With cpu hotplug enabled, this
   assumption no longer holds. Hence, present cpus should be marked while
   the MADT GICC entries are being parsed for each vcpu.
2. Set the possible cpus to include the disabled ones. This also has to be
   done while parsing the MADT GICC entries, as the disabled vcpu info is
   only available at that point; in the hotplug case, possible vcpus are
   not equal to present vcpus.
3. Store the parsed MADT GICC entry even for the disabled vcpus at init
   time. This is needed because some modules, like the PMU, register IRQs
   for each possible vcpu during init. Therefore, a valid MADT GICC entry
   must be available for all possible vcpus.
4. Refactoring related to DT/OF is also done to align it with the init
   changes to support vcpu hotplug.
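The intended handling of each GICC entry can be condensed into the sketch
below (illustrative only: error, duplicate-MPIDR, boot-cpu and
parking-protocol handling are omitted, and handle_gicc_entry() is just a
made-up name for what acpi_map_gic_cpu_interface() ends up doing after this
patch):

  static void __init
  handle_gicc_entry(struct acpi_madt_generic_interrupt *gicc)
  {
          /* next free logical cpu id, counting enabled and disabled vcpus */
          unsigned int total_cpu_count = disabled_cpu_count + cpu_count;

          if (!(gicc->flags & ACPI_MADT_ENABLED)) {
                  /* disabled vcpu: hot-pluggable later, so possible but not present */
                  cpu_madt_gicc[total_cpu_count] = *gicc;
                  set_cpu_possible(total_cpu_count, true);
                  disabled_cpu_count++;
                  return;
          }

          /* enabled vcpu: possible and present right from init time */
          set_cpu_logical_map(total_cpu_count, gicc->arm_mpidr);
          cpu_madt_gicc[total_cpu_count] = *gicc;
          set_cpu_possible(total_cpu_count, true);
          set_cpu_present(total_cpu_count, true);
          cpu_count++;
  }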
Signed-off-by: Salil Mehta
Signed-off-by: Xiongfeng Wang
---
 arch/arm64/kernel/smp.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6f6ff072acbde7..4b317e71b1c40b 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -524,13 +524,12 @@ static int __init smp_cpu_setup(int cpu)
 	if (ops->cpu_init(cpu))
 		return -ENODEV;
 
-	set_cpu_possible(cpu, true);
-
 	return 0;
 }
 
 static bool bootcpu_valid __initdata;
 static unsigned int cpu_count = 1;
+static unsigned int disabled_cpu_count;
 
 #ifdef CONFIG_ACPI
 static struct acpi_madt_generic_interrupt cpu_madt_gicc[NR_CPUS];
@@ -549,10 +548,17 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
 static void __init
 acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 {
+	unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
 	u64 hwid = processor->arm_mpidr;
 
 	if (!(processor->flags & ACPI_MADT_ENABLED)) {
+#ifndef CONFIG_ACPI_HOTPLUG_CPU
 		pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
+#else
+		cpu_madt_gicc[total_cpu_count] = *processor;
+		set_cpu_possible(total_cpu_count, true);
+		disabled_cpu_count++;
+#endif
 		return;
 	}
 
@@ -561,7 +567,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 		return;
 	}
 
-	if (is_mpidr_duplicate(cpu_count, hwid)) {
+	if (is_mpidr_duplicate(total_cpu_count, hwid)) {
 		pr_err("duplicate CPU MPIDR 0x%llx in MADT\n", hwid);
 		return;
 	}
@@ -582,9 +588,9 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 		return;
 
 	/* map the logical cpu id to cpu MPIDR */
-	set_cpu_logical_map(cpu_count, hwid);
+	set_cpu_logical_map(total_cpu_count, hwid);
 
-	cpu_madt_gicc[cpu_count] = *processor;
+	cpu_madt_gicc[total_cpu_count] = *processor;
 
 	/*
 	 * Set-up the ACPI parking protocol cpu entries
@@ -595,7 +601,10 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	 * initialize the cpu if the parking protocol is
 	 * the only available enable method).
 	 */
-	acpi_set_mailbox_entry(cpu_count, processor);
+	acpi_set_mailbox_entry(total_cpu_count, processor);
+
+	set_cpu_possible(total_cpu_count, true);
+	set_cpu_present(total_cpu_count, true);
 
 	cpu_count++;
 }
@@ -629,6 +638,9 @@ static void __init acpi_parse_and_init_cpus(void)
 	acpi_table_parse_madt(ACPI_MADT_TYPE_GENERIC_INTERRUPT,
 				      acpi_parse_gic_cpu_interface, 0);
 
+	pr_debug("possible cpus(%u) present cpus(%u) disabled cpus(%u)\n",
+		 cpu_count+disabled_cpu_count, cpu_count, disabled_cpu_count);
+
 	/*
 	 * In ACPI, SMP and CPU NUMA information is provided in separate
 	 * static tables, namely the MADT and the SRAT.
@@ -699,6 +711,9 @@ static void __init of_parse_and_init_cpus(void)
 		set_cpu_logical_map(cpu_count, hwid);
 
 		early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
+
+		set_cpu_possible(cpu_count, true);
+		set_cpu_present(cpu_count, true);
 next:
 		cpu_count++;
 	}
@@ -783,7 +798,6 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		if (err)
 			continue;
 
-		set_cpu_present(cpu, true);
 		numa_store_cpu_info(cpu);
 	}
 }

From 6f70bd50f1505c50685f778de04e03d4effe5ab5 Mon Sep 17 00:00:00 2001
From: Salil Mehta
Date: Wed, 1 Dec 2021 16:01:17 +0800
Subject: [PATCH 2/5] arm64: kernel: Bound the total(present+disabled) cpus
 with nr_cpu_ids

Bound the total number of identified cpus (including the disabled ones) by
the maximum number of cpus allowed by the kernel. This maximum is either
given by the 'nr_cpus' kernel parameter or set at compile time via
CONFIG_NR_CPUS.
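A condensed sketch of the check (illustrative only; the identifiers are the
ones used in the diff below, and the concrete numbers in the comment are
just an example):

          /*
           * Example: guest booted with nr_cpus=4 (or built with
           * CONFIG_NR_CPUS=4) while QEMU describes more possible vcpus in
           * the MADT; the surplus GICC entries are dropped here and a
           * single warning is printed later from smp_init_cpus().
           */
          if (total_cpu_count > nr_cpu_ids) {
                  cpus_clipped = true;
                  return;
          }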
Signed-off-by: Salil Mehta
Signed-off-by: Xiongfeng Wang
---
 arch/arm64/kernel/smp.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 4b317e71b1c40b..a264f7f01fb11a 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -528,6 +528,7 @@ static int __init smp_cpu_setup(int cpu)
 }
 
 static bool bootcpu_valid __initdata;
+static bool cpus_clipped __initdata;
 static unsigned int cpu_count = 1;
 static unsigned int disabled_cpu_count;
 
@@ -551,6 +552,11 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
 	u64 hwid = processor->arm_mpidr;
 
+	if (total_cpu_count > nr_cpu_ids) {
+		cpus_clipped = true;
+		return;
+	}
+
 	if (!(processor->flags & ACPI_MADT_ENABLED)) {
 #ifndef CONFIG_ACPI_HOTPLUG_CPU
 		pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
@@ -584,9 +590,6 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 		return;
 	}
 
-	if (cpu_count >= NR_CPUS)
-		return;
-
 	/* map the logical cpu id to cpu MPIDR */
 	set_cpu_logical_map(total_cpu_count, hwid);
@@ -704,8 +707,10 @@ static void __init of_parse_and_init_cpus(void)
 			continue;
 		}
 
-		if (cpu_count >= NR_CPUS)
+		if (cpu_count >= nr_cpu_ids) {
+			cpus_clipped = true;
 			goto next;
+		}
 
 		pr_debug("cpu logical map 0x%llx\n", hwid);
 		set_cpu_logical_map(cpu_count, hwid);
@@ -726,6 +731,7 @@ static void __init of_parse_and_init_cpus(void)
  */
 void __init smp_init_cpus(void)
 {
+	unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
 	int i;
 
 	if (acpi_disabled)
@@ -733,9 +739,9 @@ void __init smp_init_cpus(void)
 		of_parse_and_init_cpus();
 	else
 		acpi_parse_and_init_cpus();
 
-	if (cpu_count > nr_cpu_ids)
+	if (cpus_clipped)
 		pr_warn("Number of cores (%d) exceeds configured maximum of %u - clipping\n",
-			cpu_count, nr_cpu_ids);
+			total_cpu_count, nr_cpu_ids);
 
 	if (!bootcpu_valid) {
 		pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
 		return;

From 8d06116f82c5c1a19f36449d80c5850f0d24c2d2 Mon Sep 17 00:00:00 2001
From: Salil Mehta
Date: Thu, 2 Dec 2021 13:57:51 +0800
Subject: [PATCH 3/5] arm64: kernel: Init cpu operations for all possible
 vcpus

Currently, cpu operations are only initialized for the cpus that already
have a logical cpuid to hwid association established, which only happens
for the cpus present at boot time. To support virtual cpu hotplug, we
initialize the cpu operations for all possible (present+disabled) vcpus.
This means the logical cpuid to hwid/mpidr association might not exist yet
(i.e. the hwid may be INVALID_HWID) during init. Later, when the vcpu is
actually hotplugged, a logical cpuid is allocated and associated with the
hwid/mpidr. This patch does some refactoring to support the above change.
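The resulting call pattern in both parsing paths is roughly the following
(illustrative only, condensed from the diff below):

          /* disabled vcpu: possible only */
          if (!smp_cpu_setup(total_cpu_count))
                  set_cpu_possible(total_cpu_count, true);

          /* enabled vcpu: possible and present */
          if (!smp_cpu_setup(total_cpu_count)) {
                  set_cpu_possible(total_cpu_count, true);
                  set_cpu_present(total_cpu_count, true);
          }

          /*
           * Either way, a failed smp_cpu_setup() resets the logical map
           * entry to INVALID_HWID so the slot can be reused later.
           */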
Signed-off-by: Salil Mehta
Signed-off-by: Xiongfeng Wang
---
 arch/arm64/kernel/smp.c | 39 +++++++++++++++------------------------
 1 file changed, 15 insertions(+), 24 deletions(-)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index a264f7f01fb11a..6ea79b537cafe5 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -518,13 +518,16 @@ static int __init smp_cpu_setup(int cpu)
 	const struct cpu_operations *ops;
 
 	if (init_cpu_ops(cpu))
-		return -ENODEV;
+		goto out;
 
 	ops = get_cpu_ops(cpu);
 	if (ops->cpu_init(cpu))
-		return -ENODEV;
+		goto out;
 
 	return 0;
+out:
+	__cpu_logical_map[cpu] = INVALID_HWID;
+	return -ENODEV;
 }
 
 static bool bootcpu_valid __initdata;
@@ -562,7 +565,8 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 		pr_debug("skipping disabled CPU entry with 0x%llx MPIDR\n", hwid);
 #else
 		cpu_madt_gicc[total_cpu_count] = *processor;
-		set_cpu_possible(total_cpu_count, true);
+		if (!smp_cpu_setup(total_cpu_count))
+			set_cpu_possible(total_cpu_count, true);
 		disabled_cpu_count++;
 #endif
 		return;
@@ -606,9 +610,10 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor)
 	 */
 	acpi_set_mailbox_entry(total_cpu_count, processor);
 
-	set_cpu_possible(total_cpu_count, true);
-	set_cpu_present(total_cpu_count, true);
-
+	if (!smp_cpu_setup(total_cpu_count)) {
+		set_cpu_possible(total_cpu_count, true);
+		set_cpu_present(total_cpu_count, true);
+	}
 	cpu_count++;
 }
@@ -716,9 +721,10 @@ static void __init of_parse_and_init_cpus(void)
 		set_cpu_logical_map(cpu_count, hwid);
 
 		early_map_cpu_to_node(cpu_count, of_node_to_nid(dn));
-
-		set_cpu_possible(cpu_count, true);
-		set_cpu_present(cpu_count, true);
+		if (!smp_cpu_setup(cpu_count)) {
+			set_cpu_possible(cpu_count, true);
+			set_cpu_present(cpu_count, true);
+		}
 next:
 		cpu_count++;
 	}
@@ -732,7 +738,6 @@ void __init smp_init_cpus(void)
 {
 	unsigned int total_cpu_count = disabled_cpu_count + cpu_count;
-	int i;
 
 	if (acpi_disabled)
 		of_parse_and_init_cpus();
@@ -747,20 +752,6 @@ void __init smp_init_cpus(void)
 		pr_err("missing boot CPU MPIDR, not enabling secondaries\n");
 		return;
 	}
-
-	/*
-	 * We need to set the cpu_logical_map entries before enabling
-	 * the cpus so that cpu processor description entries (DT cpu nodes
-	 * and ACPI MADT entries) can be retrieved by matching the cpu hwid
-	 * with entries in cpu_logical_map while initializing the cpus.
-	 * If the cpu set-up fails, invalidate the cpu_logical_map entry.
-	 */
-	for (i = 1; i < nr_cpu_ids; i++) {
-		if (cpu_logical_map(i) != INVALID_HWID) {
-			if (smp_cpu_setup(i))
-				set_cpu_logical_map(i, INVALID_HWID);
-		}
-	}
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)

From 744b6fb8fa4b305702ceeae4d398246d8c7e244b Mon Sep 17 00:00:00 2001
From: Salil Mehta
Date: Wed, 1 Dec 2021 16:21:50 +0800
Subject: [PATCH 4/5] arm64: kernel: Arch specific ACPI hooks (like logical
 cpuid<->hwid etc.)

To support virtual cpu hotplug, some arch specific hooks have to be
provided. These hooks are called by the generic ACPI cpu hotplug framework
while handling a vcpu hot-(un)plug event. The changes required involve:

1. Allocation of the logical cpuid corresponding to the hwid/mpidr.
2. Mapping of the logical cpuid to the hwid/mpidr and marking it present.
3. Removing the vcpu from the present mask during hot-unplug.
4. For arm64, all possible cpus are registered within topology_init().
   Hence, we need to override the weak ACPI call of arch_register_cpu()
   (which returns -ENODEV) and return success.
5. The NUMA node mapping set for this vcpu from the SRAT table info at init
   time is discarded, as the logical cpu-ids used at that time might not be
   correct. The mapping is set again using the proximity/node info obtained
   by evaluating the _PXM ACPI method.

Note: during hot-unplug of a vcpu, we do not unmap the association between
the logical cpuid and the hwid/mpidr. This association remains persistent.

Signed-off-by: Salil Mehta
Signed-off-by: Xiongfeng Wang
---
 arch/arm64/kernel/smp.c | 80 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index 6ea79b537cafe5..c7d8bea1d1d61f 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -543,6 +543,86 @@ struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu)
 	return &cpu_madt_gicc[cpu];
 }
 
+#ifdef CONFIG_ACPI_HOTPLUG_CPU
+int arch_register_cpu(int num)
+{
+	return 0;
+}
+
+static int set_numa_node_for_cpu(acpi_handle handle, int cpu)
+{
+#ifdef CONFIG_ACPI_NUMA
+	int node_id;
+
+	/* will evaluate _PXM */
+	node_id = acpi_get_node(handle);
+	if (node_id != NUMA_NO_NODE)
+		set_cpu_numa_node(cpu, node_id);
+#endif
+	return 0;
+}
+
+static void unset_numa_node_for_cpu(int cpu)
+{
+#ifdef CONFIG_ACPI_NUMA
+	set_cpu_numa_node(cpu, NUMA_NO_NODE);
+#endif
+}
+
+static int allocate_logical_cpuid(u64 physid)
+{
+	int first_invalid_idx = -1;
+	bool first = true;
+	int i;
+
+	for_each_possible_cpu(i) {
+		/*
+		 * logical cpuid<->hwid association remains persistent once
+		 * established
+		 */
+		if (cpu_logical_map(i) == physid)
+			return i;
+
+		if ((cpu_logical_map(i) == INVALID_HWID) && first) {
+			first_invalid_idx = i;
+			first = false;
+		}
+	}
+
+	return first_invalid_idx;
+}
+
+int acpi_unmap_cpu(int cpu)
+{
+	set_cpu_present(cpu, false);
+	unset_numa_node_for_cpu(cpu);
+
+	return 0;
+}
+
+int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id,
+		 int *cpuid)
+{
+	int cpu;
+
+	cpu = allocate_logical_cpuid(physid);
+	if (cpu < 0) {
+		pr_warn("Unable to map logical cpuid to physid 0x%llx\n",
+			physid);
+		return -ENOSPC;
+	}
+
+	/* map the logical cpu id to cpu MPIDR */
+	__cpu_logical_map[cpu] = physid;
+	set_numa_node_for_cpu(handle, cpu);
+
+	set_cpu_present(cpu, true);
+	*cpuid = cpu;
+
+	return 0;
+}
+#endif
+
 /*
  * acpi_map_gic_cpu_interface - parse processor MADT entry
  *

From 13c50674d17c7212fcc3a1591dfd9f1904c6001f Mon Sep 17 00:00:00 2001
From: Jianyong Wu
Date: Fri, 3 Dec 2021 17:11:39 +0800
Subject: [PATCH 5/5] cpu/numa: fix failure when hot-remove cpu

When a cpu is hot-removed, its cpu-to-numa mapping is set to NUMA_NO_NODE,
which leads to failures because that mapping is still used by other code.
Thus we need a dedicated map to describe the NUMA node of unplugged cpus.
Introduce such a map here.
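The intended interplay between the two maps can be sketched as follows
(illustrative only, condensed from the setup.c and smp.c hunks below):

          /* vcpu hot-add: _PXM already evaluated via acpi_get_node(handle) */
          set_acpicpu_numa_node(cpu, node_id);    /* new ACPI-side map */
          set_cpu_numa_node(cpu, node_id);        /* generic cpu-to-node map */

          /*
           * vcpu hot-remove: only the ACPI-side map is cleared, so users of
           * the generic map keep seeing the last valid node.
           */
          set_acpicpu_numa_node(cpu, NUMA_NO_NODE);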
Signed-off-by: Jianyong Wu
---
 arch/arm64/include/asm/smp.h |  2 ++
 arch/arm64/kernel/setup.c    | 14 ++++++++++++++
 arch/arm64/kernel/smp.c      |  6 ++++--
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h
index fc55f5a57a06ef..7949f6090eed61 100644
--- a/arch/arm64/include/asm/smp.h
+++ b/arch/arm64/include/asm/smp.h
@@ -47,6 +47,8 @@ DECLARE_PER_CPU_READ_MOSTLY(int, cpu_number);
  */
 extern u64 __cpu_logical_map[NR_CPUS];
 extern u64 cpu_logical_map(unsigned int cpu);
+extern u64 get_acpicpu_numa_node(unsigned int cpu);
+extern int set_acpicpu_numa_node(unsigned int cpu, unsigned int node);
 
 static inline void set_cpu_logical_map(unsigned int cpu, u64 hwid)
 {
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index be5f85b0a24de6..68d7a7894e107d 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -284,6 +284,20 @@ static int __init reserve_memblock_reserved_regions(void)
 }
 arch_initcall(reserve_memblock_reserved_regions);
 
+u64 __acpicpu_node_map[NR_CPUS] = { [0 ... NR_CPUS-1] = NUMA_NO_NODE };
+
+u64 get_acpicpu_numa_node(unsigned int cpu)
+{
+	return __acpicpu_node_map[cpu];
+}
+
+int set_acpicpu_numa_node(unsigned int cpu, unsigned int node)
+{
+	__acpicpu_node_map[cpu] = node;
+
+	return 0;
+}
+
 u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID };
 
 u64 cpu_logical_map(unsigned int cpu)
diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c
index c7d8bea1d1d61f..6f8fc874eff0e7 100644
--- a/arch/arm64/kernel/smp.c
+++ b/arch/arm64/kernel/smp.c
@@ -556,8 +556,10 @@ static int set_numa_node_for_cpu(acpi_handle handle, int cpu)
 
 	/* will evaluate _PXM */
 	node_id = acpi_get_node(handle);
-	if (node_id != NUMA_NO_NODE)
+	if (node_id != NUMA_NO_NODE) {
+		set_acpicpu_numa_node(cpu, node_id);
 		set_cpu_numa_node(cpu, node_id);
+	}
 #endif
 	return 0;
 }
@@ -565,7 +567,7 @@ static int set_numa_node_for_cpu(acpi_handle handle, int cpu)
 static void unset_numa_node_for_cpu(int cpu)
 {
 #ifdef CONFIG_ACPI_NUMA
-	set_cpu_numa_node(cpu, NUMA_NO_NODE);
+	set_acpicpu_numa_node(cpu, NUMA_NO_NODE);
 #endif
 }
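Taken together, a vcpu hot-add roughly flows through the new hooks as in the
sketch below. This is illustrative only: example_vcpu_hot_add() is a made-up
wrapper, and in practice these calls are made by the generic ACPI processor
driver rather than by arch code.

  static int example_vcpu_hot_add(acpi_handle handle, phys_cpuid_t mpidr)
  {
          int cpu, ret;

          /*
           * Patch 4: reuse the logical cpu id already bound to this MPIDR,
           * or grab the first free (INVALID_HWID) slot; this also marks the
           * cpu present and sets its NUMA node from _PXM (patch 5 keeps the
           * ACPI-side node map in sync).
           */
          ret = acpi_map_cpu(handle, mpidr, 0 /* acpi processor id */, &cpu);
          if (ret)
                  return ret;

          /* the vcpu is now possible and present; bring it online as usual */
          return add_cpu(cpu);
  }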