From 6dac1c0a9b94b62b6412b74a8997f728570f36be Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Sun, 12 May 2019 18:49:24 +0200 Subject: [PATCH] kernel: Activate CONFIG_OPTIMIZE_INLINING This will reduce the size of the kernel if CONFIG_CC_OPTIMIZE_FOR_SIZE is set, as it is for all targets with the small_flash feature flag. I haven't seen any changes for an ARM64 target which optimizes the kernel for speed instead. On the ath79/tiny target the uncompressed kernel size was reduced by 3.2% and the compressed kernel size by 2.1%. Kernel size with CONFIG_OPTIMIZE_INLINING=n: 4346412 build_dir/target-mips_24kc_musl/linux-ath79_tiny/vmlinux 1391169 build_dir/target-mips_24kc_musl/linux-ath79_tiny/tplink_tl-wr941-v4-kernel.bin Kernel size with CONFIG_OPTIMIZE_INLINING=y: 4212396 build_dir/target-mips_24kc_musl/linux-ath79_tiny/vmlinux 1362051 build_dir/target-mips_24kc_musl/linux-ath79_tiny/tplink_tl-wr941-v4-kernel.bin This change is currently pending for kernel 5.2 and is already in linux-next; this updates our patch to match the upstream version.
Signed-off-by: Hauke Mehrtens --- target/linux/generic/config-4.14 | 2 +- target/linux/generic/config-4.19 | 2 +- .../pending-4.14/220-optimize_inlining.patch | 143 +++++++++++- .../pending-4.19/220-optimize_inlining.patch | 203 +++++++++++++++++- 4 files changed, 332 insertions(+), 18 deletions(-) diff --git a/target/linux/generic/config-4.14 b/target/linux/generic/config-4.14 index 1e8497b503d..8aaa54b8ff1 100644 --- a/target/linux/generic/config-4.14 +++ b/target/linux/generic/config-4.14 @@ -3382,7 +3382,7 @@ CONFIG_NMI_LOG_BUF_SHIFT=13 # CONFIG_OPROFILE is not set # CONFIG_OPROFILE_EVENT_MULTIPLEX is not set # CONFIG_OPT3001 is not set -# CONFIG_OPTIMIZE_INLINING is not set +CONFIG_OPTIMIZE_INLINING=y # CONFIG_ORANGEFS_FS is not set # CONFIG_ORION_WATCHDOG is not set # CONFIG_OSF_PARTITION is not set diff --git a/target/linux/generic/config-4.19 b/target/linux/generic/config-4.19 index 72ce1bb2d70..e39837b995b 100644 --- a/target/linux/generic/config-4.19 +++ b/target/linux/generic/config-4.19 @@ -3557,7 +3557,7 @@ CONFIG_NMI_LOG_BUF_SHIFT=13 # CONFIG_OPROFILE is not set # CONFIG_OPROFILE_EVENT_MULTIPLEX is not set # CONFIG_OPT3001 is not set -# CONFIG_OPTIMIZE_INLINING is not set +CONFIG_OPTIMIZE_INLINING=y # CONFIG_ORANGEFS_FS is not set # CONFIG_ORION_WATCHDOG is not set # CONFIG_OSF_PARTITION is not set diff --git a/target/linux/generic/pending-4.14/220-optimize_inlining.patch b/target/linux/generic/pending-4.14/220-optimize_inlining.patch index dca1dcb2041..cdf8bac3232 100644 --- a/target/linux/generic/pending-4.14/220-optimize_inlining.patch +++ b/target/linux/generic/pending-4.14/220-optimize_inlining.patch @@ -1,3 +1,130 @@ +--- a/arch/arm/include/asm/hardirq.h ++++ b/arch/arm/include/asm/hardirq.h +@@ -6,6 +6,7 @@ + #include + #include + ++/* number of IPIS _not_ including IPI_CPU_BACKTRACE */ + #define NR_IPI 7 + + typedef struct { +--- a/arch/arm/kernel/atags.h ++++ b/arch/arm/kernel/atags.h +@@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tag 
+ const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, + unsigned int machine_nr); + #else +-static inline const struct machine_desc * ++static inline const struct machine_desc * __init __noreturn + setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) + { + early_print("no ATAGS support: can't continue\n"); +--- a/arch/arm/kernel/smp.c ++++ b/arch/arm/kernel/smp.c +@@ -76,6 +76,10 @@ enum ipi_msg_type { + IPI_CPU_STOP, + IPI_IRQ_WORK, + IPI_COMPLETION, ++ /* ++ * CPU_BACKTRACE is special and not included in NR_IPI ++ * or tracable with trace_ipi_* ++ */ + IPI_CPU_BACKTRACE, + /* + * SGI8-15 can be reserved by secure firmware, and thus may +@@ -801,7 +805,7 @@ core_initcall(register_cpufreq_notifier) + + static void raise_nmi(cpumask_t *mask) + { +- smp_cross_call(mask, IPI_CPU_BACKTRACE); ++ __smp_cross_call(mask, IPI_CPU_BACKTRACE); + } + + void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +--- a/arch/arm64/include/asm/cpufeature.h ++++ b/arch/arm64/include/asm/cpufeature.h +@@ -125,7 +125,7 @@ static inline bool cpu_have_feature(unsi + } + + /* System capability check for constant caps */ +-static inline bool __cpus_have_const_cap(int num) ++static __always_inline bool __cpus_have_const_cap(int num) + { + if (num >= ARM64_NCAPS) + return false; +@@ -139,7 +139,7 @@ static inline bool cpus_have_cap(unsigne + return test_bit(num, cpu_hwcaps); + } + +-static inline bool cpus_have_const_cap(int num) ++static __always_inline bool cpus_have_const_cap(int num) + { + if (static_branch_likely(&arm64_const_caps_ready)) + return __cpus_have_const_cap(num); +--- a/arch/mips/include/asm/bitops.h ++++ b/arch/mips/include/asm/bitops.h +@@ -462,7 +462,7 @@ static inline void __clear_bit_unlock(un + * Return the bit position (0..63) of the most significant 1 bit in a word + * Returns -1 if no 1 bit exists + */ +-static inline unsigned long __fls(unsigned long word) ++static __always_inline unsigned long 
__fls(unsigned long word) + { + int num; + +@@ -528,7 +528,7 @@ static inline unsigned long __fls(unsign + * Returns 0..SZLONG-1 + * Undefined if no bit exists, so code should check against 0 first. + */ +-static inline unsigned long __ffs(unsigned long word) ++static __always_inline unsigned long __ffs(unsigned long word) + { + return __fls(word & -word); + } +--- a/arch/mips/kernel/cpu-bugs64.c ++++ b/arch/mips/kernel/cpu-bugs64.c +@@ -42,8 +42,8 @@ static inline void align_mod(const int a + : GCC_IMM_ASM() (align), GCC_IMM_ASM() (mod)); + } + +-static inline void mult_sh_align_mod(long *v1, long *v2, long *w, +- const int align, const int mod) ++static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, ++ const int align, const int mod) + { + unsigned long flags; + int m1, m2; +--- a/arch/powerpc/kernel/prom_init.c ++++ b/arch/powerpc/kernel/prom_init.c +@@ -474,14 +474,14 @@ static int __init prom_next_node(phandle + } + } + +-static inline int prom_getprop(phandle node, const char *pname, +- void *value, size_t valuelen) ++static inline int __init prom_getprop(phandle node, const char *pname, ++ void *value, size_t valuelen) + { + return call_prom("getprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); + } + +-static inline int prom_getproplen(phandle node, const char *pname) ++static inline int __init prom_getproplen(phandle node, const char *pname) + { + return call_prom("getproplen", 2, 1, node, ADDR(pname)); + } +--- a/arch/s390/include/asm/cpacf.h ++++ b/arch/s390/include/asm/cpacf.h +@@ -184,7 +184,7 @@ static inline int __cpacf_check_opcode(u + } + } + +-static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) ++static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) + { + if (__cpacf_check_opcode(opcode)) { + __cpacf_query(opcode, mask); --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -284,20 +284,6 @@ config CPA_DEBUG @@ -23,13 +150,13 @@ depends on 
DEBUG_KERNEL --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -137,6 +137,20 @@ endmenu # "printk and dmesg options" - - menu "Compile-time checks and compiler options" +@@ -305,6 +305,20 @@ config HEADERS_CHECK + exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in + your build tree), to make sure they're suitable. +config OPTIMIZE_INLINING -+ bool "Allow gcc to uninline functions marked 'inline'" -+ ---help--- ++ bool "Allow compiler to uninline functions marked 'inline'" ++ help + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of @@ -41,9 +168,9 @@ + + If unsure, say N. + - config DEBUG_INFO - bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL && !COMPILE_TEST + config DEBUG_SECTION_MISMATCH + bool "Enable full Section mismatch analysis" + help --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -296,9 +296,6 @@ config ZONE_DMA32 diff --git a/target/linux/generic/pending-4.19/220-optimize_inlining.patch b/target/linux/generic/pending-4.19/220-optimize_inlining.patch index 22e24f3ecaa..daab7767692 100644 --- a/target/linux/generic/pending-4.19/220-optimize_inlining.patch +++ b/target/linux/generic/pending-4.19/220-optimize_inlining.patch @@ -1,3 +1,179 @@ +--- a/arch/arm/include/asm/hardirq.h ++++ b/arch/arm/include/asm/hardirq.h +@@ -6,6 +6,7 @@ + #include + #include + ++/* number of IPIS _not_ including IPI_CPU_BACKTRACE */ + #define NR_IPI 7 + + typedef struct { +--- a/arch/arm/kernel/atags.h ++++ b/arch/arm/kernel/atags.h +@@ -5,7 +5,7 @@ void convert_to_tag_list(struct tag *tag + const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, + unsigned int machine_nr); + #else +-static inline const struct machine_desc * ++static inline const struct machine_desc * __init __noreturn + setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) + 
{ + early_print("no ATAGS support: can't continue\n"); +--- a/arch/arm/kernel/smp.c ++++ b/arch/arm/kernel/smp.c +@@ -76,6 +76,10 @@ enum ipi_msg_type { + IPI_CPU_STOP, + IPI_IRQ_WORK, + IPI_COMPLETION, ++ /* ++ * CPU_BACKTRACE is special and not included in NR_IPI ++ * or tracable with trace_ipi_* ++ */ + IPI_CPU_BACKTRACE, + /* + * SGI8-15 can be reserved by secure firmware, and thus may +@@ -803,7 +807,7 @@ core_initcall(register_cpufreq_notifier) + + static void raise_nmi(cpumask_t *mask) + { +- smp_cross_call(mask, IPI_CPU_BACKTRACE); ++ __smp_cross_call(mask, IPI_CPU_BACKTRACE); + } + + void arch_trigger_cpumask_backtrace(const cpumask_t *mask, bool exclude_self) +--- a/arch/arm64/include/asm/cpufeature.h ++++ b/arch/arm64/include/asm/cpufeature.h +@@ -365,7 +365,7 @@ static inline bool cpu_have_feature(unsi + } + + /* System capability check for constant caps */ +-static inline bool __cpus_have_const_cap(int num) ++static __always_inline bool __cpus_have_const_cap(int num) + { + if (num >= ARM64_NCAPS) + return false; +@@ -379,7 +379,7 @@ static inline bool cpus_have_cap(unsigne + return test_bit(num, cpu_hwcaps); + } + +-static inline bool cpus_have_const_cap(int num) ++static __always_inline bool cpus_have_const_cap(int num) + { + if (static_branch_likely(&arm64_const_caps_ready)) + return __cpus_have_const_cap(num); +--- a/arch/mips/include/asm/bitops.h ++++ b/arch/mips/include/asm/bitops.h +@@ -463,7 +463,7 @@ static inline void __clear_bit_unlock(un + * Return the bit position (0..63) of the most significant 1 bit in a word + * Returns -1 if no 1 bit exists + */ +-static inline unsigned long __fls(unsigned long word) ++static __always_inline unsigned long __fls(unsigned long word) + { + int num; + +@@ -529,7 +529,7 @@ static inline unsigned long __fls(unsign + * Returns 0..SZLONG-1 + * Undefined if no bit exists, so code should check against 0 first. 
+ */ +-static inline unsigned long __ffs(unsigned long word) ++static __always_inline unsigned long __ffs(unsigned long word) + { + return __fls(word & -word); + } +--- a/arch/mips/kernel/cpu-bugs64.c ++++ b/arch/mips/kernel/cpu-bugs64.c +@@ -42,8 +42,8 @@ static inline void align_mod(const int a + : GCC_IMM_ASM() (align), GCC_IMM_ASM() (mod)); + } + +-static inline void mult_sh_align_mod(long *v1, long *v2, long *w, +- const int align, const int mod) ++static __always_inline void mult_sh_align_mod(long *v1, long *v2, long *w, ++ const int align, const int mod) + { + unsigned long flags; + int m1, m2; +--- a/arch/powerpc/kernel/prom_init.c ++++ b/arch/powerpc/kernel/prom_init.c +@@ -498,14 +498,14 @@ static int __init prom_next_node(phandle + } + } + +-static inline int prom_getprop(phandle node, const char *pname, +- void *value, size_t valuelen) ++static inline int __init prom_getprop(phandle node, const char *pname, ++ void *value, size_t valuelen) + { + return call_prom("getprop", 4, 1, node, ADDR(pname), + (u32)(unsigned long) value, (u32) valuelen); + } + +-static inline int prom_getproplen(phandle node, const char *pname) ++static inline int __init prom_getproplen(phandle node, const char *pname) + { + return call_prom("getproplen", 2, 1, node, ADDR(pname)); + } +--- a/arch/powerpc/mm/tlb-radix.c ++++ b/arch/powerpc/mm/tlb-radix.c +@@ -90,8 +90,8 @@ void radix__tlbiel_all(unsigned int acti + asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory"); + } + +-static inline void __tlbiel_pid(unsigned long pid, int set, +- unsigned long ric) ++static __always_inline void __tlbiel_pid(unsigned long pid, int set, ++ unsigned long ric) + { + unsigned long rb,rs,prs,r; + +@@ -106,7 +106,7 @@ static inline void __tlbiel_pid(unsigned + trace_tlbie(0, 1, rb, rs, ric, prs, r); + } + +-static inline void __tlbie_pid(unsigned long pid, unsigned long ric) ++static __always_inline void __tlbie_pid(unsigned long pid, unsigned long ric) + { + unsigned long rb,rs,prs,r; + +@@ 
-136,7 +136,7 @@ static inline void __tlbiel_lpid(unsigne + trace_tlbie(lpid, 1, rb, rs, ric, prs, r); + } + +-static inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) ++static __always_inline void __tlbie_lpid(unsigned long lpid, unsigned long ric) + { + unsigned long rb,rs,prs,r; + +@@ -239,7 +239,7 @@ static inline void fixup_tlbie_lpid(unsi + /* + * We use 128 set in radix mode and 256 set in hpt mode. + */ +-static inline void _tlbiel_pid(unsigned long pid, unsigned long ric) ++static __always_inline void _tlbiel_pid(unsigned long pid, unsigned long ric) + { + int set; + +@@ -918,7 +918,7 @@ void radix__tlb_flush(struct mmu_gather + tlb->need_flush_all = 0; + } + +-static inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, ++static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, + unsigned long start, unsigned long end, + int psize, bool also_pwc) + { +--- a/arch/s390/include/asm/cpacf.h ++++ b/arch/s390/include/asm/cpacf.h +@@ -202,7 +202,7 @@ static inline int __cpacf_check_opcode(u + } + } + +-static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) ++static __always_inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask) + { + if (__cpacf_check_opcode(opcode)) { + __cpacf_query(opcode, mask); --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -276,20 +276,6 @@ config CPA_DEBUG @@ -21,15 +197,26 @@ config DEBUG_ENTRY bool "Debug low-level entry code" depends on DEBUG_KERNEL +--- a/drivers/mtd/nand/raw/vf610_nfc.c ++++ b/drivers/mtd/nand/raw/vf610_nfc.c +@@ -373,7 +373,7 @@ static int vf610_nfc_cmd(struct nand_chi + { + const struct nand_op_instr *instr; + struct vf610_nfc *nfc = chip_to_nfc(chip); +- int op_id = -1, trfr_sz = 0, offset; ++ int op_id = -1, trfr_sz = 0, offset = 0; + u32 col = 0, row = 0, cmd1 = 0, cmd2 = 0, code = 0; + bool force8bit = false; + --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug -@@ -150,6 +150,20 @@ endmenu # "printk and dmesg options" - - menu 
"Compile-time checks and compiler options" +@@ -309,6 +309,20 @@ config HEADERS_CHECK + exported to $(INSTALL_HDR_PATH) (usually 'usr/include' in + your build tree), to make sure they're suitable. +config OPTIMIZE_INLINING -+ bool "Allow gcc to uninline functions marked 'inline'" -+ ---help--- ++ bool "Allow compiler to uninline functions marked 'inline'" ++ help + This option determines if the kernel forces gcc to inline the functions + developers have marked 'inline'. Doing so takes away freedom from gcc to + do what it thinks is best, which is desirable for the gcc 3.x series of @@ -41,9 +228,9 @@ + + If unsure, say N. + - config DEBUG_INFO - bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL && !COMPILE_TEST + config DEBUG_SECTION_MISMATCH + bool "Enable full Section mismatch analysis" + help --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -306,9 +306,6 @@ config ZONE_DMA32