openwrt/target/linux/cns3xxx/patches-3.3/460-cns3xxx_fiq_support.patch
Felix Fietkau 879b7a7a9b cns3xxx: update FIQ header file and disable RWFO
Update header file appropriately and disable read for ownership

Note that the FIQ support implements a workaround that provides a performance
boost over the traditional upstream workaround which ensures cache lines
are exclusive on driver CPU using 'read for ownership'.

Signed-off-by: Tim Harvey <tharvey@gateworks.com>

 target/linux/cns3xxx/config-3.3                                |    2 +-
 target/linux/cns3xxx/patches-3.3/460-cns3xxx_fiq_support.patch |    9 ++++-----
 2 files changed, 5 insertions(+), 6 deletions(-)

SVN-Revision: 33827
2012-10-17 22:03:37 +00:00

429 lines
11 KiB
Diff

--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -375,6 +375,7 @@ config ARCH_CNS3XXX
select PCI_DOMAINS if PCI
select HAVE_ARM_TWD
select HAVE_SMP
+ select FIQ
help
Support for Cavium Networks CNS3XXX platform.
--- a/arch/arm/kernel/fiq.c
+++ b/arch/arm/kernel/fiq.c
@@ -49,6 +49,8 @@
static unsigned long no_fiq_insn;
+unsigned int fiq_number[2] = {0, 0};
+
/* Default reacquire function
* - we always relinquish FIQ control
* - we always reacquire FIQ control
@@ -70,9 +72,12 @@ static struct fiq_handler *current_fiq =
int show_fiq_list(struct seq_file *p, int prec)
{
- if (current_fiq != &default_owner)
- seq_printf(p, "%*s: %s\n", prec, "FIQ",
- current_fiq->name);
+ if (current_fiq != &default_owner) {
+ seq_printf(p, "%*s: ", prec, "FIQ");
+ seq_printf(p, "%10u ", fiq_number[0]);
+ seq_printf(p, "%10u ", fiq_number[1]);
+ seq_printf(p, " %s\n", current_fiq->name);
+ }
return 0;
}
--- a/arch/arm/kernel/smp.c
+++ b/arch/arm/kernel/smp.c
@@ -400,13 +400,13 @@ void show_ipi_list(struct seq_file *p, i
unsigned int cpu, i;
for (i = 0; i < NR_IPI; i++) {
- seq_printf(p, "%*s%u: ", prec - 1, "IPI", i);
+ seq_printf(p, "%*s%u:", prec - 1, "IPI", i);
for_each_present_cpu(cpu)
seq_printf(p, "%10u ",
__get_irq_stat(cpu, ipi_irqs[i]));
- seq_printf(p, " %s\n", ipi_types[i]);
+ seq_printf(p, " %s\n", ipi_types[i]);
}
}
--- a/arch/arm/mach-cns3xxx/Makefile
+++ b/arch/arm/mach-cns3xxx/Makefile
@@ -2,6 +2,6 @@ obj-$(CONFIG_ARCH_CNS3XXX) += core.o pm
obj-$(CONFIG_PCI) += pcie.o
obj-$(CONFIG_MACH_CNS3420VB) += cns3420vb.o
obj-$(CONFIG_MACH_GW2388) += laguna.o
-obj-$(CONFIG_SMP) += platsmp.o headsmp.o
+obj-$(CONFIG_SMP) += platsmp.o headsmp.o cns3xxx_fiq.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug.o
obj-$(CONFIG_LOCAL_TIMERS) += localtimer.o
--- /dev/null
+++ b/arch/arm/mach-cns3xxx/cns3xxx_fiq.S
@@ -0,0 +1,96 @@
+/*
+ * Copyright (C) 2012 Gateworks Corporation
+ * Chris Lang <clang@gateworks.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+#include <asm/asm-offsets.h>
+
+#define D_CACHE_LINE_SIZE 32
+
+ .text
+
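+/*
+ * R8  - DMA start address
+ * R9  - DMA length (map/unmap) or end address (flush range)
+ * R10 - DMA direction (low byte of the command word)
+ * R11 - DMA operation type (bits 24-25 of the command word)
+ * R12 - address of this CPU's fiq_buffer request slot
+ * R13 - address of the MISC_FIQ_CPU register, written with 0 on entry
+*/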
+
+ .global cns3xxx_fiq_end
+ENTRY(cns3xxx_fiq_start)
+ mov r8, #0
+ str r8, [r13] @ write 0 to the register addressed by r13 (presumably acks the FIQ, confirm)
+
+ ldr r9, [r12] @ word 0 of the request slot: pointer to a counter
+ ldr r8, [r9]
+ add r8, r8, #1 @ bump the counter
+ str r8, [r9]
+
+ ldmib r12, {r8, r9, r10} @ words 1-3 of the slot: start, length or end, command
+ and r11, r10, #0x3000000 @ operation type, bits 24-25
+ and r10, r10, #0xff @ direction, low byte
+
+ teq r11, #0x1000000 @ map area request
+ beq cns3xxx_dma_map_area
+ teq r11, #0x2000000 @ unmap area request
+ beq cns3xxx_dma_unmap_area
+ b cns3xxx_dma_flush_range @ any other type is handled as flush range
+
+cns3xxx_fiq_exit:
+ mov r8, #0
+ str r8, [r12, #12] @ clear the command word so the requester stops polling
+ mcr p15, 0, r8, c7, c10, 4 @ drain write buffer
+ subs pc, lr, #4 @ return from the FIQ
+
+cns3xxx_dma_map_area:
+ add r9, r9, r8 @ r9 = end address (start + length)
+ teq r10, #DMA_FROM_DEVICE
+ beq cns3xxx_dma_inv_range @ from-device mapping: invalidate
+ b cns3xxx_dma_clean_range @ every other direction: clean
+
+cns3xxx_dma_unmap_area:
+ add r9, r9, r8 @ r9 = end address (start + length)
+ teq r10, #DMA_TO_DEVICE
+ bne cns3xxx_dma_inv_range @ anything but to-device: invalidate
+ b cns3xxx_fiq_exit @ to-device unmap: no cache work
+
+cns3xxx_dma_flush_range:
+ bic r8, r8, #D_CACHE_LINE_SIZE - 1 @ round start down to a line boundary
+1:
+ mcr p15, 0, r8, c7, c14, 1 @ clean & invalidate D line
+ add r8, r8, #D_CACHE_LINE_SIZE
+ cmp r8, r9
+ blo 1b @ repeat while r8 is below the end held in r9
+ b cns3xxx_fiq_exit
+
+cns3xxx_dma_clean_range:
+ bic r8, r8, #D_CACHE_LINE_SIZE - 1 @ round start down to a line boundary
+1:
+ mcr p15, 0, r8, c7, c10, 1 @ clean D line
+ add r8, r8, #D_CACHE_LINE_SIZE
+ cmp r8, r9
+ blo 1b @ repeat while r8 is below the end held in r9
+ b cns3xxx_fiq_exit
+
+cns3xxx_dma_inv_range:
+ tst r8, #D_CACHE_LINE_SIZE - 1 @ does the range start inside a line?
+ bic r8, r8, #D_CACHE_LINE_SIZE - 1
+ mcrne p15, 0, r8, c7, c10, 1 @ clean D line
+ tst r9, #D_CACHE_LINE_SIZE - 1 @ does the range end inside a line?
+ bic r9, r9, #D_CACHE_LINE_SIZE - 1
+ mcrne p15, 0, r9, c7, c14, 1 @ clean & invalidate D line
+1:
+ mcr p15, 0, r8, c7, c6, 1 @ invalidate D line
+ add r8, r8, #D_CACHE_LINE_SIZE
+ cmp r8, r9
+ blo 1b @ repeat while r8 is below the aligned end in r9
+ b cns3xxx_fiq_exit
+
+cns3xxx_fiq_end:
--- a/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h
+++ b/arch/arm/mach-cns3xxx/include/mach/cns3xxx.h
@@ -294,6 +294,7 @@
#define MISC_PCIE_INT_MASK(x) MISC_MEM_MAP(0x978 + (x) * 0x100)
#define MISC_PCIE_INT_STATUS(x) MISC_MEM_MAP(0x97C + (x) * 0x100)
+#define MISC_FIQ_CPU(x) MISC_MEM_MAP(0xA58 - (x) * 0x4)
/*
* Power management and clock control
*/
--- a/arch/arm/mach-cns3xxx/include/mach/irqs.h
+++ b/arch/arm/mach-cns3xxx/include/mach/irqs.h
@@ -14,6 +14,7 @@
#define IRQ_LOCALTIMER 29
#define IRQ_LOCALWDOG 30
#define IRQ_TC11MP_GIC_START 32
+#define FIQ_START 0
#include <mach/cns3xxx.h>
--- /dev/null
+++ b/arch/arm/mach-cns3xxx/include/mach/smp.h
@@ -0,0 +1,8 @@
+#ifndef __MACH_SMP_H
+#define __MACH_SMP_H
+
+extern void smp_dma_map_area(const void *, size_t, int); /* addr, size, DMA direction */
+extern void smp_dma_unmap_area(const void *, size_t, int); /* addr, size, DMA direction */
+extern void smp_dma_flush_range(const void *, const void *); /* start, end */
+
+#endif /* __MACH_SMP_H */
--- a/arch/arm/mach-cns3xxx/platsmp.c
+++ b/arch/arm/mach-cns3xxx/platsmp.c
@@ -24,10 +24,27 @@
#include <asm/hardware/gic.h>
#include <asm/smp_scu.h>
#include <asm/unified.h>
-
+#include <asm/fiq.h>
+#include <mach/smp.h>
#include <mach/cns3xxx.h>
+static struct fiq_handler fh = {
+ .name = "cns3xxx-fiq"
+};
+
+static unsigned int fiq_buffer[8]; /* two 4-word request slots ([0..3], [4..7]): counter pointer, start, size or end, command word */
+
+#define FIQ_ENABLED 0x80000000 /* command-word flag: request pending; the requester polls until it clears */
+#define FIQ_GENERATE 0x00010000 /* value written to MISC_FIQ_CPU(n) after a request is posted */
+#define CNS3XXX_MAP_AREA 0x01000000 /* command-word operation: dma_map_area */
+#define CNS3XXX_UNMAP_AREA 0x02000000 /* command-word operation: dma_unmap_area */
+#define CNS3XXX_FLUSH_RANGE 0x03000000 /* command-word operation: dma_flush_range */
+
extern void cns3xxx_secondary_startup(void);
+extern unsigned char cns3xxx_fiq_start, cns3xxx_fiq_end;
+extern unsigned int fiq_number[2];
+extern struct cpu_cache_fns cpu_cache;
+struct cpu_cache_fns cpu_cache_save;
#define SCU_CPU_STATUS 0x08
static void __iomem *scu_base;
@@ -38,12 +55,50 @@ static void __iomem *scu_base;
*/
volatile int __cpuinitdata pen_release = -1;
+static void __cpuinit cns3xxx_set_fiq_regs(void) /* __cpuinit, not __init: also called from platform_secondary_init() */
+{
+ struct pt_regs FIQ_regs; /* only ARM_ip and ARM_sp are filled in here */
+ unsigned int cpu = smp_processor_id();
+
+ if (cpu) {
+ FIQ_regs.ARM_ip = (unsigned int)&fiq_buffer[4]; /* FIQ r12: request slot serviced by this CPU */
+ FIQ_regs.ARM_sp = (unsigned int)MISC_FIQ_CPU(0); /* FIQ r13: register the handler writes 0 to on entry */
+ } else {
+ FIQ_regs.ARM_ip = (unsigned int)&fiq_buffer[0]; /* FIQ r12: request slot serviced by this CPU */
+ FIQ_regs.ARM_sp = (unsigned int)MISC_FIQ_CPU(1); /* FIQ r13: register the handler writes 0 to on entry */
+ }
+ set_fiq_regs(&FIQ_regs); /* install the values for the calling CPU */
+}
+
+static void __init cns3xxx_init_fiq(void) /* claim the FIQ, install the handler, prime both request slots */
+{
+ void *fiqhandler_start;
+ unsigned int fiqhandler_length;
+ int ret;
+
+ fiqhandler_start = &cns3xxx_fiq_start;
+ fiqhandler_length = &cns3xxx_fiq_end - &cns3xxx_fiq_start; /* handler size in bytes (both symbols are unsigned char) */
+
+ ret = claim_fiq(&fh);
+
+ if (ret) {
+ return; /* NOTE(review): failure is silent and the caller still sets the FIQ regs - confirm intended */
+ }
+
+ set_fiq_handler(fiqhandler_start, fiqhandler_length);
+ fiq_buffer[0] = (unsigned int)&fiq_number[0]; /* slot 0: counter the handler increments */
+ fiq_buffer[3] = 0; /* slot 0: command word, no request pending */
+ fiq_buffer[4] = (unsigned int)&fiq_number[1]; /* slot 1: counter the handler increments */
+ fiq_buffer[7] = 0; /* slot 1: command word, no request pending */
+}
+
+
/*
* Write pen_release in a way that is guaranteed to be visible to all
* observers, irrespective of whether they're taking part in coherency
* or not. This is necessary for the hotplug code to work reliably.
*/
-static void write_pen_release(int val)
+static void __cpuinit write_pen_release(int val)
{
pen_release = val;
smp_wmb();
@@ -63,12 +118,25 @@ void __cpuinit platform_secondary_init(u
gic_secondary_init(0);
/*
+ * Setup Secondary Core FIQ regs
+ */
+ cns3xxx_set_fiq_regs();
+
+ /*
* let the primary processor know we're out of the
* pen, then head off into the C entry point
*/
write_pen_release(-1);
/*
+ * Fixup DMA Operations
+ *
+ */
+ cpu_cache.dma_map_area = (void *)smp_dma_map_area;
+ cpu_cache.dma_unmap_area = (void *)smp_dma_unmap_area;
+ cpu_cache.dma_flush_range = (void *)smp_dma_flush_range;
+
+ /*
* Synchronise with the boot thread.
*/
spin_lock(&boot_lock);
@@ -171,4 +239,112 @@ void __init platform_smp_prepare_cpus(un
*/
__raw_writel(virt_to_phys(cns3xxx_secondary_startup),
(void __iomem *)(CNS3XXX_MISC_BASE_VIRT + 0x0600));
+
+ /*
+ * Setup FIQ's for main cpu
+ */
+ cns3xxx_init_fiq();
+ cns3xxx_set_fiq_regs();
+ memcpy((void *)&cpu_cache_save, (void *)&cpu_cache, sizeof(struct cpu_cache_fns));
+}
+
+
+static inline unsigned long cns3xxx_cpu_id(void) /* CPU number read directly from CP15 */
+{
+ unsigned long cpu;
+
+ asm volatile(
+ " mrc p15, 0, %0, c0, c0, 5 @ cns3xxx_cpu_id\n"
+ : "=r" (cpu) : : "memory", "cc");
+ return (cpu & 0xf); /* only the low four bits of the register value are kept */
+}
+
+void smp_dma_map_area(const void *addr, size_t size, int dir) /* dma_map_area locally plus a FIQ request for the other CPU */
+{
+ unsigned int cpu;
+ unsigned long flags;
+ raw_local_irq_save(flags); /* IRQs stay off while the request slot is in use */
+ cpu = cns3xxx_cpu_id();
+ if (cpu) {
+ fiq_buffer[1] = (unsigned int)addr; /* non-zero CPU posts into slot 0 */
+ fiq_buffer[2] = size;
+ fiq_buffer[3] = dir | CNS3XXX_MAP_AREA | FIQ_ENABLED;
+ smp_mb(); /* slot contents are ordered before the trigger write */
+ __raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(1));
+
+ cpu_cache_save.dma_map_area(addr, size, dir); /* local maintenance through the saved original op */
+ while ((fiq_buffer[3]) & FIQ_ENABLED) { barrier(); } /* wait until the handler clears the command word */
+ } else {
+
+ fiq_buffer[5] = (unsigned int)addr; /* CPU 0 posts into slot 1 */
+ fiq_buffer[6] = size;
+ fiq_buffer[7] = dir | CNS3XXX_MAP_AREA | FIQ_ENABLED;
+ smp_mb(); /* slot contents are ordered before the trigger write */
+ __raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(0));
+
+ cpu_cache_save.dma_map_area(addr, size, dir); /* local maintenance through the saved original op */
+ while ((fiq_buffer[7]) & FIQ_ENABLED) { barrier(); } /* wait until the handler clears the command word */
+ }
+ raw_local_irq_restore(flags);
+}
+
+void smp_dma_unmap_area(const void *addr, size_t size, int dir) /* dma_unmap_area locally plus a FIQ request for the other CPU */
+{
+ unsigned int cpu;
+ unsigned long flags;
+
+ raw_local_irq_save(flags); /* IRQs stay off while the request slot is in use */
+ cpu = cns3xxx_cpu_id();
+ if (cpu) {
+
+ fiq_buffer[1] = (unsigned int)addr; /* non-zero CPU posts into slot 0 */
+ fiq_buffer[2] = size;
+ fiq_buffer[3] = dir | CNS3XXX_UNMAP_AREA | FIQ_ENABLED;
+ smp_mb(); /* slot contents are ordered before the trigger write */
+ __raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(1));
+
+ cpu_cache_save.dma_unmap_area(addr, size, dir); /* local maintenance through the saved original op */
+ while ((fiq_buffer[3]) & FIQ_ENABLED) { barrier(); } /* wait until the handler clears the command word */
+ } else {
+
+ fiq_buffer[5] = (unsigned int)addr; /* CPU 0 posts into slot 1 */
+ fiq_buffer[6] = size;
+ fiq_buffer[7] = dir | CNS3XXX_UNMAP_AREA | FIQ_ENABLED;
+ smp_mb(); /* slot contents are ordered before the trigger write */
+ __raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(0));
+
+ cpu_cache_save.dma_unmap_area(addr, size, dir); /* local maintenance through the saved original op */
+ while ((fiq_buffer[7]) & FIQ_ENABLED) { barrier(); } /* wait until the handler clears the command word */
+ }
+ raw_local_irq_restore(flags);
+}
+
+void smp_dma_flush_range(const void *start, const void *end) /* dma_flush_range locally plus a FIQ request for the other CPU */
+{
+ unsigned int cpu;
+ unsigned long flags;
+ raw_local_irq_save(flags); /* IRQs stay off while the request slot is in use */
+ cpu = cns3xxx_cpu_id();
+ if (cpu) {
+
+ fiq_buffer[1] = (unsigned int)start; /* non-zero CPU posts into slot 0 */
+ fiq_buffer[2] = (unsigned int)end; /* flush requests carry an end address, not a size */
+ fiq_buffer[3] = CNS3XXX_FLUSH_RANGE | FIQ_ENABLED;
+ smp_mb(); /* slot contents are ordered before the trigger write */
+ __raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(1));
+
+ cpu_cache_save.dma_flush_range(start, end); /* local maintenance through the saved original op */
+ while ((fiq_buffer[3]) & FIQ_ENABLED) { barrier(); } /* wait until the handler clears the command word */
+ } else {
+
+ fiq_buffer[5] = (unsigned int)start; /* CPU 0 posts into slot 1 */
+ fiq_buffer[6] = (unsigned int)end; /* flush requests carry an end address, not a size */
+ fiq_buffer[7] = CNS3XXX_FLUSH_RANGE | FIQ_ENABLED;
+ smp_mb(); /* slot contents are ordered before the trigger write */
+ __raw_writel(FIQ_GENERATE, MISC_FIQ_CPU(0));
+
+ cpu_cache_save.dma_flush_range(start, end); /* local maintenance through the saved original op */
+ while ((fiq_buffer[7]) & FIQ_ENABLED) { barrier(); } /* wait until the handler clears the command word */
+ }
+ raw_local_irq_restore(flags);
}
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -793,7 +793,7 @@ config NEEDS_SYSCALL_FOR_CMPXCHG
config DMA_CACHE_RWFO
bool "Enable read/write for ownership DMA cache maintenance"
- depends on CPU_V6K && SMP
+ depends on CPU_V6K && SMP && !ARCH_CNS3XXX
default y
help
The Snoop Control Unit on ARM11MPCore does not detect the