From 424f79f35a94611f73182f19a7711174b756b052 Mon Sep 17 00:00:00 2001
From: P33M <P33M@github.com>
Date: Fri, 26 Sep 2014 11:32:09 +0100
Subject: [PATCH 092/114] dwc_otg: introduce fiq_fsm_spin(un|)lock()

SMP safety for the FIQ relies on register read-modify write cycles being
completed in the correct order. Several places in the DWC code modify
registers also touched by the FIQ. Protect these by a bare-bones lock
mechanism.

This also makes it possible to run the FIQ and IRQ handlers on different
cores.
---
 .../usb/host/dwc_common_port/dwc_common_linux.c    |  6 ---
 drivers/usb/host/dwc_otg/dwc_otg_cil.c             | 10 -----
 drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c         | 46 +++++++++++++++++++++-
 drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h         | 16 +++++++-
 drivers/usb/host/dwc_otg/dwc_otg_hcd.c             | 23 ++++++++++-
 drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c        |  9 ++++-
 6 files changed, 88 insertions(+), 22 deletions(-)

--- a/drivers/usb/host/dwc_common_port/dwc_common_linux.c
+++ b/drivers/usb/host/dwc_common_port/dwc_common_linux.c
@@ -580,13 +580,7 @@ void DWC_WRITE_REG64(uint64_t volatile *
 
 void DWC_MODIFY_REG32(uint32_t volatile *reg, uint32_t clear_mask, uint32_t set_mask)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-	local_fiq_disable();
 	writel((readl(reg) & ~clear_mask) | set_mask, reg);
-	local_fiq_enable();
-	local_irq_restore(flags);
 }
 
 #if 0
--- a/drivers/usb/host/dwc_otg/dwc_otg_cil.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_cil.c
@@ -2244,9 +2244,7 @@ void dwc_otg_core_host_init(dwc_otg_core
  */
 void dwc_otg_hc_init(dwc_otg_core_if_t * core_if, dwc_hc_t * hc)
 {
-	uint32_t intr_enable;
 	hcintmsk_data_t hc_intr_mask;
-	gintmsk_data_t gintmsk = {.d32 = 0 };
 	hcchar_data_t hcchar;
 	hcsplt_data_t hcsplt;
 
@@ -2348,14 +2346,6 @@ void dwc_otg_hc_init(dwc_otg_core_if_t *
 	}
 	DWC_WRITE_REG32(&hc_regs->hcintmsk, hc_intr_mask.d32);
 
-	/* Enable the top level host channel interrupt. */
-	intr_enable = (1 << hc_num);
-	DWC_MODIFY_REG32(&host_if->host_global_regs->haintmsk, 0, intr_enable);
-
-	/* Make sure host channel interrupts are enabled. */
-	gintmsk.b.hcintr = 1;
-	DWC_MODIFY_REG32(&core_if->core_global_regs->gintmsk, 0, gintmsk.d32);
-
 	/*
 	 * Program the HCCHARn register with the endpoint characteristics for
 	 * the current transfer.
--- a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.c
@@ -75,6 +75,46 @@ void notrace _fiq_print(enum fiq_debug_l
 }
 
 /**
+ * fiq_fsm_spin_lock() - ARMv6+ bare bones spinlock
+ * Must be called with local interrupts and FIQ disabled.
+ */
+inline void fiq_fsm_spin_lock(fiq_lock_t *lock)
+{
+	unsigned long tmp;
+	uint32_t newval;
+	fiq_lock_t lockval;
+	smp_mb__before_spinlock();
+	/* Nested locking, yay. If we are on the same CPU as the fiq, then the disable
+	 * will be sufficient. If we are on a different CPU, then the lock protects us. */
+	prefetchw(&lock->slock);
+	asm volatile (
+	"1:     ldrex   %0, [%3]\n"
+	"       add     %1, %0, %4\n"
+	"       strex   %2, %1, [%3]\n"
+	"       teq     %2, #0\n"
+	"       bne     1b"
+	: "=&r" (lockval), "=&r" (newval), "=&r" (tmp)
+	: "r" (&lock->slock), "I" (1 << 16)
+	: "cc");
+
+	while (lockval.tickets.next != lockval.tickets.owner) {
+		wfe();
+		lockval.tickets.owner = ACCESS_ONCE(lock->tickets.owner);
+	}
+	smp_mb();
+}
+
+/**
+ * fiq_fsm_spin_unlock() - ARMv6+ bare bones spinunlock
+ */
+inline void fiq_fsm_spin_unlock(fiq_lock_t *lock)
+{
+	smp_mb();
+	lock->tickets.owner++;
+	dsb_sev();
+}
+
+/**
  * fiq_fsm_restart_channel() - Poke channel enable bit for a split transaction
  * @channel: channel to re-enable
  */
@@ -1142,6 +1182,7 @@ void notrace dwc_otg_fiq_fsm(struct fiq_
 	gintsts_handled.d32 = 0;
 	haint_handled.d32 = 0;
 
+	fiq_fsm_spin_lock(&state->lock);
 	gintsts.d32 = FIQ_READ(state->dwc_regs_base + GINTSTS);
 	gintmsk.d32 = FIQ_READ(state->dwc_regs_base + GINTMSK);
 	gintsts.d32 &= gintmsk.d32;
@@ -1231,7 +1272,7 @@ void notrace dwc_otg_fiq_fsm(struct fiq_
 
 	}
 	state->fiq_done++;
-	mb();
+	fiq_fsm_spin_unlock(&state->lock);
 }
 
 
@@ -1253,6 +1294,7 @@ void notrace dwc_otg_fiq_nop(struct fiq_
 	gintmsk_data_t gintmsk;
 	hfnum_data_t hfnum;
 
+	fiq_fsm_spin_lock(&state->lock);
 	hfnum.d32 = FIQ_READ(state->dwc_regs_base + HFNUM);
 	gintsts.d32 = FIQ_READ(state->dwc_regs_base + GINTSTS);
 	gintmsk.d32 = FIQ_READ(state->dwc_regs_base + GINTMSK);
@@ -1290,5 +1332,5 @@ void notrace dwc_otg_fiq_nop(struct fiq_
 
 	}
 	state->fiq_done++;
-	mb();
+	fiq_fsm_spin_unlock(&state->lock);
 }
--- a/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h
+++ b/drivers/usb/host/dwc_otg/dwc_otg_fiq_fsm.h
@@ -120,7 +120,6 @@ typedef struct {
 	volatile void* intstat;
 } mphi_regs_t;
 
-
 enum fiq_debug_level {
 	FIQDBG_SCHED = (1 << 0),
 	FIQDBG_INT   = (1 << 1),
@@ -128,6 +127,16 @@ enum fiq_debug_level {
 	FIQDBG_PORTHUB = (1 << 3),
 };
 
+typedef struct {
+	union {
+		uint32_t slock;
+		struct _tickets {
+			uint16_t owner;
+			uint16_t next;
+		} tickets;
+	};
+} fiq_lock_t;
+
 struct fiq_state;
 
 extern void _fiq_print (enum fiq_debug_level dbg_lvl, volatile struct fiq_state *state, char *fmt, ...);
@@ -324,6 +333,7 @@ struct fiq_channel_state {
  * It contains top-level state information.
  */
 struct fiq_state {
+	fiq_lock_t lock;
 	mphi_regs_t mphi_regs;
 	void *dwc_regs_base;
 	dma_addr_t dma_base;
@@ -342,6 +352,10 @@ struct fiq_state {
 	struct fiq_channel_state channel[0];
 };
 
+extern void fiq_fsm_spin_lock(fiq_lock_t *lock);
+
+extern void fiq_fsm_spin_unlock(fiq_lock_t *lock);
+
 extern int fiq_fsm_too_late(struct fiq_state *st, int n);
 
 extern int fiq_fsm_tt_in_use(struct fiq_state *st, int num_channels, int n);
--- a/drivers/usb/host/dwc_otg/dwc_otg_hcd.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd.c
@@ -1184,6 +1184,9 @@ static void assign_and_init_hc(dwc_otg_h
 	dwc_otg_qtd_t *qtd;
 	dwc_otg_hcd_urb_t *urb;
 	void* ptr = NULL;
+	uint32_t intr_enable;
+	unsigned long flags;
+	gintmsk_data_t gintmsk = { .d32 = 0, };
 
 	qtd = DWC_CIRCLEQ_FIRST(&qh->qtd_list);
 
@@ -1409,6 +1412,20 @@ static void assign_and_init_hc(dwc_otg_h
 		hc->desc_list_addr = qh->desc_list_dma;
 
 	dwc_otg_hc_init(hcd->core_if, hc);
+
+	local_irq_save(flags);
+	local_fiq_disable();
+	fiq_fsm_spin_lock(&hcd->fiq_state->lock);
+	/* Enable the top level host channel interrupt. */
+	intr_enable = (1 << hc->hc_num);
+	DWC_MODIFY_REG32(&hcd->core_if->host_if->host_global_regs->haintmsk, 0, intr_enable);
+
+	/* Make sure host channel interrupts are enabled. */
+	gintmsk.b.hcintr = 1;
+	DWC_MODIFY_REG32(&hcd->core_if->core_global_regs->gintmsk, 0, gintmsk.d32);
+	fiq_fsm_spin_unlock(&hcd->fiq_state->lock);
+	local_fiq_enable();
+	local_irq_restore(flags);
 	hc->qh = qh;
 }
 
@@ -1659,6 +1676,7 @@ int fiq_fsm_queue_isoc_transaction(dwc_o
 	fiq_print(FIQDBG_INT, hcd->fiq_state, "%08x", st->hcdma_copy.d32);
 	hfnum.d32 = DWC_READ_REG32(&hcd->core_if->host_if->host_global_regs->hfnum);
 	local_fiq_disable();
+	fiq_fsm_spin_lock(&hcd->fiq_state->lock);
 	DWC_WRITE_REG32(&hc_regs->hctsiz, st->hctsiz_copy.d32);
 	DWC_WRITE_REG32(&hc_regs->hcsplt, st->hcsplt_copy.d32);
 	DWC_WRITE_REG32(&hc_regs->hcdma, st->hcdma_copy.d32);
@@ -1676,6 +1694,7 @@ int fiq_fsm_queue_isoc_transaction(dwc_o
 	}
 	mb();
 	st->hcchar_copy.b.chen = 0;
+	fiq_fsm_spin_unlock(&hcd->fiq_state->lock);
 	local_fiq_enable();
 	return 0;
 }
@@ -1811,7 +1830,7 @@ int fiq_fsm_queue_split_transaction(dwc_
 	DWC_WRITE_REG32(&hc_regs->hcintmsk, st->hcintmsk_copy.d32);
 
 	local_fiq_disable();
-	mb();
+	fiq_fsm_spin_lock(&hcd->fiq_state->lock);
 
 	if (hc->ep_type & 0x1) {
 		hfnum.d32 = DWC_READ_REG32(&hcd->core_if->host_if->host_global_regs->hfnum);
@@ -1909,7 +1928,7 @@ int fiq_fsm_queue_split_transaction(dwc_
 		st->hcchar_copy.b.chen = 1;
 		DWC_WRITE_REG32(&hc_regs->hcchar, st->hcchar_copy.d32);
 	}
-	mb();
+	fiq_fsm_spin_unlock(&hcd->fiq_state->lock);
 	local_fiq_enable();
 	return 0;
 }
--- a/drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c
+++ b/drivers/usb/host/dwc_otg/dwc_otg_hcd_intr.c
@@ -101,6 +101,7 @@ int32_t dwc_otg_hcd_handle_intr(dwc_otg_
 	if (dwc_otg_is_host_mode(core_if)) {
 		if (fiq_enable) {
 			local_fiq_disable();
+			fiq_fsm_spin_lock(&dwc_otg_hcd->fiq_state->lock);
 			/* Pull in from the FIQ's disabled mask */
 			gintmsk.d32 = gintmsk.d32 | ~(dwc_otg_hcd->fiq_state->gintmsk_saved.d32);
 			dwc_otg_hcd->fiq_state->gintmsk_saved.d32 = ~0;
@@ -116,8 +117,10 @@ int32_t dwc_otg_hcd_handle_intr(dwc_otg_
 		}
 		gintsts.d32 &= gintmsk.d32;
 
-		if (fiq_enable)
+		if (fiq_enable) {
+			fiq_fsm_spin_unlock(&dwc_otg_hcd->fiq_state->lock);
 			local_fiq_enable();
+		}
 
 		if (!gintsts.d32) {
 			goto exit_handler_routine;
@@ -200,6 +203,7 @@ exit_handler_routine:
 		gintmsk_data_t gintmsk_new;
 		haintmsk_data_t haintmsk_new;
 		local_fiq_disable();
+		fiq_fsm_spin_lock(&dwc_otg_hcd->fiq_state->lock);
 		gintmsk_new.d32 = *(volatile uint32_t *)&dwc_otg_hcd->fiq_state->gintmsk_saved.d32;
 		if(fiq_fsm_enable)
 			haintmsk_new.d32 = *(volatile uint32_t *)&dwc_otg_hcd->fiq_state->haintmsk_saved.d32;
@@ -222,6 +226,7 @@ exit_handler_routine:
 		haintmsk.d32 = DWC_READ_REG32(&core_if->host_if->host_global_regs->haintmsk);
 		/* Re-enable interrupts that the FIQ masked (first time round) */
 		FIQ_WRITE(dwc_otg_hcd->fiq_state->dwc_regs_base + GINTMSK, gintmsk.d32);
+		fiq_fsm_spin_unlock(&dwc_otg_hcd->fiq_state->lock);
 		local_fiq_enable();
 
 		if ((jiffies / HZ) > last_time) {
@@ -633,8 +638,10 @@ int32_t dwc_otg_hcd_handle_hc_intr(dwc_o
 	{
 		/* check the mask? */
 		local_fiq_disable();
+		fiq_fsm_spin_lock(&dwc_otg_hcd->fiq_state->lock);
 		haint.b2.chint |= ~(dwc_otg_hcd->fiq_state->haintmsk_saved.b2.chint);
 		dwc_otg_hcd->fiq_state->haintmsk_saved.b2.chint = ~0;
+		fiq_fsm_spin_unlock(&dwc_otg_hcd->fiq_state->lock);
 		local_fiq_enable();
 	}