/*
* Copyright (C) 2005-2006 by Texas Instruments
*
* This file implements a DMA interface using TI's CPPI DMA.
* For now it's DaVinci-only, but CPPI isn't specific to DaVinci or USB.
* The TUSB6020, using VLYNQ, has CPPI that looks much like DaVinci.
*/
#include <linux/usb.h>
#include "musb_core.h"
#include "musb_debug.h"
#include "cppi_dma.h"
/* CPPI DMA status 7-mar-2006:
*
* - See musb_{host,gadget}.c for more info
*
* - Correct RX DMA generally forces the engine into irq-per-packet mode,
* which can easily saturate the CPU under non-mass-storage loads.
*
* NOTES 24-aug-2006 (2.6.18-rc4):
*
* - peripheral RXDMA wedged in a test with packets of length 512/512/1.
* Evidently, after the 1 byte packet was received and acked, the queue
* of BDs got corrupted so it wouldn't empty the fifo. (rxcsr 0x2003,
* and RX DMA0: 4 left, 80000000 8feff880, 8feff860 8feff860; 8f321401
* 004001ff 00000001 .. 8feff860) Host was just getting NAKed on tx
* of its next (512 byte) packet. IRQ issues?
*
* REVISIT: the "transfer DMA" glue between CPPI and USB fifos will
* evidently also directly update the RX and TX CSRs ... so audit all
* host and peripheral side DMA code to avoid CSR access after DMA has
* been started.
*/
/* REVISIT now we can avoid preallocating these descriptors; or
* more simply, switch to a global freelist not per-channel ones.
* Note: at full speed, 64 descriptors == 4K bulk data.
*
* Number of buffer descriptors (BDs) preallocated per channel.
* NOTE(review): cppi_pool_init() sizes every channel's freelist, RX
* channels included, as NUM_TXCHAN_BD + 1; NUM_RXCHAN_BD is not
* referenced in the part of the file visible here -- confirm whether
* that is intentional.
*/
#define NUM_TXCHAN_BD 64
#define NUM_RXCHAN_BD 64
/*
 * cpu_drain_writebuffer - push pending CPU writes out before continuing
 *
 * Issues wmb(); on CONFIG_CPU_ARM926T builds it additionally executes the
 * CP15 "drain write buffer" operation (c7, c10, 4).
 */
static inline void cpu_drain_writebuffer(void)
{
	wmb();
#ifdef CONFIG_CPU_ARM926T
	/* REVISIT this "should not be needed",
	 * but lack of it sure seemed to hurt ...
	 *
	 * The data written by this CP15 operation should be zero, so pass
	 * an explicit 0 operand rather than whatever r0 happens to hold.
	 * The "memory" clobber keeps the compiler from moving memory
	 * accesses across the drain.
	 */
	asm volatile("mcr p15, 0, %0, c7, c10, 4	@ drain write buffer\n"
			: : "r" (0) : "memory");
#endif
}
/*
 * cppi_bd_alloc - take one buffer descriptor off a channel's freelist
 * @c: channel whose freelist is used
 *
 * Returns the descriptor that was at the head of c->freelist, after
 * advancing the list head to its successor, or NULL when the freelist
 * is empty.  The returned descriptor's ->next is left as it was.
 */
static inline struct cppi_descriptor *cppi_bd_alloc(struct cppi_channel *c)
{
	struct cppi_descriptor	*first = c->freelist;

	if (!first)
		return NULL;

	c->freelist = first->next;
	return first;
}
/*
 * cppi_bd_free - put a buffer descriptor back on a channel's freelist
 * @c: channel whose freelist receives the descriptor
 * @bd: descriptor to return; NULL is accepted and ignored
 *
 * The descriptor is pushed onto the front of c->freelist.
 */
static inline void
cppi_bd_free(struct cppi_channel *c, struct cppi_descriptor *bd)
{
	if (bd) {
		bd->next = c->freelist;
		c->freelist = bd;
	}
}
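/*
* Start DMA controller
*
* Initialize the DMA controller as necessary. The helpers that follow
* reset a channel's state RAM and build its descriptor freelist;
* cppi_controller_start(), further down, calls them.
*/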
/*
* cppi_reset_rx - zero out entire rx state RAM entry for the channel
* @rx: the channel's RX state RAM block (an __iomem mapping)
*
* Writes 0 to each of the seven struct cppi_rx_stateram fields listed
* below, one musb_writel() per field. Each call passes the field's own
* address as the base with an offset of 0.
*/
static void cppi_reset_rx(struct cppi_rx_stateram __iomem *rx)
{
musb_writel(&rx->rx_skipbytes, 0, 0);
musb_writel(&rx->rx_head, 0, 0);
musb_writel(&rx->rx_sop, 0, 0);
musb_writel(&rx->rx_current, 0, 0);
musb_writel(&rx->rx_buf_current, 0, 0);
musb_writel(&rx->rx_len_len, 0, 0);
musb_writel(&rx->rx_cnt_cnt, 0, 0);
}
/*
* cppi_reset_tx - zero out the tx state RAM entry for the channel
* @tx: the channel's TX state RAM block (an __iomem mapping)
* @ptr: value stored in tx_complete after the other fields are cleared
*
* Writes 0 to tx_head, tx_buf, tx_current, tx_buf_current, tx_info and
* tx_rem_len, then writes @ptr to tx_complete. Each musb_writel() call
* passes the field's own address as the base with an offset of 0.
*/
static void cppi_reset_tx(struct cppi_tx_stateram __iomem *tx, u32 ptr)
{
musb_writel(&tx->tx_head, 0, 0);
musb_writel(&tx->tx_buf, 0, 0);
musb_writel(&tx->tx_current, 0, 0);
musb_writel(&tx->tx_buf_current, 0, 0);
musb_writel(&tx->tx_info, 0, 0);
musb_writel(&tx->tx_rem_len, 0, 0);
/* NOTE(review): tx_dummy is not written; the write below is disabled.
* Confirm whether skipping it is intentional.
*/
/* musb_writel(&tx->tx_dummy, 0, 0); */
musb_writel(&tx->tx_complete, 0, ptr);
}
/*
 * cppi_pool_init - reset a channel's bookkeeping and build its BD freelist
 * @cppi: controller the channel belongs to; descriptors come from cppi->pool
 * @c: channel to initialize
 *
 * Clears the channel's queue pointers, marks its status as
 * MUSB_DMA_STATUS_UNKNOWN, links it to @cppi, and then tries to allocate
 * NUM_TXCHAN_BD + 1 descriptors from the controller's DMA pool, pushing
 * each one onto the channel's freelist with its bus address recorded in
 * bd->dma.
 *
 * If the pool runs out of memory the loop stops early and the channel is
 * left with a shorter (possibly empty) freelist; cppi_bd_alloc() reports
 * an exhausted freelist by returning NULL.
 */
static void __init cppi_pool_init(struct cppi *cppi, struct cppi_channel *c)
{
	int	j;

	/* initialize channel fields */
	c->head = NULL;
	c->tail = NULL;
	c->last_processed = NULL;
	c->channel.status = MUSB_DMA_STATUS_UNKNOWN;
	c->controller = cppi;
	c->is_rndis = 0;
	c->freelist = NULL;

	/* build the BD Free list for the channel */
	for (j = 0; j < NUM_TXCHAN_BD + 1; j++) {
		struct cppi_descriptor	*bd;
		dma_addr_t		dma;

		bd = dma_pool_alloc(cppi->pool, GFP_KERNEL, &dma);
		if (!bd)
			break;	/* allocation failed: never touch a NULL bd */

		bd->dma = dma;
		cppi_bd_free(c, bd);
	}
}
static int cppi_channel_abort(struct dma_channel *);

/*
 * cppi_pool_free - abort a channel and give its descriptors back to the pool
 * @c: channel to tear down
 *
 * Aborts the channel (the abort result is ignored), marks its status as
 * MUSB_DMA_STATUS_UNKNOWN and detaches it from its controller.  Then the
 * descriptor recorded in c->last_processed, if any, and every descriptor
 * still on the channel's freelist are returned to the controller's DMA
 * pool.
 */
static void cppi_pool_free(struct cppi_channel *c)
{
	struct cppi		*owner = c->controller;
	struct cppi_descriptor	*desc;

	(void) cppi_channel_abort(&c->channel);
	c->channel.status = MUSB_DMA_STATUS_UNKNOWN;
	c->controller = NULL;

	/* the last-processed descriptor is released first ... */
	desc = c->last_processed;
	if (desc)
		dma_pool_free(owner->pool, desc, desc->dma);

	/* ... followed by everything left on the freelist */
	while ((desc = cppi_bd_alloc(c)) != NULL)
		dma_pool_free(owner->pool, desc, desc->dma);

	c->last_processed = NULL;
}
/*
 * cppi_controller_start - bring up the CPPI DMA controller
 * @c: generic DMA controller embedded in a struct cppi
 *
 * Labels each TX and RX channel with its direction and index, builds a
 * descriptor freelist for every channel, records and clears each
 * channel's state RAM block, and finally programs the controller
 * registers: per-channel interrupt enables, TX/RX DMA control enables,
 * and zeroed RNDIS and autorequest registers.
 *
 * Always returns 0.
 */
static int __init cppi_controller_start(struct dma_controller *c)
{
	struct cppi	*cppi = container_of(c, struct cppi, controller);
	void __iomem	*regs;
	int		n;

	/* do whatever is necessary to start controller */
	for (n = 0; n < ARRAY_SIZE(cppi->tx); n++) {
		struct cppi_channel	*txc = &cppi->tx[n];

		txc->transmit = true;
		txc->index = n;
	}
	for (n = 0; n < ARRAY_SIZE(cppi->rx); n++) {
		struct cppi_channel	*rxc = &cppi->rx[n];

		rxc->transmit = false;
		rxc->index = n;
	}

	/* setup BD list on a per channel basis */
	for (n = 0; n < ARRAY_SIZE(cppi->tx); n++)
		cppi_pool_init(cppi, &cppi->tx[n]);
	for (n = 0; n < ARRAY_SIZE(cppi->rx); n++)
		cppi_pool_init(cppi, &cppi->rx[n]);

	regs = cppi->tibase;
	INIT_LIST_HEAD(&cppi->tx_complete);

	/* initialise tx/rx channel head pointers to zero */
	for (n = 0; n < ARRAY_SIZE(cppi->tx); n++) {
		struct cppi_channel		*txc = &cppi->tx[n];
		struct cppi_tx_stateram __iomem	*sram;

		INIT_LIST_HEAD(&txc->tx_complete);

		sram = regs + DAVINCI_TXCPPI_STATERAM_OFFSET(n);
		txc->state_ram = sram;
		cppi_reset_tx(sram, 0);
	}
	for (n = 0; n < ARRAY_SIZE(cppi->rx); n++) {
		struct cppi_channel		*rxc = &cppi->rx[n];
		struct cppi_rx_stateram __iomem	*sram;

		INIT_LIST_HEAD(&rxc->tx_complete);

		sram = regs + DAVINCI_RXCPPI_STATERAM_OFFSET(n);
		rxc->state_ram = sram;
		cppi_reset_rx(sram);
	}

	/* enable individual cppi channels */
	musb_writel(regs, DAVINCI_TXCPPI_INTENAB_REG,
			DAVINCI_DMA_ALL_CHANNELS_ENABLE);
	musb_writel(regs, DAVINCI_RXCPPI_INTENAB_REG,
			DAVINCI_DMA_ALL_CHANNELS_ENABLE);

	/* enable tx/rx CPPI control */
	musb_writel(regs, DAVINCI_TXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);
	musb_writel(regs, DAVINCI_RXCPPI_CTRL_REG, DAVINCI_DMA_CTRL_ENABLE);

	/* disable RNDIS mode, also host rx RNDIS autorequest */
	musb_writel(regs, DAVINCI_RNDIS_REG, 0);
	musb_writel(regs, DAVINCI_AUTOREQ_REG, 0);

	return 0;
}
/*
* Stop DMA controller
*
* De-Init the DMA controller as necessary.
*/
static int cppi_controller_stop(struct dma_controller *c)
{
struct cppi *controller;
void __iomem *tibase;
int i;
controller = container_of(c, struct cppi, controller);
tibase = controller->tibase;
/* DISABLE INDIVIDUAL CHANNEL Interrupts */
musb_writel(tibase, DAVINCI_TXCPPI_INTCLR_REG,
DAVINCI_DMA_ALL_CHANNELS_ENABLE);
musb_writel(tibase, DAVINCI_RXCPPI_INTCLR_REG,
DAVINCI_DMA_ALL_CHANNELS_ENABLE);
DBG(1, "Tearing down RX and TX Channels\n");
for (i = 0; i < ARRAY_SIZE(controller->tx); i++) {
/* FIXME restructure of txdma to use bds lik
|