/* ------------------------------------------------------------------------- */
/*   dma-riva.c DMA support functions for NVIDIA display adapters	     */
/* ------------------------------------------------------------------------- */
/*   Copyright (C) 2002 Dirk Thierbach <dthierbach@gmx.de>
 *   Copyright (C) 2002, 2003 Stefan Jahn <stefan@lkcc.org>
 *
 *   This program is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *   
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *   
 *   You should have received a copy of the GNU General Public License
 *   along with this program; if not, write to the Free Software
 *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.		     */
/* ------------------------------------------------------------------------- */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#if (defined(CONFIG_AGP) || defined(CONFIG_AGP_MODULE)) && !defined(RIVATV_DISABLE_AGP)
#include <linux/agp_backend.h>
#endif
#include <asm/io.h>

#include "rivatv.h"

/* Configurable module parameters: DMA and AGP usage requested.
   Both default to 0 (not requested).
   dma: rivatv_checkDMA() only calls rivatv_setupDMA() when this is non-zero;
        otherwise it clears RIVATV_SCHED_DMA again.
   agp: AGP usage requested -- NOTE(review): no consumer of this variable is
        visible near these definitions; confirm where it is evaluated. */
int dma = 0;
int agp = 0;

/* Objects allocated by 'nv' driver for NV10 cards:
 *
 * Handle Subch Class
 *
 * 80000000 0     043  NV03_CONTEXT_ROP
 * 80000001 1     019  NV01_CONTEXT_CLIP_RECTANGLE
 * 80000002 2     018  NV01_CONTEXT_PATTERN
 * 80000003 -     058  NV03_SURFACE_0 / NV03_CONTEXT_SURFACES_2D
 * 80000004 -     059  NV03_SURFACE_1
 * 80000005 -     05A  NV03_SURFACE_2
 * 80000006 -     05B  NV03_SURFACE_3
 * 80000007 -     093  NV10_CONTEXT_SURFACES_3D
 * 80000010 3     021  NV01_IMAGE_FROM_CPU
 * 80000011 4     05F  NV04_IMAGE_BLIT
 * 80000012 5     04B  NV03_GDI_RECTANGLE_TEXT
 * 80000013 -     048  NV03_DX3_TEXTURED_TRIANGLE 
 * 80000014 7     094  NV10_DX5_TEXTURE_TRIANGLE 
 * 80000015 -     095  NV10_DX6_MULTI_TEXTURE_TRIANGLE
 * 80000016 6     01C  NV01_RENDER_SOLID_LIN
 *
 */

/* Does taking over subchannel 7 overwrite/destroy the NV10_DX5_TEXTURE_TRIANGLE
   object that the 'nv' driver bound there (see the table above)?

   Yes, in the sense that it is no longer in the channel.  You can restore it
   by setting the handle back to 80000014.
   It is not used by anybody besides the Utah-GLX project.  Maybe that should
   be mentioned in the docs, since it will break that program.  (On the other
   hand, Utah-GLX did not even support the GeForce2 without some fixes by Dirk
   Thierbach in the first place, so it is not really serious.)
*/
/* FIFO channel and subchannel used for every object this file submits. */
#define RIVATV_CHANNEL    0
#define RIVATV_SUBCHANNEL 7

/* Pick some handles that won't collide with those already present.
   These are the 32-bit object handles entered into the hash table by
   rivatv_storeHT() and written to the SetObject / SetContextDma* methods.
   OBJ_*    : graphics objects (memory-to-memory format, 2D surface, scaler)
   *_NTFY   : DMA objects used as notifier targets
   *_IN/OUT : DMA objects used as transfer source / destination */
enum {
	OBJ_FIRST   = 0x04000000,
	OBJ_MEMFMT  = 0x04000001,
	OBJ_SURF2D  = 0x04000002,
	OBJ_SCALER  = 0x04000003,
	SURF2D_NTFY = 0x04000010,
	SURF2D_IN   = 0x04000011,
	SURF2D_OUT  = 0x04000012,
	SCALER_NTFY = 0x04000013,
	SCALER_IN   = 0x04000014,
	SCALER_OUT  = 0x04000015,
	DMA_NTFY    = 0x04000016,
	DMA_IN      = 0x04000017,
	DMA_OUT     = 0x04000018,
};

/* In the same way, pick some addresses that won't collide. The 'nv' driver
   uses 1145h-1150h for NV4 and better and 0341h-034Ch for NV3, so we start 
   at 1200h.
   The values are instance RAM addresses in 16-byte units: rivatv_storeContext()
   and rivatv_storeDMA() write at byte offset (addr << 4) inside PRAMIN, and
   each object written by rivatv_storeContext() occupies four 32-bit words. */
enum {
	ADDR_OBJ_MEMFMT  = 0x1200,
	ADDR_OBJ_SURF2D  = 0x1201,
	ADDR_OBJ_SCALER  = 0x1202,
	ADDR_SURF2D_NTFY = 0x1203,
	ADDR_SURF2D_IN   = 0x1204,
	ADDR_SURF2D_OUT  = 0x1205,
	ADDR_SCALER_NTFY = 0x1206,
	ADDR_SCALER_IN   = 0x1207,
	ADDR_SCALER_OUT  = 0x1208,
	ADDR_DMA_NTFY    = 0x1209,
	ADDR_DMA_IN      = 0x120A,
	/* A non-linear DMA object carries one page table entry per page (see
	   rivatv_storeDMA()) and therefore spills past its 16-byte slot. */
	ADDR_DMA_OUT     = 0x120B, /* this is last, because it gets bigger than one entry! */
};

/* Initializes the nVidia card for DMA transfers if not done by a running
   XFree86 driver.

   Enables PGRAPH and PFIFO in PMC register 0x200 when they are off, records
   the hash table layout this driver is about to program in info->dma and then
   writes an architecture specific sequence of PGRAPH/PFIFO registers.  The
   order of the writes is significant (the FIFO caches are disabled first and
   re-enabled last), so do not reorder them.

   Notes:
     * dma->ht_search is only set for NV4 and newer, not for NV3.
     * An architecture not listed in the switch gets no register setup at all.
     * The register names in the comments are taken over from the original
       author; where a name contradicts another label in this function this
       is pointed out, but the written values are untouched. */
static void rivatv_enableDMA (struct rivatv_info *info)
{
	struct rivatv_dma *dma = &info->dma;
	u32 reg;

	/* enable PGRAPH (Graphics engine) and PFIFO (FIFO channels) if necessary */
	reg = VID_RD32 (info->chip.PMC, 0x000200);
	if ((reg & 0x00001100) != 0x00001100) {
		PRINTK_INFO ("PFIFO and PGRAPH disabled, enabling...\n");
		VID_OR32 (info->chip.PMC, 0x000200, 0x00001100);
	}

	PRINTK_INFO ("Setting up instance RAM for DMA\n");

	switch (info->chip.arch) {
	case NV_ARCH_03:
		/* hash table: 4kB at offset 0, 9 bit hash keys */
		dma->ht_size = 0x1000;
		dma->ht_base = 0x0000;
		dma->ht_bits = 9;

		/* NV_PGRAPH_DEBUG_0 */
		VID_WR32 (info->chip.PGRAPH, 0x0080, 0x1230001F);
		/* NV_PGRAPH_DEBUG_1 */
		VID_WR32 (info->chip.PGRAPH, 0x0084, 0x10113000);
		/* NV_PGRAPH_DEBUG_2 */
		VID_WR32 (info->chip.PGRAPH, 0x0088, 0x1131F101);
		/* NV_PGRAPH_DEBUG_3 */
		VID_WR32 (info->chip.PGRAPH, 0x008C, 0x0100F531);
		/* NV_PGRAPH_CTX_SWITCH */
		VID_WR32 (info->chip.PGRAPH, 0x0180, 0x00000000);
		/* NV_PGRAPH_CTX_USER */
		VID_WR32 (info->chip.PGRAPH, 0x0194, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_0 */
		VID_WR32 (info->chip.PGRAPH, 0x01A0, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_1 */
		VID_WR32 (info->chip.PGRAPH, 0x01A4, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_2 */
		VID_WR32 (info->chip.PGRAPH, 0x01A8, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_3 */
		VID_WR32 (info->chip.PGRAPH, 0x01AC, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_4 */
		VID_WR32 (info->chip.PGRAPH, 0x01B0, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_5 */
		VID_WR32 (info->chip.PGRAPH, 0x01B4, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_6 */
		VID_WR32 (info->chip.PGRAPH, 0x01B8, 0x00000000);
		/* NV_PGRAPH_CTX_CACHE_7 */
		VID_WR32 (info->chip.PGRAPH, 0x01BC, 0x00000000);
		/* NV_PGRAPH_CLIP_MISC */
		VID_WR32 (info->chip.PGRAPH, 0x06A0, 0x00000000);
		/* NV_PGRAPH_DMA_INTR_0 */
		VID_WR32 (info->chip.PGRAPH, 0x1100, 0xFFFFFFFF);
		/* NOTE(review): the original comment here repeated
		   NV_PGRAPH_DMA_INTR_0, which is the label of offset 0x1100
		   above; the real name of PGRAPH offset 0x1200 is unconfirmed */
		VID_WR32 (info->chip.PGRAPH, 0x1200, 0x00000001);
		/* NV_PGRAPH_DMA */
		VID_WR32 (info->chip.PGRAPH, 0x0680, 0x00000000);
		/* NV_PGRAPH_INSTANCE */
		VID_WR32 (info->chip.PGRAPH, 0x0688, 0x00000000);
		/* NV_PGRAPH_PLANE_MASK */
		VID_WR32 (info->chip.PGRAPH, 0x0628, 0xFFFFFFFF);
		/* NV_PGRAPH_BETA */
		VID_WR32 (info->chip.PGRAPH, 0x0640, 0x00000000);
		/* NV_PGRAPH_EXCEPTIONS */
		VID_WR32 (info->chip.PGRAPH, 0x0508, 0x00000000);
		/* NV_PGRAPH_SRC_CANVAS_MIN */
		VID_WR32 (info->chip.PGRAPH, 0x0550, 0x00000000);
		/* NV_PGRAPH_SRC_CANVAS_MAX */
		VID_WR32 (info->chip.PGRAPH, 0x0554, 0xFFFFFFFF);
		/* NV_PGRAPH_DST_CANVAS_MIN */
		VID_WR32 (info->chip.PGRAPH, 0x0558, 0x00000000);
		/* NV_PGRAPH_DST_CANVAS_MAX */
		VID_WR32 (info->chip.PGRAPH, 0x055C, 0xFFFFFFFF);
		/* NV_PGRAPH_CTX_CONTROL */
		VID_WR32 (info->chip.PGRAPH, 0x0190, 0x10010002);
		/* NV_PGRAPH_INTR_EN_0 */
		VID_WR32 (info->chip.PGRAPH, 0x0140, 0x00000000);
		/* NV_PGRAPH_INTR_EN_1 */
		VID_WR32 (info->chip.PGRAPH, 0x0144, 0x00000000);
		/* NV_PGRAPH_INTR_0 */
		VID_WR32 (info->chip.PGRAPH, 0x0100, 0xFFFFFFFF);
		/* NV_PGRAPH_INTR_1 */
		VID_WR32 (info->chip.PGRAPH, 0x0104, 0xFFFFFFFF);
		/* NV_PGRAPH_DMA_INTR_EN_0 */
		VID_WR32 (info->chip.PGRAPH, 0x1140, 0x00000000);
		/* NV_PGRAPH_DMA_INTR_0 */
		VID_WR32 (info->chip.PGRAPH, 0x1100, 0xFFFFFFFF);
		/* NV_PGRAPH_FIFO */
		VID_WR32 (info->chip.PGRAPH, 0x06A4, 0x00000001);

		/* switch the FIFO caches off while they are reprogrammed */
		/* NV_PFIFO_CACHES */
		VID_WR32 (info->chip.PFIFO, 0x0500, 0x00000000);
		/* NV_PFIFO_CACHE1_PUSH0 */
		VID_WR32 (info->chip.PFIFO, 0x1200, 0x00000000);
		/* NV_PFIFO_CACHE1_PULL0 */
		VID_WR32 (info->chip.PFIFO, 0x1240, 0x00000000);
		/* NV_PFIFO_CACHE1_PULL1 */
		VID_WR32 (info->chip.PFIFO, 0x1250, 0x00000000);
		/* NV_PFIFO_CACHE1_PUSH1 */
		VID_WR32 (info->chip.PFIFO, 0x1204, 0x00000000);

		/* locations of hash table, runout area and FIFO context */
		/* NV_PFIFO_RAMHT */
		VID_WR32 (info->chip.PFIFO, 0x0210, 0x00000000);
		/* NV_PFIFO_RAMRO */
		VID_WR32 (info->chip.PFIFO, 0x0218, 0x00002000);
		/* NV_PFIFO_RAMFC */
		VID_WR32 (info->chip.PFIFO, 0x0214, 0x00002200);

		/* NV_PFIFO_CACHE1_PUT */
		VID_WR32 (info->chip.PFIFO, 0x1210, 0x00000000);
		/* NV_PFIFO_CACHE1_GET */
		VID_WR32 (info->chip.PFIFO, 0x1270, 0x00000000);
		/* NV_PFIFO_RUNOUT_PUT */
		VID_WR32 (info->chip.PFIFO, 0x0410, 0x00000000);
		/* NV_PFIFO_RUNOUT_GET */
		VID_WR32 (info->chip.PFIFO, 0x0420, 0x00000000);
		/* NV_PFIFO_RUNOUT_STATUS */
		VID_WR32 (info->chip.PFIFO, 0x0400, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_0 */
		VID_WR32 (info->chip.PFIFO, 0x1280, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_1 */
		VID_WR32 (info->chip.PFIFO, 0x1290, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_2 */
		VID_WR32 (info->chip.PFIFO, 0x12A0, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_3 */
		VID_WR32 (info->chip.PFIFO, 0x12B0, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_4 */
		VID_WR32 (info->chip.PFIFO, 0x12C0, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_5 */
		VID_WR32 (info->chip.PFIFO, 0x12D0, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_6 */
		VID_WR32 (info->chip.PFIFO, 0x12E0, 0x00000000);
		/* NV_PFIFO_CACHE1_CTX_7 */
		VID_WR32 (info->chip.PFIFO, 0x12F0, 0x00000000);
		/* NV_PFIFO_INTR_EN_0 */
		VID_WR32 (info->chip.PFIFO, 0x0140, 0x00000000);
		/* NV_PFIFO_INTR_0 */
		VID_WR32 (info->chip.PFIFO, 0x0100, 0xFFFFFFFF);
		/* switch the FIFO caches back on */
		/* NV_PFIFO_CACHE1_PUSH0 */
		VID_WR32 (info->chip.PFIFO, 0x1200, 0x00000001);
		/* NV_PFIFO_CACHE1_PULL0 */
		VID_WR32 (info->chip.PFIFO, 0x1240, 0x00000001);
		/* NV_PFIFO_CACHES */
		VID_WR32 (info->chip.PFIFO, 0x0500, 0x00000001);
		/* NOTE(review): originally labelled NV_PGRAPH_DMA_CONTROL, but
		   this is a PFIFO write to offset 0x1210, which is labelled
		   NV_PFIFO_CACHE1_PUT further up -- confirm the intended
		   register */
		VID_WR32 (info->chip.PFIFO, 0x1210, 0x00000000);
		break;

	case NV_ARCH_04:
	case NV_ARCH_10:
	case NV_ARCH_20:
	case NV_ARCH_30:
		/* hash table: 4kB at offset 0x10000, 9 bit hash keys, search
		   depth 128; rivatv_checkHT() recognises exactly this layout
		   as the one used by the open source 'nv' driver */
		dma->ht_search = 128;
		dma->ht_size = 0x1000;
		dma->ht_base = 0x10000;
		dma->ht_bits = 9;
		if (info->chip.arch == NV_ARCH_04) {
			/* same offsets as in the else branch: DEBUG_0, DEBUG_1 */
			VID_WR32 (info->chip.PGRAPH, 0x0080, 0x1231C001);
			VID_WR32 (info->chip.PGRAPH, 0x0084, 0x72111101);

			/* INTR_EN and INTR by the labels of the else branch;
			   0x0170 presumably is the NV4 location of
			   NV_PGRAPH_CTX_CONTROL (same value as written to
			   0x0144 below) -- TODO confirm */
			VID_WR32 (info->chip.PGRAPH, 0x0140, 0x00000000);
			VID_WR32 (info->chip.PGRAPH, 0x0100, 0xFFFFFFFF);
			VID_WR32 (info->chip.PGRAPH, 0x0170, 0x10010100);
		} else {
			/* NV_PGRAPH_DEBUG_0 */
			VID_WR32 (info->chip.PGRAPH, 0x0080, 0x0003FFFF);
			/* NV_PGRAPH_DEBUG_1 */
			VID_WR32 (info->chip.PGRAPH, 0x0084, 0x00118701);
			/* NV_PGRAPH_DEBUG_0 */
			VID_WR32 (info->chip.PGRAPH, 0x0080, 0x00000000);

			/* NV_PGRAPH_INTR_EN */
			VID_WR32 (info->chip.PGRAPH, 0x0140, 0x01111111);
			/* NV_PGRAPH_INTR */
			VID_WR32 (info->chip.PGRAPH, 0x0100, 0xFFFFFFFF);
			/* NV_PGRAPH_CTX_CONTROL */
			VID_WR32 (info->chip.PGRAPH, 0x0144, 0x10010100);
		}

		/* switch the FIFO caches off while they are reprogrammed */
		/* NV_PFIFO_CACHES */
		VID_WR32 (info->chip.PFIFO, 0x0500, 0x00000000);
		/* NV_PFIFO_CACHE1_PUSH0_ACCESS_DISABLED */
		VID_WR32 (info->chip.PFIFO, 0x1200, 0x00000000);
		/* NV_PFIFO_CACHE1_PULL0_ACCESS_DISABLED */
		VID_WR32 (info->chip.PFIFO, 0x1250, 0x00000000);
		/* NV_PFIFO_CACHE1_PUSH1_MODE_PIO */
		VID_WR32 (info->chip.PFIFO, 0x1204, 0x00000000);
		/* NV_PFIFO_CACHE1_DMA_INSTANCE */
		VID_WR32 (info->chip.PFIFO, 0x122C, 0x00000000);
		/* NV_PFIFO_CACHE0_PUSH0_ACCESS_DISABLED */
		VID_WR32 (info->chip.PFIFO, 0x1000, 0x00000000);
		/* NV_PFIFO_CACHE0_PULL0_ACCESS_DISABLED */
		VID_WR32 (info->chip.PFIFO, 0x1050, 0x00000000);

		/* locations of hash table, FIFO context and runout area */
		/* NV_PFIFO_RAMHT */
		VID_WR32 (info->chip.PFIFO, 0x0210, 0x03000100);
		/* NV_PFIFO_RAMFC */
		VID_WR32 (info->chip.PFIFO, 0x0214, 0x00000110);
		/* NV_PFIFO_RAMRO */
		VID_WR32 (info->chip.PFIFO, 0x0218, 0x00000112);

		/* NV_PFIFO_SIZE_CHANNEL */
		VID_WR32 (info->chip.PFIFO, 0x050C, 0x0000FFFF);
		/* NV_PFIFO_CACHE1_INSTANCE */
		VID_WR32 (info->chip.PFIFO, 0x1258, 0x0000FFFF);

		/* NV_PFIFO_INTR_EN */
		VID_WR32 (info->chip.PFIFO, 0x0140, 0x00000000);
		/* NV_PFIFO_INTR */
		VID_WR32 (info->chip.PFIFO, 0x0100, 0xFFFFFFFF);

		/* switch the FIFO caches back on.
		   NOTE(review): the next two labels do not match the labels
		   used for the same offsets above (0x1050 is called
		   CACHE0_PULL0 and 0x1200 CACHE1_PUSH0 there); presumably
		   0x1054 is CACHE0_PULL1 and 0x1200 is CACHE1_PUSH0 access
		   enable -- TODO confirm */
		/* NV_PFIFO_CACHE0_PULL0_ENGINE_GRAPHICS */
		VID_WR32 (info->chip.PFIFO, 0x1054, 0x00000001);
		/* NV_PFIFO_CACHE0_PULL0_ACCESS_ENABLED */
		VID_WR32 (info->chip.PFIFO, 0x1200, 0x00000001);
		/* NV_PFIFO_CACHE1_PULL0_ACCESS_ENABLED */
		VID_WR32 (info->chip.PFIFO, 0x1250, 0x00000001);
		/* NV_PFIFO_CACHE1_PULL1_ENGINE_GRAPHICS */
		VID_WR32 (info->chip.PFIFO, 0x1254, 0x00000001);
		/* NV_PFIFO_CACHES_REASSIGN_ENABLED */
		VID_WR32 (info->chip.PFIFO, 0x0500, 0x00000001);
		break;
	}
}

/* Reads the hash table layout currently programmed into PFIFO, stores it in
   info->dma and decides from it which other driver (if any) has set up the
   card:
     - layout of the open source 'nv' driver: DMA is enabled,
     - search 128 / size 16kB / base 0x10000 (NVdriver): DMA is disabled,
     - anything else: the card is initialized by rivatv_enableDMA() and DMA
       is enabled.
   Afterwards the PRAMHT, PRAMFC and PRAMRO pointers are derived from the
   (possibly just rewritten) PFIFO registers.
   Returns 1 if RIVATV_SCHED_DMA is set in info->sched.flags, 0 otherwise.

   NOTE(review): for NV_ARCH_03 dma->ht_search is not updated here, yet it is
   evaluated by the NVdriver test below. */
static int rivatv_checkHT (struct rivatv_info *info)
{
	struct rivatv_dma *dma = &info->dma;
	u32 ramht, ramro, ramfc;
	u32 nv_layout = 0;
	ulong pramin, bus = 0;

	ramht = VID_RD32 (info->chip.PFIFO, 0x210);	/* NV_PFIFO_RAMHT */
	ramfc = VID_RD32 (info->chip.PFIFO, 0x214);	/* NV_PFIFO_RAMFC */
	ramro = VID_RD32 (info->chip.PFIFO, 0x218);	/* NV_PFIFO_RAMRO */

	/* decode the layout; size and key width share one register field */
	if (info->chip.arch == NV_ARCH_03) {
		dma->ht_base = 0x1000 * ((ramht >> 12) & 0x0f);
		dma->ht_size = 0x1000 << ((ramht >> 16) & 0x03);
		dma->ht_bits = ((ramht >> 16) & 0x03) + 9;
		nv_layout = (dma->ht_size == 0x1000 && dma->ht_base == 0x0000 && ramfc == 0x2200);
		bus = (ulong) info->base1_region + (ulong) info->chip.PRAMIN - (ulong) info->video_base;
	} else if (info->chip.arch == NV_ARCH_04 || info->chip.arch == NV_ARCH_10 ||
		   info->chip.arch == NV_ARCH_20 || info->chip.arch == NV_ARCH_30) {
		dma->ht_base = 0x1000 * ((ramht >> 4) & 0x1f);
		dma->ht_size = 0x1000 << ((ramht >> 16) & 0x03);
		dma->ht_bits = ((ramht >> 16) & 0x03) + 9;
		dma->ht_search = 0x10 << ((ramht >> 24) & 0x03);
		nv_layout = (dma->ht_search == 128 && dma->ht_size == 0x1000 && dma->ht_base == 0x10000);
		bus = (ulong) info->base0_region + (ulong) info->chip.PRAMIN - (ulong) info->chip.PMC;
	}

	PRINTK_INFO ("Hash table layout: %dkB (%d bits) @ 0x%08lX\n", 
		     dma->ht_size / 1024, dma->ht_bits, bus + dma->ht_base);

	/* the scheduler flags are changed with the scheduler lock held */
	spin_lock_irq (&info->sched.sched_lock);
	if (nv_layout) {
		PRINTK_INFO ("Open source nv driver detected, DMA supported\n");
		info->sched.flags |= RIVATV_SCHED_DMA;
	} else {
		if (dma->ht_search == 128 && dma->ht_size == 0x4000 && dma->ht_base == 0x10000) {
			PRINTK_INFO ("NVdriver (nvidia) detected, DMA not supported\n");
			info->sched.flags &= ~RIVATV_SCHED_DMA;
		} else {
			PRINTK_INFO ("No additional driver detected\n");
			rivatv_enableDMA (info);
			info->sched.flags |= RIVATV_SCHED_DMA;
		}
	}
	spin_unlock_irq (&info->sched.sched_lock);

	/* assign PRAMHT, PRAMFC and PRAMRO pointers; the registers are read
	   again because rivatv_enableDMA() may have rewritten them */
	ramht = VID_RD32 (info->chip.PFIFO, 0x210);
	ramfc = VID_RD32 (info->chip.PFIFO, 0x214);
	ramro = VID_RD32 (info->chip.PFIFO, 0x218);
	pramin = (ulong) info->chip.PRAMIN;
	if (info->chip.arch == NV_ARCH_03) {
		info->chip.PRAMHT = (u32 *) (pramin + 0x1000 * ((ramht >> 12) & 0x0f));
		info->chip.PRAMFC = (u32 *) (pramin + 0x0200 * ((ramfc >> 9) & 0x7f));
		info->chip.PRAMRO = (u32 *) (pramin + 0x0200 * ((ramro >> 9) & 0x7f));
	} else if (info->chip.arch == NV_ARCH_04 || info->chip.arch == NV_ARCH_10 ||
		   info->chip.arch == NV_ARCH_20 || info->chip.arch == NV_ARCH_30) {
		info->chip.PRAMHT = (u32 *) (pramin + 0x1000 * ((ramht >> 4) & 0x1f));
		info->chip.PRAMFC = (u32 *) (pramin + 0x0800 * ((ramfc >> 3) & 0x3f));
		info->chip.PRAMRO = (u32 *) (pramin + 0x0200 * ((ramro >> 1) & 0xff));
	}
	DPRINTK2 ("RAMHT: 0x%08lX, RAMFC: 0x%08lX, RAMRO: 0x%08lX\n",
		  bus + (ulong) info->chip.PRAMHT - (ulong) info->chip.PRAMIN,
		  bus + (ulong) info->chip.PRAMFC - (ulong) info->chip.PRAMIN,
		  bus + (ulong) info->chip.PRAMRO - (ulong) info->chip.PRAMIN);

	if (info->sched.flags & RIVATV_SCHED_DMA)
		return 1;
	return 0;
}

/* Computes the hash table key for an object handle in a given channel.
   The 32-bit handle is folded by XOR-ing successive groups of dma->ht_bits
   bits, the channel id is mixed into the upper bits of the key and the result
   is scaled by 8, the size of one hash table entry (handle word plus context
   word, see rivatv_storeHT()).  The return value is a byte offset into the
   hash table. */
static u32 rivatv_hashHT (struct rivatv_info *info, u32 handle, int chid)
{
	struct rivatv_dma *dma = &info->dma;
	const u32 mask = (1 << dma->ht_bits) - 1;
	u32 key = 0;
	s32 remaining = 32;

	/* consume the handle ht_bits at a time until all 32 bits are folded */
	while (remaining > 0) {
		key ^= handle & mask;
		handle >>= dma->ht_bits;
		remaining -= dma->ht_bits;
	}
	key ^= chid << (dma->ht_bits - 4);

	return key << 3;
}

/* Engine selector passed as 'engine' argument to rivatv_storeHT(), which
   stores it in the context word of the hash table entry. */
#define ENGINE_SW		0
#define ENGINE_GRAPHICS		1
#define ENGINE_DVD		2

/* Stores a graphics object handle into the hash table.

   info   - device the hash table belongs to
   handle - 32-bit object handle (hash key and first word of the entry)
   chid   - channel id the object is visible in
   addr   - instance RAM address of the object (16-byte units)
   engine - one of the ENGINE_* values
   class  - object class; only stored on NV3, ignored on newer chips

   The entry consists of two words at the offset computed by rivatv_hashHT():
   the handle and a context word.  The context layout differs between NV3
   (channel, engine bit 23, class) and NV4+ (channel, bit 31 set, engine at
   bit 16).  The context word is assembled from unsigned operands because
   shifting a signed 1 into bit 31 is undefined behaviour in C.
   No collision handling is done: an existing entry at that slot is
   overwritten. */
static void rivatv_storeHT (struct rivatv_info *info, u32 handle, int chid, int addr, int engine, int class)
{
	u32 key, context;

	key = rivatv_hashHT (info, handle, chid);
	if (info->chip.arch == NV_ARCH_03) {
		context = (u32) addr | ((u32) chid << 24) | ((u32) engine << 23) | ((u32) class << 16);
	} else {
		context = (u32) addr | ((u32) chid << 24) | (1U << 31) | ((u32) engine << 16);
	}
	VID_WR32 (info->chip.PRAMHT, key + 0, handle);
	VID_WR32 (info->chip.PRAMHT, key + 4, context);
	DPRINTK2 ("handle: %08x, key: %08x, context: %08x\n", handle, key, context);
}

/* Flags for the first word of a context object: rivatv_storeContext() writes
   'class | flags' there, so all of them lie above the low bits used by the
   class number.  Multi-bit fields are PATCH (from bit 15) and DITHER (from
   bit 20). */
#define CTX_FLAG_CHROMA_KEY		  (1 << 12) /* enable */
#define CTX_FLAG_USER_CLIP		  (1 << 13) /* enable */
#define CTX_FLAG_SWIZZLE		  (1 << 14) /* enable */
#define CTX_FLAG_PATCH_SRCCPY_AND	  (0 << 15)
#define CTX_FLAG_PATCH_ROP_AND		  (1 << 15)
#define CTX_FLAG_PATCH_BLEND_AND	  (2 << 15)
#define CTX_FLAG_PATCH_SRCCOPY		  (3 << 15)
#define CTX_FLAG_PATCH_SRCCOPY_PRE	  (4 << 15)
#define CTX_FLAG_PATCH_BLEND_PRE	  (5 << 15)
#define CTX_FLAG_SYNCHRONIZE		  (1 << 18) /* enable */
#define CTX_FLAG_ENDIAN_LITTLE		  (0 << 19) /* little endian */
#define CTX_FLAG_ENDIAN_BIG		  (1 << 19) /* big endian */
#define CTX_FLAG_DITHER_COMPATIBILITY	  (0 << 20)
#define CTX_FLAG_DITHER_DITHER		  (1 << 20)
#define CTX_FLAG_DITHER_TRUNCATE	  (2 << 20)
#define CTX_FLAG_DITHER_SUBTRACT_TRUNCATE (3 << 20)
#define CTX_FLAG_SINGLE_STEP		  (1 << 23) /* enable */
#define CTX_FLAG_PATCH			  (1 << 24) /* valid */
#define CTX_FLAG_CTX_SURFACE0		  (1 << 25) /* valid */
#define CTX_FLAG_CTX_SURFACE1		  (1 << 26) /* valid */
#define CTX_FLAG_CTX_PATTERN		  (1 << 27) /* valid */
#define CTX_FLAG_CTX_ROP		  (1 << 28) /* valid */
#define CTX_FLAG_CTX_BETA1		  (1 << 29) /* valid */
#define CTX_FLAG_CTX_BETA4		  (1 << 30) /* valid */

/* Values for the 'mono' and 'color' arguments of rivatv_storeContext(), which
   end up in the low half of the second context word. */
#define CTX_FLAG_MONO_FORMAT_INVALID	  (0 << 0)
#define CTX_FLAG_COLOR_FORMAT_INVALID	  (0 << 8)

/* Writes a four word context object into the instance RAM.
     word 0: object class combined with the CTX_FLAG_* bits
     word 1: mono and color format, notifier instance in the upper half
     word 2: the two DMA object instances (dma_0 low half, dma_1 upper half)
     word 3: zero, i.e. traps disabled
   'addr', 'notify', 'dma_0' and 'dma_1' are instance addresses in 16-byte
   units. */
static void rivatv_storeContext (struct rivatv_info *info, int addr, int class, int flags, 
				 int mono, int color, int notify, int dma_0, int dma_1)
{
	const int base = addr << 4;	/* byte offset of the object in PRAMIN */
	const int word0 = class | flags;
	const int word1 = mono | color | (notify << 16);
	const int word2 = dma_0 | (dma_1 << 16);

	VID_WR32 (info->chip.PRAMIN, base +  0, word0);
	VID_WR32 (info->chip.PRAMIN, base +  4, word1);
	VID_WR32 (info->chip.PRAMIN, base +  8, word2);
	VID_WR32 (info->chip.PRAMIN, base + 12, 0); /* traps disabled */
}

/* DMA_FLAG1_*: flags for the first word of a DMA object; rivatv_storeDMA()
   writes 'class | adjust | flags' there. */
#define DMA_FLAG1_PAGE_TABLE		(1 << 12)
/* present = use page table, 0 = not present */
#define DMA_FLAG1_PAGE_ENTRY_NONLIN	(0 << 13)
/* the page table is not linear; it is stored in the instance at offset
   pageNum * 4 + 0x08 and each entry consists of
   NV04_DMA_ACCESS
   NV04_DMA_FRAME_ADDRESS
*/
#define DMA_FLAG1_PAGE_ENTRY_LIN	(1 << 13) 
/* the page address is calculated from the first page entry at offset 0x0008;
   the formula is page base = NV04_DMA_FRAME_ADDRESS + (offset >> 12).
   NV04_DMA_ACCESS is "copped" (wording of the original author; presumably
   "copied" or "dropped" -- TODO confirm)
*/
#define DMA_FLAG1_ACCESS_READ_WRITE	(0 << 14) 
/* memory can be read and also written if ACCESS_READ_ONLY is not in effect */
#define DMA_FLAG1_ACCESS_READ_ONLY	(1 << 14) 
/* memory can only be read */
#define DMA_FLAG1_ACCESS_WRITE_ONLY	(2 << 14) 
/* memory can only be written if ACCESS_READ_ONLY is not in effect */
/* mask of the two target bits; the DMA_FLAG1_TARGET_* values follow */
#define DMA_FLAG1_TARGET		(3 << 16) 
#define DMA_FLAG1_TARGET_NVM		(0 << 16) 
#define DMA_FLAG1_TARGET_NVM_TILED	(1 << 16) 
/* local memory to the nvidia chip */
#define DMA_FLAG1_TARGET_PCI		(2 << 16) 
/* host memory accessed through PCI bus master transfer */
#define DMA_FLAG1_TARGET_AGP		(3 << 16) 
/* host memory accessed through AGP bus master transfer */

/* DMA_FLAG2_*: access bits OR-ed into every page table entry by
   rivatv_storeDMA() ('access' argument). */
#define DMA_FLAG2_UNKNOWN		(1 << 0)
#define DMA_FLAG2_ACCESS_READ_ONLY	(0 << 1)
/* this page is readable */
#define DMA_FLAG2_ACCESS_READ_WRITE	(1 << 1)
/* this page is readable and writable */

/* page size used in DMA page tables: 4kB */
#define DMA_PAGE_SIZE (0x1000)
#define DMA_PAGE_MASK (DMA_PAGE_SIZE - 1)

/* Stores a DMA object (NV01_CONTEXT_DMA) into the instance RAM.

   info   - device whose instance RAM is written
   addr   - instance address of the object in 16-byte units
   class  - object class; forced to 0 on NV3 and NV4
   pa     - with DMA_FLAG1_PAGE_ENTRY_LIN set in 'flags': start address of
            one contiguous range; otherwise the virtual address of a
            vmalloc()'ed buffer, which is translated page by page with
            vmalloc_to_bus_addr()
   size   - length of the memory range in bytes
   flags  - DMA_FLAG1_* bits for the first word
   access - DMA_FLAG2_* bits OR-ed into every page table entry

   Layout written:
     word 0:  class | adjust | flags, 'adjust' being the byte offset of the
              range inside its first page (placed differently on NV3)
     word 1:  size - 1, the highest valid offset
     word 2+: page table; a single entry for linear objects, one entry per
              4kB page otherwise, always followed by 0xffffffff

   A non-linear object grows by one word per page, so the caller has to make
   sure nothing else lives behind 'addr' in the instance RAM. */
static void rivatv_storeDMA (struct rivatv_info *info, u32 addr, int class, ulong pa, u32 size,
			     int flags, int access)
{
	/* adjust: adjusts byte offset in a page */
	u32 offset, adjust = 0;
	int linear = (flags & DMA_FLAG1_PAGE_ENTRY_LIN) ? 1 : 0;
	u32 target = (flags & DMA_FLAG1_TARGET) >> 16;

	offset = linear ? pa : vmalloc_to_bus_addr (pa);
	offset &= DMA_PAGE_MASK;

	/* architecture dependent settings */
	switch (info->chip.arch) {
	case NV_ARCH_03:
		/* NV3 keeps target and page table bits at other positions */
		access |= DMA_FLAG2_UNKNOWN;
		adjust = offset;
		flags &= ~(DMA_FLAG1_TARGET | DMA_FLAG1_PAGE_ENTRY_LIN);
		flags |= target << 24; /* DMA_FLAG1_TARGET_*   */
		flags |= 1 << 16;      /* DMA_FLAG1_PAGE_TABLE */
		class = 0;
		break;
	case NV_ARCH_04:
		adjust = offset << 20;
		flags |= DMA_FLAG1_PAGE_TABLE;
		class = 0;
		break;
	case NV_ARCH_10:
	case NV_ARCH_20:
	case NV_ARCH_30:
		adjust = offset << 20;
		flags |= DMA_FLAG1_PAGE_TABLE;
		break;
	}

	DPRINTK2 ("DMA: 00000000 = %08X\n", class | adjust | flags);
	DPRINTK2 ("DMA: 00000004 = %08X\n", size - 1);
	VID_WR32 (info->chip.PRAMIN, (addr << 4) + 0, class | adjust | flags);
	/* the last highest valid address in the memory range */
	VID_WR32 (info->chip.PRAMIN, (addr << 4) + 4, size - 1);

	/* page table: each page is 4kB (12 bits), contains one or more page addresses 
	   for local memory: page 0 means the start of the framebuffer
	   for pci/agp:      physical (bus) (possibly non-linear) addresses
	*/
	if (linear) {
		/* just a single page address */
		DPRINTK2 ("DMA: 00000008 = %08X\n", (u32) ((pa & ~DMA_PAGE_MASK) | access));
		DPRINTK2 ("DMA: 0000000C = %08X\n", 0xffffffff);
		VID_WR32 (info->chip.PRAMIN, (addr << 4) +  8, (pa & ~DMA_PAGE_MASK) | access);
		VID_WR32 (info->chip.PRAMIN, (addr << 4) + 12, 0xffffffff);
	} else {
		/* loads of page addresses, "pa" has been vmalloc()'ed */
		unsigned long ofs = 8;
		unsigned long virt_addr = pa;

		DPRINTK2 ("non-linear page table for DMA transfer:\n");
		/* 'size' is unsigned and is counted down page by page, so for
		   a length that is not a multiple of the page size it wraps
		   around instead of becoming negative.  The comparison has to
		   be done on a signed 32-bit value: a cast to 'long' only
		   catches the wrap where long is 32 bits wide, elsewhere the
		   loop would run far beyond the end of the instance RAM. */
		while ((s32) size > 0) {
			pa = vmalloc_to_bus_addr (virt_addr);
			VID_WR32 (info->chip.PRAMIN, (addr << 4) + ofs, (pa & ~DMA_PAGE_MASK) | access);
			DPRINTK2 ("  %03ld. PCI bus address = 0x%08x\n", (ofs - 8) / 4, (u32) pa);
			virt_addr += DMA_PAGE_SIZE;
			size -= DMA_PAGE_SIZE;
			ofs += 4;
		}
		/* terminate the page table */
		VID_WR32 (info->chip.PRAMIN, (addr << 4) + ofs, 0xffffffff);
	}
}

/* upper bounds for the busy-wait loops: polls of the Free counter and polls
   of a notifier status */
#define WAIT_UFREE_MAX 0x10000
#define WAIT_COUNT_MAX 10000000

/* What is going on here?  Does the hardware decrement the Free counter and if
   so, when does this happen?

   The "Free" field points to an address in the fifo channel that tells you
   the number of available free entries in the fifo. This number is the same
   for all subchannels, and I guess it's aliased directly to the corresponding
   hardware register.
   Channel access waiting is done in a similar way in the X driver, BTW,
   only without the timeout. I guess that should be removed; it makes
   only sense when testing. 

   RIVATV_WAIT_UFREE (ch, subch, cnt) polls ch->sub[subch].Free until it is at
   least 'cnt' and gives up with a debug message after more than
   WAIT_UFREE_MAX polls.  The arguments are evaluated several times, so they
   must not have side effects.  The body is wrapped in do { } while (0) so
   that the macro followed by a semicolon is exactly one statement and can be
   used in an unbraced if/else.
*/
#define RIVATV_WAIT_UFREE(ch,subch,cnt) do {                                       \
        int loopcnt = 0;                                                           \
        while ((ch)->sub[(subch)].Free < (cnt)) {                                  \
                if (loopcnt > WAIT_UFREE_MAX) {                                    \
                        DPRINTK ("Free count timeout: current 0x%x target 0x%x\n", \
                                 (ch)->sub[(subch)].Free, (cnt));                  \
                        break;                                                     \
                }                                                                  \
                loopcnt++;                                                         \
        }                                                                          \
} while (0)


/* Dumps the state of a notifier object to the syslog.
   Prints the timeStamp, returnVal, errorCode and status fields of 'nfy'
   through DPRINTK2, so whether anything is printed depends on how that debug
   macro is configured. */
static void rivatv_printNotifier (NvNotification *nfy) 
{
	DPRINTK2 ("Notify TIME=%llu RET=%08x ERR=%04x STATUS=%02x\n",
		  nfy->timeStamp, nfy->returnVal, nfy->errorCode, nfy->status);
}

/* Waits for a notifier object to change its status value.

   nfy   - notifier to poll
   value - status value that means "not finished yet"

   Busy-waits until nfy->status differs from 'value'.  Returns 0 as soon as
   the status has changed and 1 if it still equals 'value' after
   WAIT_COUNT_MAX polls (the notifier is dumped to the log in that case).

   The status is written behind the compiler's back (rivatv_setupDMA() places
   the notifiers in a DMA buffer), so it is read through a volatile-qualified
   pointer.  Otherwise the compiler is free to load it once and spin on the
   stale value.  The qualifier is harmless should the type already be
   declared volatile. */
static int rivatv_WaitOnNotifier (NvNotification *nfy, u8 value)
{
	volatile NvNotification *hw = nfy;
	int count = 0;

	while (hw->status == value) {
		if (++count == WAIT_COUNT_MAX) {
			PRINTK_ERR ("Notify stuck after %d loops...\n", count);
			rivatv_printNotifier (nfy);
			return 1;
		}
	}
	return 0;
}

/* Sets up everything in the intance RAM and hashtable.  Tests the notifier object
   once. */
static int rivatv_setupDMA (struct rivatv_info *info)
{
	struct rivatv_dma *dma = &info->dma;
	NvNotification *ntfy;
	NvChannel *channel;

	/* reserve notifiers */
	dma->notifiers = pci_alloc_consistent (info->pci, PAGE_SIZE, &dma->notifiers_addr);
	if (dma->notifiers == NULL) {
		PRINTK_ERR ("Unable to allocate DMA notifiers\n");
		return 0;
	}
	ntfy = (NvNotification *) dma->notifiers;

	/* setup PCI DMA */
	pci_set_master (info->pci);
        if (pci_set_dma_mask (info->pci, 0xffffffff)) {
		PRINTK_ERR ("No suitable 32bit DMA available\n");
                return 0;
        }

	channel = (NvChannel *) info->chip.FIFO;

	/* setup the DMA notifier object */
	rivatv_storeDMA (info, ADDR_DMA_NTFY, NV01_MEMORY_SYSTEM,
			 dma->notifiers_addr, NV_NOTIFICATION_SIZE,
			 DMA_FLAG1_PAGE_ENTRY_LIN | DMA_FLAG1_ACCESS_READ_WRITE | DMA_FLAG1_TARGET_PCI, 
			 DMA_FLAG2_ACCESS_READ_WRITE);
	rivatv_storeHT (info, DMA_NTFY, RIVATV_CHANNEL, ADDR_DMA_NTFY, ENGINE_SW, NV01_MEMORY_SYSTEM);

	/* setup the DMA source object */
	rivatv_storeDMA (info, ADDR_DMA_IN, NV01_MEMORY_LOCAL_BANKED,
			 info->picture_offset, RIVATV_RAW_CAPTURE_BUFSIZE,
			 DMA_FLAG1_PAGE_ENTRY_LIN | DMA_FLAG1_ACCESS_READ_WRITE | DMA_FLAG1_TARGET_NVM, 
			 DMA_FLAG2_ACCESS_READ_WRITE);
	rivatv_storeHT (info, DMA_IN, RIVATV_CHANNEL, ADDR_DMA_IN, ENGINE_SW, NV01_MEMORY_LOCAL_BANKED);

	/* setup the DMA target object */
	rivatv_storeDMA (info, ADDR_DMA_OUT, NV01_MEMORY_SYSTEM,
			 (ulong) dma->addr, RIVATV_RAW_CAPTURE_BUFSIZE,
			 DMA_FLAG1_PAGE_ENTRY_LIN | DMA_FLAG1_ACCESS_READ_WRITE | DMA_FLAG1_TARGET_PCI,
			 DMA_FLAG2_ACCESS_READ_WRITE);
	rivatv_storeHT (info, DMA_OUT, RIVATV_CHANNEL, ADDR_DMA_OUT, ENGINE_SW, NV01_MEMORY_SYSTEM);

	/* setup the DMA context object */
	rivatv_storeContext (info, ADDR_OBJ_MEMFMT, NV03_MEMORY_TO_MEMORY_FORMAT, 
			     CTX_FLAG_ENDIAN_LITTLE, 
			     CTX_FLAG_MONO_FORMAT_INVALID, CTX_FLAG_COLOR_FORMAT_INVALID, 
			     ADDR_DMA_NTFY, ADDR_DMA_IN, ADDR_DMA_OUT);
	rivatv_storeHT (info, OBJ_MEMFMT, RIVATV_CHANNEL, ADDR_OBJ_MEMFMT, ENGINE_GRAPHICS, NV03_MEMORY_TO_MEMORY_FORMAT);

	/* finally pass the OBJ_MEMFMT to the subchannel */
	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 1 * 4);
	channel->sub[RIVATV_SUBCHANNEL].SetObject = OBJ_MEMFMT;

	/* debug code: test if the notifier reacts to notification once */
	ntfy->status = NV_NOTIFICATION_STATUS_IN_PROGRESS;
	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 2 * 4);
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.Notify = NV_NOTIFY_WRITE_LE;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.NoOperation = 0;

	rivatv_WaitOnNotifier (ntfy, NV_NOTIFICATION_STATUS_IN_PROGRESS);
	if (ntfy->status != NV_NOTIFICATION_STATUS_IN_PROGRESS) {
		DPRINTK ("Testing notifier... passed\n");
		rivatv_printNotifier (ntfy);
		return 1;
	}
	PRINTK_ERR ("Testing notifier... failed\n");
	return 0;
}

/* Frees the notifier buffer allocated by rivatv_setupDMA().

   Does nothing unless RIVATV_SCHED_DMA is set.  It also does nothing when no
   notifier buffer is present: the flag is set by rivatv_checkHT() before the
   buffer is allocated, so the flag alone does not prove that the allocation
   took place or succeeded.  Because the pointer is cleared after the free,
   calling the function a second time is harmless. */
void rivatv_cleanupDMA (struct rivatv_info *info)
{
	struct rivatv_dma *dma = &info->dma;

	if (!(info->sched.flags & RIVATV_SCHED_DMA))
		return;

	/* never hand a NULL buffer to pci_free_consistent() */
	if (dma->notifiers == NULL)
		return;

	pci_free_consistent (info->pci, PAGE_SIZE, dma->notifiers, dma->notifiers_addr);
	dma->notifiers = NULL;
	dma->notifiers_addr = 0;
}

/* Checks if DMA can be used. */
void rivatv_checkDMA (struct rivatv_info *info)
{
	if (rivatv_checkHT (info)) {
		if (dma) {
			rivatv_setupDMA (info);
		} else {
			spin_lock_irq (&info->sched.sched_lock);
			info->sched.flags &= ~RIVATV_SCHED_DMA;
			spin_unlock_irq (&info->sched.sched_lock);

			PRINTK_INFO ("DMA transfers disabled\n");
		}
	}
}

/* Prepares a DMA transfer. */
static void rivatv_prepareDMA (struct rivatv_info *info, struct rivatv_direct_DMA_plane *plane, int frame)
{
	struct rivatv_port *port = &info->port;
	u32 src_pitch, dst_pitch, src_offset, dst_offset, length, lines, inc;
	NvChannel *channel;

	/* standard cropping */
	dst_offset = frame * info->capture_buffer_size;
	dst_pitch = port->vld_width << 1;
	src_offset = (port->org_width - port->vld_width) & ~1;
	src_pitch = port->org_width << 1;
	length = port->vld_width << 1;
	lines = port->vld_height;
	inc = NV0039_SET_FORMAT_INPUT_INC_1 | NV0039_SET_FORMAT_OUTPUT_INC_1;

	/* extra color conversion */
	if (plane != NULL) {
		src_offset += plane->src_offset;
		src_pitch = port->org_width * plane->src_pitch;
		if (plane->planar) {
			dst_offset += port->vld_width * port->vld_height * plane->dst_offset / 16;
		} else {
			dst_offset += plane->dst_offset;
		}
		dst_pitch = port->vld_width * plane->dst_pitch / 2;
		length = port->vld_width * 2 / plane->src_length;
		lines = port->vld_height * plane->src_lines / 4;
		inc = plane->src_inc | plane->dst_inc;
	}

	DPRINTK2 ("DMA: OffsetIn   %u\n", src_offset);
	DPRINTK2 ("DMA: OffsetOut  %u\n", dst_offset);
	DPRINTK2 ("DMA: PitchIn    %u\n", src_pitch);
	DPRINTK2 ("DMA: PitchOut   %u\n", dst_pitch);
	DPRINTK2 ("DMA: LineLength %u\n", length);
	DPRINTK2 ("DMA: LineCount  %u\n", lines);
	DPRINTK2 ("DMA: Format     0x%08X\n", inc);

	channel = (NvChannel *) info->chip.FIFO;

	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 7 * 4);
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetOffsetIn = src_offset;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetOffsetOut = dst_offset;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetPitchIn = src_pitch;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetPitchOut = dst_pitch;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetLineLengthIn = length;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetLineCount = lines;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetFormat = inc;

	/* What about SetContextDmaNotifies, SetContextDmaBufferIn and SetContextDmaBufferOut ? 
	   Is this possibly already setup by rivatv_storeContext() ? 

	   Yes, the SetContext* methods seem just to update the context.  But probably the
	   values should be setup anyway.  Possibly it solves the NV10 DMA problem with
	   multiple plane transfers.
	*/
#if SOMEONE_TESTED_IT
	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 3 * 4);
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetContextDmaNotifies = DMA_NTFY;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetContextDmaBufferIn = DMA_IN;
	channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetContextDmaBufferOut = DMA_OUT;
#endif
}

/* Prepares a picture conversion transfer. 

   The idea behind this code is:
     1. The UYVY raw video data is located in the framebuffer.
     2. We use a Nv05ScaledImageFromMemory class to convert this data into the 
        destination colour format.
     3. Then a Nv04ContextSurfaces2d class - which is used by the Nv05ScaledImageFromMemory class -
        renders this converted video data into another offscreen location in the framebuffer.
     4. In a last step we can transfer this data to the user land. 

   Notes:
     * We need yet another offscreen framebuffer location for the converted video data.
     * Two more sub channels are necessary.
     * The pitch values of the Nv05ScaledImageFromMemory and Nv04ContextSurfaces2d seem
       to be limited to multiples of 64.  That is why final cropping must be done by
       the Nv03MemoryToMemoryFormat class.
     * I suggest to make the nVidia hardware to dump its data in 720x576 format, then convert
       the valid 704x576 portion of it (704 is a multiple of 64) to the second framebuffer
       location (again with a multiple of 64 in width) and finally crop the video via the
       final DMA transfer.

*/

/* Looks like the class can convert into: GREY, RGB555, RGB565, RGB24 and RGB32.
   Maps a VIDEO_PALETTE_* value to the matching NV0042_SET_FORMAT_* constant
   and yields -1 for every other palette.  The argument is parenthesized so
   that an expression can be passed; it is evaluated up to five times and
   therefore must not have side effects. */
#define NV0042_SET_FORMAT(palette)                                                \
  (((palette) == VIDEO_PALETTE_GREY)   ? NV0042_SET_FORMAT_LE_Y8 :                \
  (((palette) == VIDEO_PALETTE_RGB555) ? NV0042_SET_FORMAT_LE_X1R5G5B5_Z1R5G5B5 : \
  (((palette) == VIDEO_PALETTE_RGB565) ? NV0042_SET_FORMAT_LE_R5G6B5 :            \
  (((palette) == VIDEO_PALETTE_RGB24)  ? NV0042_SET_FORMAT_LE_X8R8G8B8_Z8R8G8B8 : \
  (((palette) == VIDEO_PALETTE_RGB32)  ? NV0042_SET_FORMAT_LE_A8R8G8B8 : -1)))))

/* Forward declaration.  The 'unused' attribute marks the function as possibly
   unused.  NOTE(review): no caller is visible in this part of the file. */
int rivatv_startConversion (struct rivatv_info *info) __attribute__ ((unused));

static void rivatv_prepareConversion (struct rivatv_info *info)
{
	struct rivatv_port *port = &info->port;
	u32 src_pitch, dst_pitch, src_offset, dst_offset, dst_format, dst_width, dst_height, src_width, src_height;
	NvChannel *channel;

	channel = (NvChannel *) info->chip.FIFO;

	/* set up destination rendering values */
	dst_format = info->format.format;
	dst_pitch = info->format.width * rivatv_convert[dst_format].bpp >> 3;
	dst_pitch = (dst_pitch + 0x3f) & 0x3f;
	dst_offset = 0;

	/* set up source rendering values */
	src_pitch = port->org_width << 1;
	src_offset = (port->org_width - port->vld_width) & ~1;

	/* configure the 2D surface */
	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 7 * 4);
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.SetContextDmaNotifies = SURF2D_NTFY;
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.SetContextDmaSource = SURF2D_IN;
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.SetContextDmaDestin = SURF2D_OUT;
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.SetFormat = NV0042_SET_FORMAT (dst_format);
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.SetPitch = dst_pitch << 16 | src_pitch;
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.SetOffsetSource = src_offset;
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.SetOffsetDestin = dst_offset;

	/* configure colour conversion class */
	dst_width = info->format.width * rivatv_convert[dst_format].bpp >> 3;
	dst_height = info->format.height;
	src_width = port->vld_width;
	src_height = port->vld_height;

	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 6 * 4);
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.SetContextDmaNotifies = SCALER_NTFY;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.SetContextDmaImage = SCALER_IN;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.SetContextSurface = OBJ_SURF2D;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.SetColorConversion = 
		NV0063_SET_COLOR_CONVERSION_TYPE_DITHER;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.SetOperation = 
		NV0063_SET_OPERATION_MODE_SRCCOPY;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.SetColorFormat = 
		NV0063_SET_COLOR_FORMAT_LE_V8YB8U8YA8;
	/*	NV0063_SET_COLOR_FORMAT_LE_YB8V8YA8U8 or this one ? */

	/* We yet need to investigate how those both values can be used in order to tell
	   the hardware that its source format is UYVY.  Possibly it is 'LE_V8YB8U8YA8' with
	   an source offset of two bytes. */

	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 9 * 4);
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.ClipPoint = 0x00000000;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.ClipSize = 0xffffffff;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.ImageOutPoint =  (0 << 16) | 0;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.ImageOutSize = (dst_height << 16) | dst_width;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.DuDx = (src_width << 20) / dst_width;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.DvDy = (dst_height << 20) / dst_height;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.ImageInSize = (src_height << 16) | src_width;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.ImageInFormat = 
		src_pitch | NV0063_FORMAT_ORIGIN_CORNER | NV0063_FORMAT_INTERPOLATOR_ZOH;
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.ImageInOffset = src_offset;
}

/* Kicks off one colour conversion.
 *
 * Flags the notifier as in progress, programs the conversion objects through
 * rivatv_prepareConversion() and finally writes the Notify method of both the
 * Nv05ScaledImageFromMemory and the Nv04ContextSurfaces2d object.
 *
 * Always returns 1.
 */
int rivatv_startConversion (struct rivatv_info *info)
{
	NvChannel *channel;
	NvNotification *ntfy;

	ntfy = (NvNotification *) info->dma.notifiers;
	ntfy->status = NV_NOTIFICATION_STATUS_IN_PROGRESS;

	rivatv_prepareConversion (info);

	/* two method writes follow, so wait for room for both of them */
	channel = (NvChannel *) info->chip.FIFO;
	RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 2 * 4);
	channel->sub[RIVATV_SUBCHANNEL].u.nv05ScaledImageFromMemory.Notify = NV_NOTIFY_WRITE_LE_AWAKEN;
	channel->sub[RIVATV_SUBCHANNEL].u.nv04ContextSurfaces2d.Notify = NV_NOTIFY_WRITE_LE_AWAKEN;

	return 1;
}

/* Decides whether direct (zero copy) DMA can be used for the current format.
 *
 * Zero copy is ruled out when
 *   - DMA is not scheduled at all (RIVATV_SCHED_DMA clear; flags left untouched),
 *   - the chip is NV03/NV04 and the valid port size differs from the requested size,
 *   - rivatv_convert_DMA[] marks the requested format as not valid.
 *
 * Updates RIVATV_SCHED_DMA_ZC in info->sched.flags accordingly and returns 1 when
 * zero copy is possible, 0 otherwise.
 */
int rivatv_checkDirectDMA (struct rivatv_info *info)
{
	struct rivatv_sched *sched = &info->sched;
	struct rivatv_format *format = &info->format;
	int zero_copy = 1;

	if (!(sched->flags & RIVATV_SCHED_DMA))
		return 0;

	/* NV03 and NV04 need matching source and destination sizes */
	if ((info->chip.arch == NV_ARCH_03 || info->chip.arch == NV_ARCH_04) &&
	    (info->port.vld_width != format->width || info->port.vld_height != format->height))
		zero_copy = 0;

	if (rivatv_convert_DMA[format->format].valid == 0)
		zero_copy = 0;

	if (zero_copy)
		sched->flags |= RIVATV_SCHED_DMA_ZC;
	else
		sched->flags &= ~RIVATV_SCHED_DMA_ZC;

	DPRINTK ("DMA: zero copy for %dx%d %s%s possible\n", format->width, format->height,
		 rivatv_convert[format->format].name,
		 zero_copy ? "" : " not");

	return zero_copy;
}

/* Sets up the DMA target object and enables the DMA interrupts.
 *
 * Does nothing unless RIVATV_SCHED_DMA is set in info->sched.flags.  Depending on
 * the result of rivatv_checkDirectDMA() the target object describes either the
 * capture buffers (non-linear page entries) or the raw capture buffer at
 * info->dma.addr (linear page entries).
 */
void rivatv_configureDMA (struct rivatv_info *info)
{
	if (!(info->sched.flags & RIVATV_SCHED_DMA))
		return;

	/* describe the DMA target object */
	if (!rivatv_checkDirectDMA (info)) {
		/* no zero copy: transfer into the raw capture buffer */
		rivatv_storeDMA (info, ADDR_DMA_OUT, NV01_MEMORY_SYSTEM,
				 (ulong) info->dma.addr, RIVATV_RAW_CAPTURE_BUFSIZE,
				 DMA_FLAG1_PAGE_ENTRY_LIN | DMA_FLAG1_ACCESS_READ_WRITE | DMA_FLAG1_TARGET_PCI,
				 DMA_FLAG2_ACCESS_READ_WRITE);
	} else {
		/* zero copy: transfer straight into the capture buffers */
		rivatv_storeDMA (info, ADDR_DMA_OUT, NV01_MEMORY_SYSTEM,
				 (ulong) info->capture_buffer, info->capture_buffers * info->capture_buffer_size,
				 DMA_FLAG1_PAGE_ENTRY_NONLIN | DMA_FLAG1_ACCESS_READ_WRITE | DMA_FLAG1_TARGET_PCI,
				 DMA_FLAG2_ACCESS_READ_WRITE);
	}
	rivatv_storeHT (info, DMA_OUT, RIVATV_CHANNEL, ADDR_DMA_OUT, ENGINE_SW, NV01_MEMORY_SYSTEM);

	/* switch on the DMA interrupts */
	switch (info->chip.arch) {
	case NV_ARCH_03:
		VID_WR32 (info->chip.PGRAPH, 0x0140, 0x11111111);
		VID_WR32 (info->chip.PGRAPH, 0x0144, 0x11111111);
		VID_WR32 (info->chip.PGRAPH, 0x1140, 0x11111111);
		break;
	default:
		/* NV_PGRAPH_INTR_EN (NOTIFY, MISSING_HW, BUFFER_NOTIFY, ERROR) */
		VID_WR32 (info->chip.PGRAPH, 0x140, 0x00110011);
		break;
	}
}

/* Starts one DMA transfer.
 *
 *   info  - driver state
 *   frame - frame number handed to rivatv_prepareDMA() for zero copy transfers
 *
 * Returns 0 when DMA is not scheduled (RIVATV_SCHED_DMA clear), otherwise 1.
 * In zero copy mode one transfer per plane of the current format is issued;
 * otherwise a single transfer is prepared with a NULL plane and frame 0.
 */
int rivatv_startDMA (struct rivatv_info *info, int frame)
{
	NvChannel *channel = (NvChannel *) info->chip.FIFO;
	NvNotification *ntfy = (NvNotification *) info->dma.notifiers;
	struct rivatv_sched *sched = &info->sched;
	int format, n;

	if (!(sched->flags & RIVATV_SCHED_DMA))
		return 0;

	/* reset the per-transfer plane bookkeeping */
	format = info->format.format;
	sched->dma_zc_planes = rivatv_convert_DMA[format].nplanes;
	sched->dma_zc_count = 0;

	ntfy->status = NV_NOTIFICATION_STATUS_IN_PROGRESS;

	if (!(sched->flags & RIVATV_SCHED_DMA_ZC)) {
		/* plain transfer */
		rivatv_prepareDMA (info, NULL, 0);
		RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 2 * 4);
		channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.Notify = NV_NOTIFY_WRITE_LE_AWAKEN;
		channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetBufNotify = 0; /* starts */
		return 1;
	}

	/* zero copy: one transfer for each plane */
	n = 0;
	while (n < sched->dma_zc_planes) {
		rivatv_prepareDMA (info, &rivatv_convert_DMA[format].plane[n], frame);
		RIVATV_WAIT_UFREE (channel, RIVATV_SUBCHANNEL, 2 * 4);
		channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.Notify = NV_NOTIFY_WRITE_LE_AWAKEN;
		channel->sub[RIVATV_SUBCHANNEL].u.nv03MemoryToMemoryFormat.SetBufNotify = 0;
		n++;
	}
	return 1;
}

/* Waits for the pending DMA transfer.
 *
 * Returns 0 when DMA is not scheduled or when the notifier still reports
 * NV_NOTIFICATION_STATUS_IN_PROGRESS after rivatv_WaitOnNotifier() came back;
 * returns 1 once the notifier status has changed.
 */
int rivatv_WaitOnDMA (struct rivatv_info *info)
{
	NvNotification *ntfy;

	if (!(info->sched.flags & RIVATV_SCHED_DMA))
		return 0;

	ntfy = (NvNotification *) info->dma.notifiers;
	rivatv_WaitOnNotifier (ntfy, NV_NOTIFICATION_STATUS_IN_PROGRESS);

	/* still in progress: the transfer has not finished */
	if (ntfy->status == NV_NOTIFICATION_STATUS_IN_PROGRESS)
		return 0;

	DPRINTK2 ("DMA finished\n"); 
	rivatv_printNotifier (ntfy);
	return 1;
}

/* Display NV object.
 *
 * Prints the first instance memory word of the object at instance address 'addr'
 * and then every hash table entry whose context word refers to that address
 * (low 16 bits of the context equal 'addr').
 *
 *   info - driver state (PRAMIN, PRAMHT, chip architecture, hash table size)
 *   addr - instance address of the object
 *   name - label used in the debug output
 */
static void rivatv_printObject (struct rivatv_info *info, u32 addr, char *name)
{
	int n;
	struct rivatv_dma *dma = &info->dma;
	u32 handle, context, channel, engine, val, class = 0, found = 0;

	val = VID_RD32 (info->chip.PRAMIN, addr << 4);
	DPRINTK ("InstAddr: 0x%04X = 0x%08X (Class 0x%02X) [%s]\n", addr, val, val & 0x7F, name);

	/* The handle of an entry is the word in front of its context word.  Start the
	   scan at offset 4: offset 0 has no preceding word, and matching there would
	   read the hash table at offset -4. */
	for (n = 4; n < dma->ht_size; n += 4) {
		context = VID_RD32 (info->chip.PRAMHT, n);
		if ((context & 0xFFFF) != addr)
			continue;

		handle = VID_RD32 (info->chip.PRAMHT, n - 4);
		if (info->chip.arch == NV_ARCH_03) {
			channel = (context >> 24) & 0x7F;
			engine = (context >> 23) & 0x01;
			class = (context >> 16) & 0x7F;
		} else {
			/* no class is extracted from the context here; 'class' stays 0 */
			channel = (context >> 24) & 0x1F;
			engine = (context >> 16) & 0x03;
		}
		DPRINTK ("Handle:   0x%08X\n", handle);
		DPRINTK ("Context:  0x%08X (Channel %u, Engine '%s', Class 0x%02X)\n", 
			 context, channel, 
			 (engine == ENGINE_SW) ? "SoftWare" : (engine == ENGINE_GRAPHICS) ? "Graphics" : "DVD",
			 class);
		DPRINTK ("HashKey:  0x%08X\n", n - 4);
		found++;
	}
	if (!found) {
		DPRINTK ("Error:    Object not found in hash table\n");
	}
}

/* Subroutine of the interrupt handler.
 *
 * Handles a PGRAPH interrupt for the DMA engine.  Returns immediately unless
 * RIVATV_SCHED_DMA is set.  Otherwise FIFO access is switched off, the interrupt
 * state is read and logged, the interrupt registers are written back, and - when
 * the notification belongs to the ADDR_OBJ_MEMFMT object - the finished transfer
 * is accounted for.  FIFO access is switched on again before returning.
 *
 * In zero copy mode the notifier is only cleared and the next action scheduled
 * once dma_zc_count has reached dma_zc_planes; otherwise this happens on every
 * matching notification.
 *
 * NOTE(review): the two 'while' loops poll hardware status without a timeout.
 */
void rivatv_interrupt_DMA (struct rivatv_info *info)
{
	u32 reg, instance = 0, notify = 0, intr, error = 0, intr0, intr1;

	if (!(info->sched.flags & RIVATV_SCHED_DMA))
		return;

	if (info->chip.arch == NV_ARCH_03) {
		VID_WR32 (info->chip.PGRAPH, 0x6A4, 0);                   /* disable FIFOs */
		while (VID_RD32 (info->chip.PGRAPH, 0x6B0) != 0);         /* status busy ? */
		intr0 = VID_RD32 (info->chip.PGRAPH, 0x100) & 0x11111011; /* get interrupt reg 0 */
		intr1 = VID_RD32 (info->chip.PGRAPH, 0x104);              /* get interrupt reg 1 */
		VID_WR32 (info->chip.PGRAPH, 0x104, intr1);               /* set interrupt reg 1 */
	} else {
		/* NV_PGRAPH_FIFO_ACCESS_DISABLED */
		VID_WR32 (info->chip.PGRAPH, 0x720, 0);
		/* NV_PGRAPH_INTR */
		intr0 = VID_RD32 (info->chip.PGRAPH, 0x100);
	}

	/* NV_PMC_INTR */
	/* the value read here is not used anywhere else in this function */
	intr = VID_RD32 (info->chip.PMC, 0x100);

	/* log the interrupt cause and remember whether an error was reported */
	if (info->chip.arch == NV_ARCH_03) {
		if (intr0 & 0x10000000) {
			DPRINTK2 ("DMA: NOTIFY\n");
		}
		if (intr0 & 0x00010000) {
			DPRINTK2 ("DMA: EXCEPTION\n");
			VID_WR32 (info->chip.PGRAPH, 0x508, 0x33);
			error = 1;
		}
	} else {
		if (intr0 & 0x00000001) {
			DPRINTK2 ("DMA: NOTIFY\n");
		}
		if (intr0 & 0x00000010) {
			PRINTK_ERR ("DMA: MISSING_HW\n");
		}
		if (intr0 & 0x00010000) {
			DPRINTK2 ("DMA: BUFFER_NOTIFY\n");
		}
		if (intr0 & 0x00100000) {
			PRINTK_ERR ("DMA: ERROR\n");
			error = 1;
		}
	}

	if (intr0) {
		NvNotification *ntfy = (NvNotification *) info->dma.notifiers;

		/* 'reg' ends up holding NSOURCE on NV04 and later (the NSTATUS value is
		   only logged), or a 0/1 notify flag on NV03; bit 0 is tested below */
		if (info->chip.arch != NV_ARCH_03) {
			/* NV_PGRAPH_NSTATUS */
			reg = VID_RD32 (info->chip.PGRAPH, 0x104);
			DPRINTK2 ("DMA: NSTATUS 0x%08X\n", reg);
			/* NV_PGRAPH_NSOURCE */
			reg = VID_RD32 (info->chip.PGRAPH, 0x108);
			DPRINTK2 ("DMA: NSOURCE 0x%08X\n", reg);
		} else {
			reg = (intr0 & 0x10000000) ? 1 : 0;
		}

		/* fetch the instance and notifier addresses; the register offsets differ
		   per architecture, and both values stay 0 for any architecture not listed */
		switch (info->chip.arch) {
		case NV_ARCH_03:
			instance = VID_RD32 (info->chip.PGRAPH, 0x68C);
			notify = VID_RD32 (info->chip.PGRAPH, 0x684) >> 16;
			break;
		case NV_ARCH_04:
			instance = VID_RD32 (info->chip.PGRAPH, 0x16C);
			notify = VID_RD32 (info->chip.PGRAPH, 0x164) >> 16;
			break;
		case NV_ARCH_10:
		case NV_ARCH_20:
		case NV_ARCH_30:
			instance = VID_RD32 (info->chip.PGRAPH, 0x158);
			notify = VID_RD32 (info->chip.PGRAPH, 0x150) >> 16;
			break;
		}

		DPRINTK2 ("DMA: INSTANCE 0x%08X\n", instance); 
		DPRINTK2 ("DMA: NOTIFIER 0x%08X\n", notify); 
		if (info->chip.arch == NV_ARCH_03) {
			DPRINTK2 ("DMA: BUSY 0x%08X\n", VID_RD32 (info->chip.PGRAPH, 0x6B0));
		} else {
			DPRINTK2 ("DMA: BUSY 0x%08X\n", VID_RD32 (info->chip.PGRAPH, 0x700));
		}

		/* on errors dump the offending object and its notifier object */
		if (error) {
			rivatv_printObject (info, instance, "DMA Error Object");
			rivatv_printObject (info, notify, "DMA Notifier Object");
		}

		/* write the interrupt bits back - presumably this acknowledges them */
		/* NV_PGRAPH_INTR */
		VID_WR32 (info->chip.PGRAPH, 0x100, intr0);
		/* NV_PMC_INTR_PGRAPH_PENDING */
		VID_WR32 (info->chip.PMC, 0x100, 0x00001000);
	
		/* only notifications of the memory-to-memory format object count as
		   finished DMA transfers */
		if ((reg & 0x01) && (instance == ADDR_OBJ_MEMFMT)) {

			info->sched.stats.dma_interrupts++;

			if (info->chip.arch != NV_ARCH_03) {
				/* NV_PGRAPH_STATUS_XXX_BUSY */
				while (VID_RD32 (info->chip.PGRAPH, 0x700) & 0x00020000);
			}

			/* zero copy transfers complete once every plane has been notified */
			if (info->sched.flags & RIVATV_SCHED_DMA_ZC)
				info->sched.dma_zc_count++;
			if (!(info->sched.flags & RIVATV_SCHED_DMA_ZC) || 
			    info->sched.dma_zc_count == info->sched.dma_zc_planes) {
				ntfy->status = 0x00;
				rivatv_schedule_next (info);
				tasklet_hi_schedule (&info->dma.task);
			}
		}
	}

	/* re-enable FIFO access */
	if (info->chip.arch == NV_ARCH_03) {
		VID_WR32 (info->chip.PGRAPH, 0x6A4, 1);
	} else {
		/* NV_PGRAPH_FIFO_ACCESS_ENABLED */
		VID_WR32 (info->chip.PGRAPH, 0x720, 1);	
	}
}

/* Direct (zero copy) DMA descriptions, one entry per palette number; the
   VIDEO_PALETTE_* name belonging to each index is noted on the entry.

   The table is indexed with info->format.format.  rivatv_checkDirectDMA() reads
   the 'valid' member, rivatv_startDMA() reads 'nplanes' and hands each element
   of 'plane[]' to rivatv_prepareDMA().  Presumably the first two values of every
   entry are 'valid' and 'nplanes' and the braces hold the planes - confirm
   against the declaration of struct rivatv_direct_DMA, which also defines the
   meaning of the individual plane values. */
struct rivatv_direct_DMA rivatv_convert_DMA[17] = {
	{ /* 0			      */ 0, 0, { } },
	{ /* 1	VIDEO_PALETTE_GREY    */ 1, 1,
	  { { 1,  0, 2, 2, 2, 4, NV0039_SET_FORMAT_INPUT_INC_2, NV0039_SET_FORMAT_OUTPUT_INC_1, 1 },
	    {  },
	    {  } } },
	{ /* 2	VIDEO_PALETTE_HI240   */ 0, 0, { } },
	{ /* 3	VIDEO_PALETTE_RGB565  */ 0, 0, { } },
	{ /* 4	VIDEO_PALETTE_RGB24   */ 0, 0, { } },
	{ /* 5	VIDEO_PALETTE_RGB32   */ 0, 0, { } },
	{ /* 6	VIDEO_PALETTE_RGB555  */ 0, 0, { } },
	{ /* 7	VIDEO_PALETTE_YUV422  */ 1, 2,
	  { { 1,  0, 2, 4, 2, 4, NV0039_SET_FORMAT_INPUT_INC_2, NV0039_SET_FORMAT_OUTPUT_INC_2, 0 }, 
	    { 0,  1, 2, 4, 2, 4, NV0039_SET_FORMAT_INPUT_INC_2, NV0039_SET_FORMAT_OUTPUT_INC_2, 0 }, 
	    { } } },
	{ /* 8	VIDEO_PALETTE_YUYV    */ 1, 2,
	  { { 1,  0, 2, 4, 2, 4, NV0039_SET_FORMAT_INPUT_INC_2, NV0039_SET_FORMAT_OUTPUT_INC_2, 0 }, 
	    { 0,  1, 2, 4, 2, 4, NV0039_SET_FORMAT_INPUT_INC_2, NV0039_SET_FORMAT_OUTPUT_INC_2, 0 }, 
	    { } } },
	{ /* 9	VIDEO_PALETTE_UYVY    */ 1, 1,
	  { { 0,  0, 2, 4, 1, 4, NV0039_SET_FORMAT_INPUT_INC_1, NV0039_SET_FORMAT_OUTPUT_INC_1, 0 }, 
	    { }, 
	    { } } },
	{ /* 10 VIDEO_PALETTE_YUV420  */ 0, 0, { } },
	{ /* 11 VIDEO_PALETTE_YUV411  */ 0, 0, { } },
	{ /* 12 VIDEO_PALETTE_RAW     */ 0, 0, { } },
	{ /* 13 VIDEO_PALETTE_YUV422P */ 1, 3,
	  { { 1,  0, 2, 2, 2, 4, NV0039_SET_FORMAT_INPUT_INC_2, NV0039_SET_FORMAT_OUTPUT_INC_1, 1 }, 
	    { 0, 16, 2, 1, 4, 4, NV0039_SET_FORMAT_INPUT_INC_4, NV0039_SET_FORMAT_OUTPUT_INC_1, 1 }, 
	    { 2, 24, 2, 1, 4, 4, NV0039_SET_FORMAT_INPUT_INC_4, NV0039_SET_FORMAT_OUTPUT_INC_1, 1 } } },
	{ /* 14 VIDEO_PALETTE_YUV411P */ 0, 0, { } },
	{ /* 15 VIDEO_PALETTE_YUV420P */ 1, 3,
	  { { 1,  0, 2, 2, 2, 4, NV0039_SET_FORMAT_INPUT_INC_2, NV0039_SET_FORMAT_OUTPUT_INC_1, 1 }, 
	    { 0, 16, 4, 1, 4, 2, NV0039_SET_FORMAT_INPUT_INC_4, NV0039_SET_FORMAT_OUTPUT_INC_1, 1 }, 
	    { 2, 20, 4, 1, 4, 2, NV0039_SET_FORMAT_INPUT_INC_4, NV0039_SET_FORMAT_OUTPUT_INC_1, 1 } } },
	{ /* 16 VIDEO_PALETTE_YUV410P */ 0, 0, { } },
};

/* Reports the AGP capabilities of the chipset (an AGP bridge) and of the nVidia card.
 *
 * When the kernel AGP backend is compiled in, the AGPGART version, aperture and
 * supported rates are logged first.  Unless the 'agp' module parameter is zero,
 * the AGP capability of the PCI device is then looked up and its supported rates
 * are logged.  The function only produces log output.
 */
void rivatv_checkAGP (struct rivatv_info *info)
{
	int cap;
	u32 status, command;

#if (defined(CONFIG_AGP) || defined(CONFIG_AGP_MODULE)) && !defined(RIVATV_DISABLE_AGP)
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 0)
	agp_kern_info agpinfo;
#else
	struct agp_kern_info agpinfo;
#endif
	agp_copy_info (&agpinfo);
	if (agpinfo.chipset != NOT_SUPPORTED) {
		PRINTK_INFO ("AGPGART: version %d.%d\n", agpinfo.version.major, agpinfo.version.minor);
		PRINTK_INFO ("AGPGART: aperture is %dMB @ 0x%08lX, AGP%s%s%s supported\n", agpinfo.aper_size,
			     agpinfo.aper_base,
			     (agpinfo.mode & 0x01) ? " 1x" : "",
			     (agpinfo.mode & 0x02) ? " 2x" : "",
			     (agpinfo.mode & 0x04) ? " 4x" : "");
	} else {
		PRINTK_INFO ("AGPGART: chipset not supported\n");
	}
#else
	PRINTK_INFO ("AGPGART: not available\n");
#endif

	/* AGP usage was not requested via the module parameter */
	if (!agp) {
		PRINTK_INFO ("AGP: disabled\n");
		return;
	}

	cap = pci_find_capability (info->pci, PCI_CAP_ID_AGP);
	if (cap == 0) {
		PRINTK_INFO ("AGP: not supported by this device, PCI only\n");
		return;
	}

	/* the command register is read as well, but only the status is reported */
	pci_read_config_dword (info->pci, cap + PCI_AGP_STATUS, &status);
	pci_read_config_dword (info->pci, cap + PCI_AGP_COMMAND, &command);
	PRINTK_INFO ("AGP:%s%s%s supported\n",
		     (status & PCI_AGP_STATUS_RATE1) ? " 1x" : "",
		     (status & PCI_AGP_STATUS_RATE2) ? " 2x" : "",
		     (status & PCI_AGP_STATUS_RATE4) ? " 4x" : "");
}
