/*
 * VIC driver for Tegra X1
 *
 * Copyright (c) 2018-2024 CTCaer
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

#include <string.h>

#include "vic.h"
#include <mem/heap.h>
#include <soc/bpmp.h>
#include <soc/clock.h>
#include <soc/timer.h>
#include <soc/t210.h>
#include <utils/types.h>

/*
 * VIC Private registers.
 *
 * These offsets are used directly with the VIC() accessor.
 * _vic_read_priv()/_vic_write_priv() combine PVIC_FALCON_ADDR and
 * PVIC_FALCON_PA_OFFSET into an indirect window through which the VIC_*
 * control/status registers defined below are reached, and _vic_wait_idle()
 * polls PVIC_FALCON_IDLESTATE.
 */
#define PVIC_FALCON_PA_OFFSET 0x1000 // Window base. The accessors add (addr >> 6) to it.
#define PVIC_FALCON_ADDR      0x10AC // Written with (addr & 0xFF) >> 2 before an access, then reset to 0.
#define PVIC_FALCON_IDLESTATE 0x104C // _vic_wait_idle() treats any non-zero value as "not idle yet".

/* VIC Control and Status registers. */
/*
 * The VIC_* addresses from here on are not used with VIC() directly. In this
 * file they are only ever passed to _vic_read_priv()/_vic_write_priv().
 * Defines indented by one extra space are bit or field values that belong to
 * the register define listed above them.
 */
/* Fetch Control registers. */
// Written with COMPOSE_START by vic_compose() to start a composition.
#define VIC_FC_COMPOSE                         0x10000
#define  COMPOSE_START                          BIT(0)

#define VIC_FC_CFG_STRUCT_SLOT_INDEX           0x10B00

#define VIC_FC_CFG_STRUCT_SLOT_CFG0            0x10B04
#define  SLOT_ENABLE                            BIT(0)
#define  FIELD_CURRENT_ENABLE                   BIT(8)

#define VIC_FC_CFG_STRUCT_SLOT_CFG2            0x10B0C
// Cache width selectors. vic_set_surface() stores CACHE_WIDTH_64BX4 in
// SlotSurfaceConfig.SlotCacheWidth. CACHE_WIDTH(n) is the register-field form.
#define  CACHE_WIDTH(n) ((n) << 16)
#define  CACHE_WIDTH_16BX16                     0 // Block Linear.
#define  CACHE_WIDTH_32BX8                      1 // Block Linear. Recommended for Block Linear.
#define  CACHE_WIDTH_64BX4                      2 // Block Linear, Pitch. Recommended for Pitch.
#define  CACHE_WIDTH_128BX2                     3 // Block Linear, Pitch.
#define  OUTPUT_FLIP_X                          BIT(20)
#define  OUTPUT_FLIP_Y                          BIT(21)
#define  OUTPUT_TRANSPOSE                       BIT(22)

#define VIC_FC_CFG_STRUCT_SLOT_SFC_SIZE        0x10B10
#define VIC_FC_CFG_STRUCT_SLOT_LUMA_SIZE       0x10B14
#define VIC_FC_CFG_STRUCT_SLOT_CHROMA_SIZE     0x10B18
#define VIC_FC_CFG_STRUCT_SLOT_SRC_RECT_LR     0x10B1C
#define VIC_FC_CFG_STRUCT_SLOT_SRC_RECT_TB     0x10B20
#define VIC_FC_CFG_STRUCT_SLOT_DST_RECT_LR     0x10B30
#define VIC_FC_CFG_STRUCT_SLOT_DST_RECT_TB     0x10B34
#define VIC_FC_CFG_STRUCT_TGT_RECT_LR          0x10B38
#define VIC_FC_CFG_STRUCT_TGT_RECT_TB          0x10B3C
// Written with 0xFFFFFFF0 by vic_set_surface().
#define VIC_FC_SLOT_MAP                        0x10C00

// Written with START_TRIGGER by vic_init() once the microcode is loaded.
#define VIC_FC_FCE_CTRL                        0x11000
#define  START_TRIGGER                          BIT(0)
#define  HALT_TRIGGER                           BIT(1)
#define  CLEAR_ERROR                            BIT(8)

// Microcode upload pair used by vic_init(): byte offset first, then the
// 32-bit instruction word for that offset.
#define VIC_FC_FCE_UCODE_ADDR                  0x11200
#define VIC_FC_FCE_UCODE_INST                  0x11300

/* Surface List registers. */
// None of the VIC_SL_* registers are accessed by the functions in this file.
#define VIC_SL_CFG_STRUCT_SLOT_INDEX           0x12100
#define VIC_SL_CFG_STRUCT_SLOT_DST_RECT_LR     0x12200
#define VIC_SL_CFG_STRUCT_SLOT_DST_RECT_TB     0x12300
#define VIC_SL_CFG_STRUCT_TGT_RECT_LR          0x12400
#define VIC_SL_CFG_STRUCT_TGT_RECT_TB          0x12500
#define VIC_SL_CFG_STRUCT_SLOT_CFG0            0x12600

/* Surface Cache registers. */
// vic_set_surface() writes the address of vic_cfg shifted right by 8 to
// PRAMBASE and sizeof(vic_config_t) shifted right by 6 to PRAMSIZE.
#define VIC_SC_PRAMBASE                        0x14000
#define VIC_SC_PRAMSIZE                        0x14100
// Per-index register with a 0x100 stride. vic_set_surface() writes the source
// buffer address shifted right by 8 to index 0.
#define VIC_SC_SFC0_BASE_LUMA(n)               (0x14300 + (n) * 0x100)
/* Blending Output registers. */
// vic_set_surface() writes the destination buffer address shifted right by 8.
#define VIC_BL_TARGET_BASADR                   0x22000
// vic_set_surface() writes
// SLOTMASK(0x1F) | PROCESS_CFG_STRUCT_TRIGGER | SUBPARTITION_MODE here as the
// last step before waiting for idle.
#define VIC_BL_CONFIG                          0x22800
#define  SUBPARTITION_MODE                      BIT(0)
#define  PROCESS_CFG_STRUCT_TRIGGER             BIT(2)
#define  SLOTMASK(n) ((n) << 8)

// The VIC_BL_CFG_STRUCT_* registers are not accessed by the functions in this file.
#define VIC_BL_CFG_STRUCT_CFG0                 0x22C00
#define VIC_BL_CFG_STRUCT_SFC_SIZE             0x22C04
#define VIC_BL_CFG_STRUCT_LUMA_SIZE            0x22C08
#define VIC_BL_CFG_STRUCT_CHROMA_SIZE          0x22C0C
#define VIC_BL_CFG_STRUCT_TGT_RECT_LR          0x22C10
#define VIC_BL_CFG_STRUCT_TGT_RECT_TB          0x22C14

// VIC_FC_CFG_STRUCT_SLOT_CFG2 & VIC_BL_CFG_STRUCT_CFG0.
// BLK_KIND(n)/BLK_HEIGHT(n) shift a selector into its register field. The bare
// BLK_KIND_* values are also what vic_set_surface() stores in the
// OutBlkKind/SlotBlkKind bit-fields of the configuration structure.
#define BLK_KIND(n)    ((n) << 8)
#define  BLK_KIND_PITCH          0
#define  BLK_KIND_GENERIC_16BX2  1
#define BLK_HEIGHT(n)  ((n) << 12)
#define  BLK_HEIGHT_ONE_GOB      0
#define  BLK_HEIGHT_SIXTEEN_GOBS 4

// Generic size macros.
// Sizes and the right/bottom edges are encoded as (n - 1). The left/top edges
// are encoded as-is. The second value of each pair sits in the upper 16 bits.
#define SIZE_WIDTH(n)  (((n) - 1) << 0)
#define SIZE_HEIGHT(n) (((n) - 1) << 16)
#define RECT_LEFT(n)   ((n) << 0)
#define RECT_RIGHT(n)  (((n) - 1) << 16)
#define RECT_TOP(n)    ((n) << 0)
#define RECT_BOTTOM(n) (((n) - 1) << 16)

// Values stored by vic_set_surface() in slot 0: FrameFormat, SoftClampLow,
// SoftClampHigh and PlanarAlpha respectively. 0x3FF is the largest value that
// fits the 10-bit SoftClampHigh/PlanarAlpha fields.
#define FORMAT_PROGRESSIVE 0
#define SOFT_CLAMP_MIN     0
#define SOFT_CLAMP_MAX     0x3FFu
#define ALPHA_1_0          0x3FFu

// Output (target) configuration part of vic_config_t.
// The declared field widths add up to 128 bits (two 64-bit units).
// NOTE(review): the in-memory layout relies on the compiler packing bit-fields
// contiguously in declaration order - confirm against the toolchain ABI.
typedef struct _OutputConfig {
	// First 64 bits: alpha fill, background color and output transform.
	u64 AlphaFillMode:3;
	u64 AlphaFillSlot:3;
	u64 BackgroundAlpha:10;
	u64 BackgroundR:10;
	u64 BackgroundG:10;
	u64 BackgroundB:10;
	u64 RegammaMode:2;
	// Set by vic_set_surface() from the requested rotation.
	u64 OutputFlipX:1;
	u64 OutputFlipY:1;
	u64 OutputTranspose:1;
	u64 rsvd1:1;
	u64 rsvd2:12;
	// Second 64 bits: target rectangle. vic_set_surface() sets left/top to 0
	// and right/bottom to (width - 1)/(height - 1).
	u64 TargetRectLeft:14;
	u64 rsvd3:2;
	u64 TargetRectRight:14;
	u64 rsvd4:2;
	u64 TargetRectTop:14;
	u64 rsvd5:2;
	u64 TargetRectBottom:14;
	u64 rsvd6:2;
} OutputConfig;

// Output surface description part of vic_config_t.
// The declared field widths add up to 128 bits. The field order mirrors
// SlotSurfaceConfig, except that the 3 bits used there for SlotCacheWidth are
// reserved here (rsvd0).
typedef struct _OutputSurfaceConfig {
	// Format and memory layout. vic_set_surface() sets the pixel format,
	// BLK_KIND_PITCH and a block height of 0.
	u64 OutPixelFormat:7;
	u64 OutChromaLocHoriz:2;
	u64 OutChromaLocVert:2;
	u64 OutBlkKind:4;
	u64 OutBlkHeight:4;
	u64 rsvd0:3;
	u64 rsvd1:10;
	// Dimensions. vic_set_surface() stores (size - 1) in the surface and luma
	// fields and leaves the chroma fields untouched.
	u64 OutSurfaceWidth:14;
	u64 OutSurfaceHeight:14;
	u64 rsvd2:4;
	u64 OutLumaWidth:14;
	u64 OutLumaHeight:14;
	u64 rsvd3:4;
	u64 OutChromaWidth:14;
	u64 OutChromaHeight:14;
	u64 rsvd4:4;
} OutputSurfaceConfig;

// Per-slot processing configuration (first member of SlotStruct).
// The declared field widths add up to 512 bits, i.e. eight 64-bit units. The
// group comments below mark where each 64-bit unit starts.
// vic_set_surface() only writes SlotEnable, FrameFormat, SoftClampLow,
// SoftClampHigh, PlanarAlpha, ConstantAlpha and the source/destination
// rectangles. Every other field keeps the zero value vic_cfg was initialized with.
// NOTE(review): the in-memory layout relies on the compiler packing bit-fields
// contiguously in declaration order - confirm against the toolchain ABI.
typedef struct _SlotConfig {
	// Unit 0: enables, frame format and filter lengths.
	u64 SlotEnable:1;
	u64 DeNoise:1;
	u64 AdvancedDenoise:1;
	u64 CadenceDetect:1;
	u64 MotionMap:1;
	u64 MMapCombine:1;
	u64 IsEven:1;
	u64 ChromaEven:1;
	u64 CurrentFieldEnable:1;
	u64 PrevFieldEnable:1;
	u64 NextFieldEnable:1;
	u64 NextNrFieldEnable:1;
	u64 CurMotionFieldEnable:1;
	u64 PrevMotionFieldEnable:1;
	u64 PpMotionFieldEnable:1;
	u64 CombMotionFieldEnable:1;
	u64 FrameFormat:4;
	u64 FilterLengthY:2;
	u64 FilterLengthX:2;
	u64 Panoramic:12;
	u64 rsvd1:22;
	u64 DetailFltClamp:6;
	// Unit 1: noise/detail filter and deinterlace parameters.
	u64 FilterNoise:10;
	u64 FilterDetail:10;
	u64 ChromaNoise:10;
	u64 ChromaDetail:10;
	u64 DeinterlaceMode:4;
	u64 MotionAccumWeight:3;
	u64 NoiseIir:11;
	u64 LightLevel:4;
	u64 rsvd4:2;
	// Unit 2: soft clamp range, alpha and decompression control.
	u64 SoftClampLow:10;
	u64 SoftClampHigh:10;
	u64 rsvd5:3;
	u64 rsvd6:9;
	u64 PlanarAlpha:10;
	u64 ConstantAlpha:1;
	u64 StereoInterleave:3;
	u64 ClipEnabled:1;
	u64 ClearRectMask:8;
	u64 DegammaMode:2;
	u64 rsvd7:1;
	u64 DecompressEnable:1;
	u64 rsvd9:5;
	// Unit 3: decompression parameters.
	u64 DecompressCtbCount:8;
	u64 DecompressZbcColor:32;
	u64 rsvd12:24;
	// Units 4-5: source rectangle, 30 bits per edge. vic_set_surface() stores
	// right/bottom as (size - 1) << 16.
	u64 SourceRectLeft:30;
	u64 rsvd14:2;
	u64 SourceRectRight:30;
	u64 rsvd15:2;
	u64 SourceRectTop:30;
	u64 rsvd16:2;
	u64 SourceRectBottom:30;
	u64 rsvd17:2;
	// Unit 6: destination rectangle, 14 bits per edge, stored unshifted.
	u64 DestRectLeft:14;
	u64 rsvd18:2;
	u64 DestRectRight:14;
	u64 rsvd19:2;
	u64 DestRectTop:14;
	u64 rsvd20:2;
	u64 DestRectBottom:14;
	u64 rsvd21:2;
	// Unit 7: reserved.
	u64 rsvd22:32;
	u64 rsvd23:32;
} SlotConfig;

// Per-slot input surface description (second member of SlotStruct).
// The declared field widths add up to 128 bits. The field order mirrors
// OutputSurfaceConfig, with SlotCacheWidth in place of its 3 reserved bits.
typedef struct _SlotSurfaceConfig {
	// Format and memory layout. vic_set_surface() sets the pixel format,
	// BLK_KIND_PITCH, a block height of 0 and CACHE_WIDTH_64BX4.
	u64 SlotPixelFormat:7;
	u64 SlotChromaLocHoriz:2;
	u64 SlotChromaLocVert:2;
	u64 SlotBlkKind:4;
	u64 SlotBlkHeight:4;
	u64 SlotCacheWidth:3;
	u64 rsvd0:10;
	// Dimensions. vic_set_surface() stores (size - 1) in the surface and luma
	// fields and leaves the chroma fields untouched.
	u64 SlotSurfaceWidth:14;
	u64 SlotSurfaceHeight:14;
	u64 rsvd1:4;
	u64 SlotLumaWidth:14;
	u64 SlotLumaHeight:14;
	u64 rsvd2:4;
	u64 SlotChromaWidth:14;
	u64 SlotChromaHeight:14;
	u64 rsvd3:4;
} SlotSurfaceConfig;

// Complete description of one input slot: processing configuration, surface
// description and four opaque sub-structures that this driver never fills in.
// vic_config_t holds eight of these. Only slots[0] is written by vic_set_surface().
typedef struct _SlotStruct {
	SlotConfig slot_cfg;
	SlotSurfaceConfig slot_sfc_cfg;

	// No need to configure. Reset to zeros.
	// Kept as raw byte arrays so they only reserve space in the layout.
	u8 lumaKeyStruct[0x10];
	u8 colorMatrixStruct[0x20];
	u8 gamutMatrixStruct[0x20];
	u8 blendingSlotStruct[0x10];
} SlotStruct;

// Top-level configuration structure read by VIC from memory.
// vic_set_surface() fills the single instance (vic_cfg) and programs its
// address and size into VIC_SC_PRAMBASE/VIC_SC_PRAMSIZE.
typedef struct _vic_config_t {
	// No need to configure. Reset to zeros.
	u8 pipeConfig[0x10];

	// Output transform/target rectangle and output surface description.
	OutputConfig out_cfg;
	OutputSurfaceConfig out_sfc_cfg;

	// No need to configure. Reset to zeros.
	u8 out_color_matrix[0x20];
	u8 clear_rect[0x10 * 4];

	// Input slots. Only slots[0] is used by this driver.
	SlotStruct slots[8];
} vic_config_t;

// VIC Fetch Control Engine microcode. Dumped from L4T r33.
// Opaque binary blob: do not edit by hand. vic_init() uploads it in 32-bit
// units (VIC_FC_FCE_UCODE_ADDR = byte offset, VIC_FC_FCE_UCODE_INST = word), so
// the array length must stay a multiple of 4 bytes. Each group of four bytes
// below is one such unit.
u8 vic_fce_ucode[] = {
	0x66, 0x00, 0x00, 0x00, 0x60, 0x07, 0x00, 0x00, 0x42, 0x40, 0x10, 0x00, 0x4E, 0x01, 0x40, 0x00,
	0x6A, 0x07, 0x00, 0x00, 0x6E, 0x23, 0x04, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x4E, 0x01, 0x04, 0x00,
	0x6A, 0x0B, 0x00, 0x00, 0x6E, 0x1F, 0x04, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x4E, 0x01, 0x10, 0x00,
	0x6A, 0x0F, 0x00, 0x00, 0x6E, 0x1F, 0x04, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x48, 0x80, 0x02, 0x00,
	0x0E, 0x11, 0x00, 0x00, 0x6A, 0x14, 0x00, 0x00, 0x6E, 0x08, 0x06, 0x00, 0x6C, 0x00, 0x00, 0x00,
	0x4E, 0x01, 0x08, 0x00, 0x6A, 0x18, 0x00, 0x00, 0x6E, 0x26, 0x04, 0x00, 0x6C, 0x00, 0x00, 0x00,
	0x4E, 0x01, 0x20, 0x00, 0x6A, 0x1C, 0x00, 0x00, 0x6E, 0x26, 0x04, 0x00, 0x6C, 0x00, 0x00, 0x00,
	0x4E, 0x01, 0x02, 0x00, 0x6A, 0x20, 0x00, 0x00, 0x6E, 0x24, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00,
	0x56, 0x00, 0x10, 0x00, 0x56, 0x40, 0x10, 0x00, 0x22, 0x41, 0x01, 0x00, 0x6C, 0x00, 0x00, 0x00,
	0x62, 0x80, 0x01, 0x00, 0x60, 0x47, 0x00, 0x00, 0x60, 0x87, 0x00, 0x00, 0x01, 0x4A, 0x00, 0x00,
	0x55, 0xC0, 0x20, 0x00, 0x00, 0x59, 0x00, 0x00, 0x60, 0x87, 0x00, 0x00, 0x60, 0xC7, 0x00, 0x00,
	0x01, 0x93, 0x00, 0x00, 0x40, 0x82, 0x02, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x6B, 0x34, 0x00, 0x00,
	0x43, 0xC1, 0x10, 0x00, 0x42, 0x02, 0x03, 0x00, 0x00, 0x23, 0x01, 0x00, 0x24, 0xD4, 0x00, 0x00,
	0x56, 0x40, 0x3D, 0x00, 0x04, 0xEB, 0x00, 0x00, 0x60, 0x07, 0x01, 0x00, 0x60, 0x47, 0x00, 0x00,
	0x6A, 0x3E, 0x00, 0x00, 0x55, 0xC0, 0x30, 0x00, 0x48, 0x00, 0x01, 0x00, 0x48, 0x40, 0x01, 0x00,
	0x48, 0x80, 0x01, 0x00, 0x6B, 0x28, 0x02, 0x00, 0x56, 0x40, 0x09, 0x00, 0x04, 0x4D, 0x01, 0x00,
	0x06, 0x4D, 0x00, 0x00, 0x42, 0xC0, 0x03, 0x00, 0x56, 0x80, 0x09, 0x00, 0x04, 0xFE, 0x01, 0x00,
	0x00, 0xF9, 0x01, 0x00, 0x4E, 0x02, 0x00, 0x00, 0x6B, 0x32, 0x02, 0x00, 0x55, 0x40, 0x2F, 0x00,
	0x56, 0x80, 0x0D, 0x00, 0x4F, 0x00, 0x00, 0x00, 0x6A, 0x0D, 0x02, 0x00, 0x55, 0x40, 0x31, 0x00,
	0x56, 0x80, 0x0B, 0x00, 0x0C, 0x2B, 0x00, 0x00, 0x6A, 0x13, 0x02, 0x00, 0x43, 0x45, 0x03, 0x00,
	0x42, 0x86, 0x03, 0x00, 0x4D, 0x06, 0x02, 0x00, 0x6A, 0x0D, 0x02, 0x00, 0x42, 0x86, 0x03, 0x00,
	0x22, 0x7E, 0x01, 0x00, 0x4E, 0x04, 0x00, 0x00, 0x6B, 0x32, 0x02, 0x00, 0x55, 0x40, 0x17, 0x00,
	0x0D, 0x2C, 0x00, 0x00, 0x56, 0xC0, 0x09, 0x00, 0x6A, 0x1E, 0x02, 0x00, 0x48, 0xC0, 0x01, 0x00,
	0x43, 0x04, 0x03, 0x00, 0x6C, 0x20, 0x02, 0x00, 0x55, 0x40, 0x19, 0x00, 0x01, 0x2C, 0x01, 0x00,
	0x65, 0x23, 0x01, 0x00, 0x42, 0x42, 0x03, 0x00, 0x00, 0x2C, 0x01, 0x00, 0x24, 0x14, 0x01, 0x00,
	0x00, 0x2C, 0x01, 0x00, 0x24, 0x14, 0x01, 0x00, 0x00, 0x3C, 0x01, 0x00, 0x42, 0x04, 0x09, 0x00,
	0x42, 0xC3, 0x02, 0x00, 0x65, 0x54, 0x01, 0x00, 0x65, 0x55, 0x01, 0x00, 0x42, 0x45, 0x0D, 0x00,
	0x62, 0x03, 0x00, 0x00, 0x62, 0x44, 0x00, 0x00, 0x62, 0x85, 0x00, 0x00, 0x62, 0xC2, 0x00, 0x00,
	0x22, 0x48, 0x1F, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x48, 0x00, 0x01, 0x00, 0x6C, 0x28, 0x02, 0x00,
	0x62, 0x80, 0x01, 0x00, 0x60, 0x07, 0x00, 0x00, 0x60, 0x47, 0x00, 0x00, 0x60, 0x87, 0x00, 0x00,
	0x01, 0x01, 0x00, 0x00, 0x43, 0x00, 0x02, 0x00, 0x40, 0x00, 0x02, 0x00, 0x01, 0xCA, 0x01, 0x00,
	0x60, 0x03, 0x01, 0x00, 0x01, 0xA0, 0x01, 0x00, 0x60, 0x40, 0x00, 0x00, 0x65, 0x01, 0x00, 0x00,
	0x55, 0xC0, 0x2E, 0x00, 0x01, 0x18, 0x00, 0x00, 0x43, 0x00, 0x04, 0x00, 0x43, 0x41, 0x06, 0x00,
	0x6F, 0x00, 0x00, 0x00, 0x61, 0xC1, 0x00, 0x00, 0x61, 0x42, 0x01, 0x00, 0x65, 0xB5, 0x00, 0x00,
	0x65, 0x73, 0x01, 0x00, 0x65, 0x35, 0x01, 0x00, 0x65, 0x34, 0x01, 0x00, 0x42, 0x04, 0x0D, 0x00,
	0x01, 0x14, 0x01, 0x00, 0x42, 0x04, 0x03, 0x00, 0x00, 0x20, 0x00, 0x00, 0x43, 0x03, 0x05, 0x00,
	0x43, 0x85, 0x02, 0x00, 0x00, 0xAA, 0x00, 0x00, 0x48, 0x46, 0x01, 0x00, 0x65, 0xEB, 0x00, 0x00,
	0x00, 0x9A, 0x00, 0x00, 0x65, 0xB2, 0x01, 0x00, 0x00, 0xA6, 0x01, 0x00, 0x42, 0x86, 0x0D, 0x00,
	0x61, 0x42, 0x01, 0x00, 0x01, 0xAE, 0x01, 0x00, 0x00, 0x71, 0x00, 0x00, 0x42, 0x82, 0x08, 0x00,
	0x42, 0xC3, 0x08, 0x00, 0x48, 0x40, 0x01, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x6E, 0x34, 0x02, 0x00,
	0x65, 0x79, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x6C, 0x36, 0x04, 0x00, 0x6E, 0x34, 0x02, 0x00,
	0x48, 0x7F, 0x01, 0x00, 0x6C, 0x0A, 0x06, 0x00, 0x6E, 0x34, 0x02, 0x00, 0x6E, 0x05, 0x04, 0x00,
	0x65, 0x79, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x41, 0x87, 0x03, 0x00, 0x65, 0xBA, 0x00, 0x00,
	0x65, 0xB2, 0x00, 0x00, 0x42, 0x82, 0x02, 0x00, 0x00, 0x51, 0x00, 0x00, 0x61, 0xC1, 0x00, 0x00,
	0x65, 0xFB, 0x00, 0x00, 0x65, 0xF3, 0x00, 0x00, 0x41, 0x87, 0x05, 0x00, 0x65, 0xF3, 0x00, 0x00,
	0x42, 0xC3, 0x08, 0x00, 0x00, 0x59, 0x00, 0x00, 0x60, 0xC7, 0x00, 0x00, 0x60, 0xC7, 0x00, 0x00,
	0x56, 0xC0, 0x21, 0x00, 0x04, 0xDF, 0x01, 0x00, 0x43, 0xC7, 0x15, 0x00, 0x00, 0x38, 0x00, 0x00,
	0x00, 0x79, 0x00, 0x00, 0x42, 0xC3, 0x20, 0x00, 0x43, 0xC3, 0x04, 0x00, 0x42, 0x00, 0x30, 0x00,
	0x42, 0x41, 0x30, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x4B, 0x00, 0x00, 0x60, 0xC7, 0x01, 0x00,
	0x22, 0x78, 0x01, 0x00, 0x22, 0x79, 0x03, 0x00, 0x22, 0x7F, 0x1F, 0x00, 0x6F, 0x00, 0x00, 0x00,
	0x6E, 0x34, 0x02, 0x00, 0x6E, 0x05, 0x04, 0x00, 0x4B, 0x41, 0x00, 0x00, 0x60, 0xC7, 0x01, 0x00,
	0x60, 0x87, 0x01, 0x00, 0x43, 0x86, 0x15, 0x00, 0x00, 0x30, 0x00, 0x00, 0x65, 0x39, 0x01, 0x00,
	0x42, 0x04, 0x05, 0x00, 0x4E, 0x05, 0x7E, 0x00, 0x6A, 0x1B, 0x06, 0x00, 0x55, 0xC0, 0x3D, 0x00,
	0x0A, 0x3C, 0x01, 0x00, 0x60, 0xC7, 0x01, 0x00, 0x22, 0x78, 0x01, 0x00, 0x22, 0x79, 0x03, 0x00,
	0x22, 0x7C, 0x09, 0x00, 0x22, 0x7F, 0x1F, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x65, 0x7A, 0x01, 0x00,
	0x42, 0x45, 0x05, 0x00, 0x65, 0xBB, 0x01, 0x00, 0x42, 0x86, 0x05, 0x00, 0x55, 0xC0, 0x3D, 0x00,
	0x0A, 0x7D, 0x01, 0x00, 0x0A, 0xBE, 0x01, 0x00, 0x07, 0xC7, 0x01, 0x00, 0x0B, 0x7D, 0x01, 0x00,
	0x0B, 0xBE, 0x01, 0x00, 0x55, 0xC0, 0x3D, 0x00, 0x0A, 0x3C, 0x01, 0x00, 0x60, 0xC7, 0x01, 0x00,
	0x22, 0x78, 0x01, 0x00, 0x22, 0x79, 0x03, 0x00, 0x22, 0x7A, 0x05, 0x00, 0x22, 0x7B, 0x07, 0x00,
	0x22, 0x7C, 0x09, 0x00, 0x22, 0x7D, 0x0B, 0x00, 0x22, 0x7E, 0x0D, 0x00, 0x22, 0x7F, 0x1F, 0x00,
	0x6F, 0x00, 0x00, 0x00
};

// The one configuration structure VIC is pointed at. vic_set_surface() programs
// its address into VIC_SC_PRAMBASE shifted right by 8, so the low 8 address
// bits must be zero. That is why it is aligned to 0x100.
// It is zero-initialized and never cleared afterwards, so fields that
// vic_set_surface() does not write keep their previous value.
vic_config_t __attribute__((aligned (0x100))) vic_cfg = {0};

/*
 * Reads one VIC control/status register through the Falcon private window.
 *
 * addr: one of the VIC_* register addresses.
 *
 * The value is read from PVIC_FALCON_PA_OFFSET + (addr >> 6). When the low
 * byte of addr is non-zero, PVIC_FALCON_ADDR is first loaded with that byte
 * divided by 4 and is cleared again after the read.
 *
 * Returns the 32-bit value read.
 */
u32 _vic_read_priv(u32 addr)
{
	const u32 low_byte = addr & 0xFF;
	u32 reg_val;

	// Nothing to select in the window: a plain read is enough.
	if (!low_byte)
		return VIC(PVIC_FALCON_PA_OFFSET + (addr >> 6));

	// Select the word, fetch it, then restore the selector to 0.
	VIC(PVIC_FALCON_ADDR) = low_byte >> 2;
	reg_val = VIC(PVIC_FALCON_PA_OFFSET + (addr >> 6));
	VIC(PVIC_FALCON_ADDR) = 0;

	return reg_val;
}

/*
 * Writes one VIC control/status register through the Falcon private window.
 *
 * addr: one of the VIC_* register addresses.
 * data: 32-bit value to store.
 *
 * The value is written to PVIC_FALCON_PA_OFFSET + (addr >> 6). When the low
 * byte of addr is non-zero, PVIC_FALCON_ADDR is first loaded with that byte
 * divided by 4 and is cleared again after the write.
 */
static void _vic_write_priv(u32 addr, u32 data)
{
	const u32 low_byte = addr & 0xFF;

	// Nothing to select in the window: a plain write is enough.
	if (!low_byte)
	{
		VIC(PVIC_FALCON_PA_OFFSET + (addr >> 6)) = data;
		return;
	}

	// Select the word, store the value, then restore the selector to 0.
	VIC(PVIC_FALCON_ADDR) = low_byte >> 2;
	VIC(PVIC_FALCON_PA_OFFSET + (addr >> 6)) = data;
	VIC(PVIC_FALCON_ADDR) = 0;
}

/*
 * Polls PVIC_FALCON_IDLESTATE until it reads zero.
 *
 * The register is sampled every 10 us for at most 15000 rounds (150 ms).
 *
 * Returns 0 once the register reads zero, or -1 if it is still non-zero when
 * the poll budget runs out.
 */
static int _vic_wait_idle()
{
	for (u32 polls_left = 15000; VIC(PVIC_FALCON_IDLESTATE); )
	{
		usleep(10);

		// Give up when the 150 ms budget is used up.
		if (--polls_left == 0)
			return -1;
	}

	return 0;
}

/*
 * Fills the VIC configuration structure for a single-slot, pitch-linear
 * surface copy and hands it to the hardware.
 *
 * sfc: surface description. Fields read: width, height, pix_fmt, rotation,
 *      src_buf and dst_buf. Both buffer addresses are programmed shifted right
 *      by 8, so their low 8 bits are dropped.
 *
 * The same width/height pair is used for the input surface, the output
 * surface and all rectangles. Only slot 0 is configured.
 *
 * The function returns void, so a timeout of either internal idle wait is not
 * reported to the caller. vic_compose() performs its own wait and does report it.
 */
void vic_set_surface(vic_surface_t *sfc)
{
	u32 flip_x  = 0;
	u32 flip_y  = 0;
	u32 swap_xy = 0;
	u32 const_alpha = 0;

	u32 width   = sfc->width;
	u32 height  = sfc->height;
	u32 pix_fmt = sfc->pix_fmt;
	u32 src_buf = sfc->src_buf;
	u32 dst_buf = sfc->dst_buf;

	// Get format alpha type.
	// Formats without an alpha channel use the constant (planar) alpha value.
	switch (pix_fmt)
	{
	case VIC_PIX_FORMAT_L8:
	case VIC_PIX_FORMAT_X1B5G5R5:
	case VIC_PIX_FORMAT_B5G5R5X1:
	case VIC_PIX_FORMAT_X8B8G8R8:
	case VIC_PIX_FORMAT_X8R8G8B8:
	case VIC_PIX_FORMAT_B8G8R8X8:
	case VIC_PIX_FORMAT_R8G8B8X8:
		const_alpha = 1;
		break;

	case VIC_PIX_FORMAT_A8B8G8R8:
	case VIC_PIX_FORMAT_A8R8G8B8:
	case VIC_PIX_FORMAT_B8G8R8A8:
	case VIC_PIX_FORMAT_R8G8B8A8:
	default:
		break;
	}

	// Get rotation parameters.
	// A quarter turn is a transpose combined with exactly one axis flip. A
	// transpose on its own mirrors the image along its diagonal. 90 and 270
	// degrees therefore use opposite flips, 180 degrees apart from each other.
	switch (sfc->rotation)
	{
	case VIC_ROTATION_90:
		flip_y  = 1;
		swap_xy = 1;
		break;

	case VIC_ROTATION_180:
		flip_x  = 1;
		flip_y  = 1;
		break;

	case VIC_ROTATION_270:
		flip_x  = 1;
		swap_xy = 1;
		break;

	case VIC_ROTATION_0:
	default:
		break;
	}

	// Set output surface format.
	vic_cfg.out_sfc_cfg.OutPixelFormat = pix_fmt;
	vic_cfg.out_sfc_cfg.OutBlkKind     = BLK_KIND_PITCH;
	vic_cfg.out_sfc_cfg.OutBlkHeight   = 0;

	// Set output rotation/flip.
	// Always written, so a previous call's transform never leaks into this one.
	vic_cfg.out_cfg.OutputFlipX     = flip_x;
	vic_cfg.out_cfg.OutputFlipY     = flip_y;
	vic_cfg.out_cfg.OutputTranspose = swap_xy;

	// Set output surface resolution. Sizes are stored as (size - 1).
	vic_cfg.out_sfc_cfg.OutSurfaceWidth  = width  - 1;
	vic_cfg.out_sfc_cfg.OutSurfaceHeight = height - 1;
	vic_cfg.out_sfc_cfg.OutLumaWidth     = width  - 1;
	vic_cfg.out_sfc_cfg.OutLumaHeight    = height - 1;

	// Set output destination rectangle. Anything outside will not be touched at output buffer.
	vic_cfg.out_cfg.TargetRectLeft   = 0;
	vic_cfg.out_cfg.TargetRectRight  = width  - 1;
	vic_cfg.out_cfg.TargetRectTop    = 0;
	vic_cfg.out_cfg.TargetRectBottom = height - 1;

	// Initialize slot parameters.
	vic_cfg.slots[0].slot_cfg.SlotEnable    = 1;
	vic_cfg.slots[0].slot_cfg.SoftClampLow  = SOFT_CLAMP_MIN;
	vic_cfg.slots[0].slot_cfg.SoftClampHigh = SOFT_CLAMP_MAX;
	vic_cfg.slots[0].slot_cfg.PlanarAlpha   = ALPHA_1_0;
	vic_cfg.slots[0].slot_cfg.ConstantAlpha = const_alpha;
	vic_cfg.slots[0].slot_cfg.FrameFormat   = FORMAT_PROGRESSIVE;

	// Set input source rectangle. Right/bottom are stored shifted left by 16.
	vic_cfg.slots[0].slot_cfg.SourceRectLeft   = 0;
	vic_cfg.slots[0].slot_cfg.SourceRectRight  = (width  - 1) << 16;
	vic_cfg.slots[0].slot_cfg.SourceRectTop    = 0;
	vic_cfg.slots[0].slot_cfg.SourceRectBottom = (height - 1) << 16;

	// Set input destination rectangle.
	vic_cfg.slots[0].slot_cfg.DestRectLeft   = 0;
	vic_cfg.slots[0].slot_cfg.DestRectRight  = (width  - 1);
	vic_cfg.slots[0].slot_cfg.DestRectTop    = 0;
	vic_cfg.slots[0].slot_cfg.DestRectBottom = (height - 1);

	// Set input surface format.
	vic_cfg.slots[0].slot_sfc_cfg.SlotPixelFormat = pix_fmt;
	vic_cfg.slots[0].slot_sfc_cfg.SlotBlkKind     = BLK_KIND_PITCH;
	vic_cfg.slots[0].slot_sfc_cfg.SlotBlkHeight   = 0;
	vic_cfg.slots[0].slot_sfc_cfg.SlotCacheWidth  = CACHE_WIDTH_64BX4;

	// Set input surface resolution. Sizes are stored as (size - 1).
	vic_cfg.slots[0].slot_sfc_cfg.SlotSurfaceWidth  = width  - 1;
	vic_cfg.slots[0].slot_sfc_cfg.SlotSurfaceHeight = height - 1;
	vic_cfg.slots[0].slot_sfc_cfg.SlotLumaWidth     = width  - 1;
	vic_cfg.slots[0].slot_sfc_cfg.SlotLumaHeight    = height - 1;

	// Flush data. Must happen before VIC is told where the structure lives.
	bpmp_mmu_maintenance(BPMP_MMU_MAINT_CLEAN_WAY, false);

	// Set parameters base and size. Causes a parse by surface cache.
	_vic_write_priv(VIC_SC_PRAMBASE, (u32)&vic_cfg >> 8);
	_vic_write_priv(VIC_SC_PRAMSIZE, sizeof(vic_config_t) >> 6);

	// Wait for surface cache to get ready.
	_vic_wait_idle();

	// Set slot mapping.
	_vic_write_priv(VIC_FC_SLOT_MAP, 0xFFFFFFF0);

	// Set input surface buffer.
	_vic_write_priv(VIC_SC_SFC0_BASE_LUMA(0), src_buf >> 8);

	// Set output surface buffer.
	_vic_write_priv(VIC_BL_TARGET_BASADR, dst_buf >> 8);

	// Set blending config and push changes to surface cache.
	_vic_write_priv(VIC_BL_CONFIG, SLOTMASK(0x1F) | PROCESS_CFG_STRUCT_TRIGGER | SUBPARTITION_MODE);

	// Wait for surface cache to get ready.
	_vic_wait_idle();
}

/*
 * Starts the composition of a single frame with the configuration previously
 * set by vic_set_surface().
 *
 * The engine is first polled for idle, because starting while the surface
 * cache is not ready hangs VIC. The compose trigger is written regardless of
 * how that wait ended.
 *
 * Returns the result of the idle wait: 0 on success, -1 on timeout.
 */
int vic_compose()
{
	const int idle_status = _vic_wait_idle();

	// Kick off the composition.
	_vic_write_priv(VIC_FC_COMPOSE, COMPOSE_START);

	return idle_status;
}

int vic_init()
{
	clock_enable_vic();

	// Load Fetch Control Engine microcode.
	for (u32 i = 0; i < sizeof(vic_fce_ucode) / sizeof(u32); i++)
	{
		_vic_write_priv(VIC_FC_FCE_UCODE_ADDR, (i * sizeof(u32)));
		_vic_write_priv(VIC_FC_FCE_UCODE_INST, *(u32 *)&vic_fce_ucode[i * sizeof(u32)]);
	}

	// Start Fetch Control Engine.
	_vic_write_priv(VIC_FC_FCE_CTRL, START_TRIGGER);

	return _vic_wait_idle();
}

// Counterpart of vic_init(): disables the VIC clock again.
// Nothing else is torn down here and vic_cfg keeps its contents.
void vic_end()
{
	clock_disable_vic();
}