aboutsummaryrefslogtreecommitdiff
path: root/board/rda/common/vpu_test_coda960.c
blob: 82b0b1190f33dcf71fc7be91de288db8c869dc06 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
/**********************************
vpu test.
modify ver 1.0	: for CODA960-8810
2014.12.29
modify ver 1.1
2015.01.07
sheen
***********************************/

#include "coda960_regs.h" //sync with cnm-coda-sw-pkg within android project.
//vpu firmware
#include "coda960_fw.h"

//test video stream.
#define FRAME_TEST_NUM	(14) //decode frmae num < bit_stream frame num.
const unsigned int bit_stream[] =
{
//don't rename bitstream!!
//#include "bs_h264_5f_720x480.txt"
#include "freh3_h264_99f_hp_cif.txt"
};
//16bytes each md5 digest.
const unsigned char yuv_md5_digest[]=
{
//bs_h264_5f_720x480.txt yuv md5,create by coda960 cmodel.
//#include "bs_h264_5f_720x480.md5"
#include "freh3_h264_30f_hp_cif_decOrder.md5"
};
//#include <u-boot/md5.h>
#include "rda_md5.h"
/*************************************************
	configs
**************************************************/
//0= little endian, 1= big endian.
#define VPU_FRAME_ENDIAN			0
#define VPU_STREAM_ENDIAN			0

#define VPU_ENABLE_BWB			0	//burst write back. wirtes output with 8 burst in linear map mode.
#define	CBCR_INTERLEAVE			0	//[default 1 for BW checking with CnMViedo Conformance] 0 (chroma separate mode), 1 (chroma interleave mode) // if the type of tiledmap uses the kind of MB_RASTER_MAP. must set to enable CBCR_INTERLEAVE
#define VPU_REPORT_USERDATA		0//if enabled, user data is writen to user data buffer
#define	USE_BIT_INTERNAL_BUF	1 //enable secondary AXI for prediction data of the bit-processor.
#define USE_IP_INTERNAL_BUF		1 //enable secondary AXI for row pixel data of IP/AC-DC.
#define	USE_DBKY_INTERNAL_BUF	1 //enable secondary AXI for temporal luminance data of the de-blocking filter.
#define	USE_DBKC_INTERNAL_BUF	1 //enable secondary AXI for temporal chrominance data of the de-blocking filter.
#define	USE_OVL_INTERNAL_BUF	0 //enable secondary AXI for temporal data of the overlap filter(VC1 only)
#define	USE_BTP_INTERNAL_BUF	0//enable secondary AXI for bit-plane data of the bit-processor(VC1 only).

typedef enum {
	INT_BIT_INIT = 0,
	INT_BIT_SEQ_INIT = 1,
	INT_BIT_SEQ_END = 2,
	INT_BIT_PIC_RUN = 3,
	INT_BIT_FRAMEBUF_SET = 4,
	INT_BIT_ENC_HEADER = 5,
	INT_BIT_DEC_PARA_SET = 7,
	INT_BIT_DEC_BUF_FLUSH = 8,
	INT_BIT_USERDATA = 9,
	INT_BIT_DEC_MB_ROWS = 13,
	INT_BIT_BIT_BUF_EMPTY = 14,
	INT_BIT_BIT_BUF_FULL = 15
}InterruptBit;

//ms
#define VPU_ENC_TIMEOUT				5000
#define VPU_DEC_TIMEOUT				5000


/*****************************************************
	register and memory base address.
******************************************************/
//vpu reg base addr. sheen
#define BIT_REG_BASE		0x20830000 //CODA960
#define BIT_REG_SIZE		0x4000

//second AXI base addr.
// SECOND AXI ACCCESS MEMORY ON CHIP
//#define	HD_SEC_AXI_BASE_ADDR	0x1C00000	//8810 CODA960
#define	HD_SEC_AXI_BASE_ADDR	0x100000	//8810 CODA960

//external sdram base addr. sheen
// FIRST AXI ACCCESS SDRAM
//#define	HD_BASE_ADDR					0x8db00000
#define	HD_BASE_ADDR					0x88000000  //minimum reserve 128M for 32 frames 1080P

// Base address of the bitstream buffer. for the whole bitstream.
#define	HD_ADDR_BIT_STREAM				(HD_BASE_ADDR + 0x000000)
// Size of the bitstream buffer in byte,(1M bytes)
#define	HD_STREAM_BUF_SIZE				0x100000
// Base address for the firmware image, (firmware size 126976x2)
#define	HD_ADDR_BIT_CODE				(HD_ADDR_BIT_STREAM + HD_STREAM_BUF_SIZE)
//bitcode buf size 260KB
#define CODE_BUF_SIZE	  (260*1024)//CODA960
// Base address for the firmware common parameters buffer.
//1.record decode frame buf YUV addr,384= 4*3*MAX32, 2.record MvColBuf.sheen
#define	HD_ADDR_BIT_PARA				(HD_ADDR_BIT_CODE + CODE_BUF_SIZE)
//common parameters buffer size 10KB
#define PARA_BUF_SIZE	 (10*1024) //CODA960
// Base address for the firmware common working buffer.
#define	HD_ADDR_BIT_WORK				(HD_ADDR_BIT_PARA + PARA_BUF_SIZE)
//common work buffer size, AVC MAX= (WORK_BUF_SIZE + PS_SAVE_SIZE)=(80KB+320KB)= 400KB
#define WORK_BUF_SIZE	  (400*1024) //CODA960
//base address of the firmware common temp buffer.
#define HD_ADDR_TEMP_BUF				(HD_ADDR_BIT_WORK + WORK_BUF_SIZE)
//common temp buffer size.
#define TEMP_BUF_SIZE	  (204*1024) //CODA960
// Slice Buffer of decoder. max as frame buffer.
#define HD_ADDR_SLICE_BUFFER			(HD_ADDR_TEMP_BUF + TEMP_BUF_SIZE)
// For VP8. reuse HD_ADDR_SLICE_BUFFER
#define HD_ADDR_VP8DEC_MB_BUF	  		HD_ADDR_SLICE_BUFFER
// For MP4. reuse HD_ADDR_SLICE_BUFFER
#define HD_ADDR_MP4ENC_DP_BUF	  		HD_ADDR_SLICE_BUFFER
//max slice save buffer size. max 1920*1088*1.5
#define SLICE_SAVE_SIZE				 (1920*1088*3/4)
//SPS/PPS save buffer. Parameter Set Buffer for H.264 decoder
#define HD_ADDR_PS_SAVE_BUFFER			(HD_ADDR_SLICE_BUFFER + SLICE_SAVE_SIZE)
//SPS/PPS save buffer size. CODA960 320KB, CODA7L 512KB.
#define HD_PS_SAVE_SIZE					0x100000
// Base address of the DPB.yuv frame buffer.
#define HD_ADDR_FRAME_BASE				(HD_ADDR_PS_SAVE_BUFFER	+ HD_PS_SAVE_SIZE)
//register yuv buf num for decode. MAX=32, sheen
#define HD_REG_FRAME_BUF_NUM   32
// End address of the DPB(128MB-21MB)
//resolution is limited to 1920x1088 (1920x1088x1.75x32=112 MB frame buffer size)
#define HD_MAX_FRAME_BASE				(HD_ADDR_FRAME_BASE + (1920*1088*7/4)*HD_REG_FRAME_BUF_NUM)


/********second AXI addr config***********/
//second AXI SRAM size 0x10000 (8810 CODA960)
//max size for 1920x1088(120x68MB) H.264 8810 CODA960
//Y Deblocking FIlter buffer, size 120*128= 0x3C00 bytes
#define HD_ADDR_SEC_AXI_DBKY  			(HD_SEC_AXI_BASE_ADDR)
//C Deblocking FIlter buffer, size 120*128= 0x3C00 bytes
#define HD_ADDR_SEC_AXI_DBKC			(HD_ADDR_SEC_AXI_DBKY + 0x3C00)
//MB Information, size 120*144= 0x4380 bytes
#define HD_ADDR_SEC_AXI_BIT 			(HD_ADDR_SEC_AXI_DBKC + 0x3C00)
//Intra Prediciton buffer,  size 120*64=0x1e00 bytes
#define HD_ADDR_SEC_AXI_IP			(HD_ADDR_SEC_AXI_BIT + 0x4380)
//VC1 only overlap Filter, size 0x2580 bytes
#define HD_ADDR_SEC_AXI_OVL  			(HD_ADDR_SEC_AXI_IP + 0x1E00)
//VC1 only BIT PLANE, size 0xF00  bytes
#define HD_ADDR_SEC_AXI_BTP  			(HD_ADDR_SEC_AXI_OVL + 0x2580)
#define HD_ADDR_SEC_AXI_BUF_END  		(HD_SEC_AXI_BASE_ADDR + 0x10000)

/************************************************************/

//vpu registers read/write
#define VpuWriteReg(ADDR, DATA)   *((volatile unsigned int *)(ADDR + (unsigned int)BIT_REG_BASE)) = DATA
#define VpuReadReg(ADDR)		*((volatile unsigned int *)(ADDR + (unsigned int)BIT_REG_BASE))

//directly addr read/write
#define MREAD_WORD(ADDR)	*((volatile int *)(ADDR))
#define MWRITE_WORD(ADDR,DATA)	*((volatile int *)(ADDR)) = DATA

static int _vpu_test(int md5_check)
{
	int  k;
	int  ret;
	int  sizeX;
	int  sizeY;
	int  FrameBufNum = 0;
	int  PicSize = 0;
	int  DispFrameIdx;
	int  DecDecFrameIdx;
	int  DecPicType;
	unsigned int data;
	int  dec_num=0;
	int  stride;
	unsigned int  DpbLum[HD_REG_FRAME_BUF_NUM];
	unsigned int  DpbCb[HD_REG_FRAME_BUF_NUM];
	unsigned int  DpbCr[HD_REG_FRAME_BUF_NUM];
	unsigned int  MvColBuf[HD_REG_FRAME_BUF_NUM];
	int dataSize;
	int bitStreamSize;
	unsigned char md5Data[16];
	// VPP_MEDIA_NODE p_node;

	printf("%s: start\n", __func__);

	//store firmware in sdram and then dma firmware to internal PMEM by vpu. sheen
	dataSize = sizeof(bit_code) / sizeof(bit_code[0]);//126976
	for (k = 0; k < dataSize; k += 4) {
		int dataH = 0;
		int dataL = 0;
		dataH = (bit_code[k+0] << 16) | bit_code[k+1];
		dataL = (bit_code[k+2] << 16) | bit_code[k+3];
		// 64 BIT BIG Endian
		MWRITE_WORD(HD_ADDR_BIT_CODE + k * 2, dataL);
		MWRITE_WORD(HD_ADDR_BIT_CODE+k * 2 + 4, dataH);
		//j=50;   // which is 338us low level
		//while(--j);
	}

	flush_dcache_range(HD_ADDR_BIT_CODE, HD_ADDR_BIT_CODE+CODE_BUF_SIZE);

	//init_vpu register
	for(k = 0; k < 0x200; k += 4)
		MWRITE_WORD(BIT_REG_BASE + k, 0);

	VpuWriteReg(BIT_FRM_DIS_FLG, 0);//BIT_FRM_DIS_FLG

	// Start decoding configuration
	VpuWriteReg(BIT_BASE + 0xffc, 0x01); // enable clk, any other value than 0xA1B2C3D4
	VpuWriteReg(BIT_CODE_RUN, 0x0);  // BIT_CODE_RUN
	VpuWriteReg(BIT_INT_ENABLE, 0x0);  // BIT_INT_ENABLE, Disable interrupt

	//load bit stream to sdram. sheen
	dataSize = sizeof(bit_stream) / sizeof(bit_stream[0]);
	if(dataSize > (HD_STREAM_BUF_SIZE / 4))
		dataSize = HD_STREAM_BUF_SIZE / 4;
	bitStreamSize = dataSize * 4;
	for(k = 0; k < dataSize; k++) {
		data = bit_stream[k];
		MWRITE_WORD(HD_ADDR_BIT_STREAM + k * 4, data);
	}

	if(dataSize < (HD_STREAM_BUF_SIZE / 4)) {
		//set 0
		for(k = dataSize; k < (HD_STREAM_BUF_SIZE / 4); k++)
			MWRITE_WORD(HD_ADDR_BIT_STREAM + k * 4, 0);
	}

	flush_dcache_range(HD_ADDR_BIT_STREAM, HD_ADDR_BIT_STREAM+HD_STREAM_BUF_SIZE);
	// Download init common firmware. sheen
	// BIT_CODE_DOWN
	for(k = 0; k < 512; k += 4) {
		data = bit_code[k];
		VpuWriteReg(BIT_CODE_DOWN,(k << 16) | data);

		data = bit_code[k+1];
		VpuWriteReg(BIT_CODE_DOWN,((k+1) << 16) | data);

		data = bit_code[k+2];
		VpuWriteReg(BIT_CODE_DOWN,((k+2) << 16) | data);

		data = bit_code[k+3];
		VpuWriteReg(BIT_CODE_DOWN,((k+3) << 16) | data);
	}

	// Initialize the CODA
	VpuWriteReg(BIT_PARA_BUF_ADDR, HD_ADDR_BIT_PARA);	  // BIT_PARA_BUF_ADDR
	//VpuWriteReg(BIT_BASE+0x104, HD_ADDR_BIT_WORK);	  // BIT_WORK_BUF_ADDR
	VpuWriteReg(BIT_CODE_BUF_ADDR, HD_ADDR_BIT_CODE);	  // BIT_CODE_BUF_ADDR
	VpuWriteReg(BIT_BIT_STREAM_CTRL, 0x0);  					// BIT_BIT_STREAM_CTRL, 0= 64bit little endian
	VpuWriteReg(BIT_BIT_STREAM_PARAM, 0x0);  					// BIT_BIT_STREAM_PARAM
	//VpuWriteReg(BIT_BASE+0x110, 0x1); 					// Dpb Endian mode:1, 64bit big endian
	VpuWriteReg(BIT_FRAME_MEM_CTRL , 0x0); 					//BIT_FRAME_MEM_CTRL Dpb Endian mode:1, 0=64bit little endian

	VpuWriteReg(BIT_INT_ENABLE, 0x0); 					// BIT_INT_ENABLE
	VpuWriteReg(BIT_AXI_SRAM_USE, 0x0f0f);  				// BIT_AXI_SRAM_USE, 6 secAxi enable bit.(disable VC1(Ovl,Btp) )
	//VpuWriteReg(BIT_BASE+0x140, 0x78f);  				// BIT_AXI_SRAM_USE
	VpuWriteReg(BIT_BUSY_FLAG, 0x1); 					// BIT_BUSY_FLAG= 1
	VpuWriteReg(BIT_CODE_RESET, 0x1); 					// BIT_CODE_RESET
	VpuWriteReg(BIT_CODE_RUN , 0x1); 					// BIT_CODE_RUN
	while (VpuReadReg(BIT_BUSY_FLAG) == 1);   			// BIT_BUSY_FLAG = 0

	debug_printf("%s:(VPU_Init,InitializeVPU) load base code and init vpu done.\n", __func__);

	VpuWriteReg(BIT_TEMP_BUF_ADDR, HD_ADDR_TEMP_BUF);
	VpuWriteReg(BIT_RD_PTR, HD_ADDR_BIT_STREAM);	 // BIT_RD_PTR_0
	VpuWriteReg(BIT_WR_PTR, HD_ADDR_BIT_STREAM+HD_STREAM_BUF_SIZE); // BIT_WR_PTR_0, maximum is 15M, this value is for real bitstream write point position
	VpuWriteReg(BIT_BIT_STREAM_PARAM , 4); // BIT_BIT_STREAM_PARAM (bs input mode) If all the streams are feeded, the value is 4 , in demo stream, all the stream is feeded, should not exceed 15M

	VpuWriteReg(BIT_WORK_BUF_ADDR, HD_ADDR_BIT_WORK);	  // BIT_WORK_BUF_ADDR

	// wait for BIT_BUSY_FLAG = 0
	while (VpuReadReg(BIT_BUSY_FLAG ) == 1);   // BIT_BUSY_FLAG = 0

	data= VpuReadReg(BIT_RD_PTR);//cur bitstream addr

	debug_printf("%s:set bit stream buf done. BIT_RD_PTR 0x%x\n", __func__,data);

	//VpuWriteReg(BIT_BASE+0x124, HD_ADDR_BIT_STREAM); // BIT_WR_PTR_0, maximum is 15M, this value is for real bitstream write point position
	//VpuWriteReg(BIT_BASE+0x110, 0x1);  // Dpb Endian mode:1, 64bit little endian
	//VpuWriteReg(BIT_BASE+0x114, 0x0);  // BIT_BIT_STREAM_PARA

	//for BIT_RUN_COMMAND: SEQ_INIT. (BIT_BASE+0x180... reuse). sheen
	VpuWriteReg(CMD_DEC_SEQ_BB_START, HD_ADDR_BIT_STREAM);//CMD_DEC_SEQ_BB_START,seq Bitstream buffer SDRAM byte address
	VpuWriteReg(CMD_DEC_SEQ_BB_SIZE, HD_STREAM_BUF_SIZE/1024);		//CMD_DEC_SEQ_BB_SIZE, 15K, Bitstream buffer size in kilo bytes count
	//VpuWriteReg(BIT_BASE+0x190, 0x0);// ? sheen
	VpuWriteReg(CMD_DEC_SEQ_OPTION, 0x2);//bit[1],enable display buffer reordering.
	//VpuWriteReg(BIT_BASE+0x194, HD_ADDR_PS_SAVE_BUFFER);// CMD_DEC_SEQ_PS_BB_START ? sheen
	//VpuWriteReg(BIT_BASE+0x198, 0x200);							// CMD_DEC_SEQ_PS_BB_SIZE
	//VpuWriteReg(BIT_BASE+0x198, 0x80);// CMD_DEC_SEQ_PS_BB_SIZE ? sheen
	VpuWriteReg(CMD_DEC_SEQ_X264_MV_EN, 0x1);//support x264. sheen
	VpuWriteReg(CMD_DEC_SEQ_SPP_CHUNK_SIZE, 512);//GBU(get bit unit) size.

	//VpuWriteReg(0xA0000000+0x198, HD_ADDR_PS_SAVE_BUFFER+0x200);// CMD_DEC_SEQ_PS_BB_SIZE

	// Wait until busyFlag==0
	while (VpuReadReg(BIT_BUSY_FLAG ) == 1);   // BIT_BUSY_FLAG = 0

	debug_printf("%s: seq dec set done.\n", __func__);

	// CMD_DEC_SEQ_OPTION
	VpuWriteReg(BIT_BUSY_FLAG, 0x1);// set busy.sheen
	VpuWriteReg(BIT_WORK_BUF_ADDR, HD_ADDR_BIT_WORK);
	VpuWriteReg(BIT_RUN_INDEX, 0x0); 		//BIT_RUN_INDEX
	VpuWriteReg(BIT_RUN_COD_STD, 0x0); 		//BIT_RUN_COD_STD 0: H.264 DECODER
	// Command the CODA to initialize for the sequence level
	VpuWriteReg(BIT_RUN_AUX_STD, 0x0); 		//BIT_RUN_AUX_STD

	VpuWriteReg(BIT_RUN_COMMAND, 0x1); 		//BIT_RUN_COMMAND, SEQ_INIT=1  //////////source header
	// Wait until busyFlag==0
	while (VpuReadReg(BIT_BUSY_FLAG) == 1);   // BIT_BUSY_FLAG = 0

	data= VpuReadReg(BIT_RD_PTR);//cur bitstream addr

	debug_printf("%s:(VPU_DecGetInitialInfo) seq header init done. BIT_RD_PTR 0x%x\n", __func__,data);

#if 1
	//CONFIG IPB BUS
	for(k = 0; k < 144; k += 4)
		VpuWriteReg(BIT_BASE + 0x1800 + k, 0x4040);

	VpuWriteReg(BIT_BASE+0x1890, 0x0);
	VpuWriteReg(BIT_BASE+0x18a0, 0xc30);
	VpuWriteReg(BIT_BASE+0x18a4, 0xc30);
	VpuWriteReg(BIT_BASE+0x18a8, 0xc30);

	for(k = 0; k < 9; k++)
		VpuWriteReg(BIT_BASE + 0x18ac + 4 * k, k * 65);

	VpuWriteReg(BIT_BASE+0x18d0, 0x410);
	VpuWriteReg(BIT_BASE+0x18d4, 0x451);
	VpuWriteReg(BIT_BASE+0x18d8, 0x820);
	VpuWriteReg(BIT_BASE+0x18dc, 0x861);

	VpuWriteReg(BIT_BASE+0x18e0, 0x8a2);
	VpuWriteReg(BIT_BASE+0x18e4, 0x8e3);
	VpuWriteReg(BIT_BASE+0x18e8, 0x924);
	VpuWriteReg(BIT_BASE+0x18ec, 0x965);

	VpuWriteReg(BIT_BASE+0x18f0, 0x9a6);
	VpuWriteReg(BIT_BASE+0x18f4, 0x9e7);
	VpuWriteReg(BIT_BASE+0x18f8, 0xa28);
	VpuWriteReg(BIT_BASE+0x18fc, 0xa69);

	VpuWriteReg(BIT_BASE+0x1900, 0xaaa);
	VpuWriteReg(BIT_BASE+0x1904, 0xaeb);
	VpuWriteReg(BIT_BASE+0x1908, 0xb2c);
	VpuWriteReg(BIT_BASE+0x190c, 0xb6d);

	VpuWriteReg(BIT_BASE+0x1910, 0xbae);
	VpuWriteReg(BIT_BASE+0x1914, 0xbef);
	VpuWriteReg(BIT_BASE+0x1918, 0xc30);
	VpuWriteReg(BIT_BASE+0x191c, 0xc30);

	VpuWriteReg(BIT_BASE+0x1920, 0xc000000);
#endif

	// Wait until busyFlag==0
	while (VpuReadReg(BIT_BUSY_FLAG ) == 1);   // BIT_BUSY_FLAG = 0

	//VpuWriteReg(BIT_WORK_BUF_ADDR, HD_ADDR_BIT_WORK);  // BIT_WORK_BUF_ADDR

	// Allocate DPB to the IP
	//REG32(0x10000000) = VpuReadReg(BIT_BASE+0x1c0);   // seqInitStatus, 0 error, 1 success
	ret = VpuReadReg(RET_DEC_SEQ_SUCCESS);   	// seqInitStatus, 0 error, 1 success
	debug_printf("%s: seqInitStatus = %d\n", __func__, ret);
	if (ret == 1) {
		//REG32(0x10000004) = VpuReadReg(BIT_BASE+0x1c4);   // RET_DEC_SEQ_SRC_SIZE
		sizeX = VpuReadReg(RET_DEC_SEQ_SRC_SIZE) >> 16; // [31:16]
		sizeY = VpuReadReg(RET_DEC_SEQ_SRC_SIZE) & 0xffff; // [15: 0]
		//PicSize = sizeX * sizeY;
		//debug_printf("%s: sizex = %d, sizey = %d\n", __func__, sizeX, sizeY);

		//REG32(0x10000008) = VpuReadReg(BIT_BASE+0x1cc); // Minimum decoded frame buffer need to decode stream successfully.
		FrameBufNum = VpuReadReg(RET_DEC_SEQ_FRAME_NEED);
		//FrameBufNum = 32; //Allocate 32 frame buffer for decoding. resolution is limited to 1920x1080 (1920x1080x1.75x32=111 MB frame buffer size)
		if(FrameBufNum > HD_REG_FRAME_BUF_NUM)
			FrameBufNum = HD_REG_FRAME_BUF_NUM;
		debug_printf("%s: minimal request of frame buf num= %d\n", __func__, FrameBufNum);
		//for BIT_RUN_COMMAND: SET_FRAME_BUF
		VpuWriteReg(CMD_SET_FRAME_BUF_NUM, FrameBufNum); // Number of frames used for reference or output reodering.

		stride = ((sizeX + 15) & ~15);
		VpuWriteReg(CMD_SET_FRAME_BUF_STRIDE, stride);// 8 multiplier, resolution width
		PicSize = stride * ((sizeY + 15) & ~15);
		debug_printf("%s: bufStride= %d sizex = %d, sizey = %d\n", __func__, stride, sizeX, sizeY);

	} else {
		// Report Error message
		debug_printf("%s: seq dec err!!\n", __func__);
		return -1;
	}
	// Wait until busyFlag==0
	while (VpuReadReg(BIT_BUSY_FLAG ) == 1);// BIT_BUSY_FLAG = 0


	VpuWriteReg(CMD_SET_FRAME_AXI_BIT_ADDR, HD_ADDR_SEC_AXI_BIT);//second AXI address,ADDR_AXI_BIT
	VpuWriteReg(CMD_SET_FRAME_AXI_IPACDC_ADDR, HD_ADDR_SEC_AXI_IP);//second AXI address,ADDR_AXI_IP
	VpuWriteReg(CMD_SET_FRAME_AXI_DBKY_ADDR, HD_ADDR_SEC_AXI_DBKY);//second AXI address,ADDR_AXI_DBKY
	VpuWriteReg(CMD_SET_FRAME_AXI_DBKC_ADDR, HD_ADDR_SEC_AXI_DBKC);//second AXI address,ADDR_AXI_DBKC
#if 0 //for VC1 only
	VpuWriteReg(CMD_SET_FRAME_AXI_OVL_ADDR, HD_ADDR_SEC_AXI_OVL);//second AXI address,ADDR_AXI_OVL
	VpuWriteReg(CMD_SET_FRAME_AXI_BTP_ADDR, HD_ADDR_SEC_AXI_BTP);//second AXI address,ADDR_AXI_BTP
#else
	VpuWriteReg(CMD_SET_FRAME_AXI_OVL_ADDR, 0); //second AXI address,ADDR_AXI_OVL
	VpuWriteReg(CMD_SET_FRAME_AXI_BTP_ADDR, 0); //second AXI address,ADDR_AXI_BTP
#endif
	VpuWriteReg(CMD_SET_FRAME_CACHE_CONFIG, 0x7e0);//2D CACHE CONFIG

	VpuWriteReg(CMD_SET_FRAME_SLICE_BB_START, HD_ADDR_SLICE_BUFFER);//ADDR_SLICE_BUFFER
	VpuWriteReg(CMD_SET_FRAME_SLICE_BB_SIZE, SLICE_SAVE_SIZE);//ADDR_SLICE_SIZE

	while (VpuReadReg(BIT_BUSY_FLAG ) == 1); // BIT_BUSY_FLAG = 0
   /*
	VpuWriteReg(BIT_RUN_INDEX, 0x0); 	// BIT_RUN_INDEX
	VpuWriteReg(BIT_RUN_COD_STD, 0x0); 	// BIT_RUN_COD_STD, H.264
	VpuWriteReg(BIT_RUN_AUX_STD, 0x0); 	// BIT_RUN_AUX_STD
	VpuWriteReg(BIT_FRM_DIS_FLG, 0x0);  // BIT_FRM_DIS_FLG
	*/

	// 4b0000: H.264 DECODER
	// 4b0001: VC-1 DECODER
	// 4b0010: MPEG-2 DECODER
	// 4b0011: MPEG-4/DivX-3 DECODER
	// 4b0100: RV DECODER
	// 4b0101: AVS DECODER
	// 4b1000: MJPEG DECODER

	// Calculate frame buffer addresses
	{
	int  addrNextLuma = HD_ADDR_FRAME_BASE;

	for (k=0; k < FrameBufNum; k = k + 1) {
		DpbLum[k]	= addrNextLuma;
		DpbCb[k]	 = DpbLum[k]   + PicSize;
		DpbCr[k]	 = DpbCb[k]	+ PicSize/4;
		MvColBuf[k]  = DpbCr[k]	+ PicSize/4;
		addrNextLuma = MvColBuf[k] + PicSize/4;
		debug_printf("k=%d y=0x%x u=0x%x v=0x%x\n", k,DpbLum[k],DpbCb[k],DpbCr[k]);
	}

	// registering the base addresses of created frame buffers, little endian is ok?
	for (k=0; k < FrameBufNum; k = k + 2) {
		// 64 BIT BIG Endian
		MWRITE_WORD(HD_ADDR_BIT_PARA + (k*12)	  , DpbCb[k]   );
		MWRITE_WORD(HD_ADDR_BIT_PARA + (k*12) +  4, DpbLum[k]  );
		MWRITE_WORD(HD_ADDR_BIT_PARA + (k*12) +  8, DpbLum[k+1]);
		MWRITE_WORD(HD_ADDR_BIT_PARA + (k*12) + 12, DpbCr[k]   );
		MWRITE_WORD(HD_ADDR_BIT_PARA + (k*12) + 16, DpbCr[k+1] );
		MWRITE_WORD(HD_ADDR_BIT_PARA + (k*12) + 20, DpbCb[k+1] );
	}

	//mvCol buf
	for (k=0; k < FrameBufNum; k = k + 2) {
		// 64 BIT BIG Endian
		MWRITE_WORD(HD_ADDR_BIT_PARA + 384+(k*4)	 , MvColBuf[k+1]);
		MWRITE_WORD(HD_ADDR_BIT_PARA + 384+(k*4) +  4, MvColBuf[k]);
	}

	flush_dcache_range(HD_ADDR_BIT_PARA, HD_ADDR_BIT_PARA+PARA_BUF_SIZE);
	}

	VpuWriteReg(BIT_BUSY_FLAG, 0x1);// set busy.sheen
	VpuWriteReg(BIT_WORK_BUF_ADDR, HD_ADDR_BIT_WORK);
	VpuWriteReg(BIT_RUN_INDEX, 0x0); 	// BIT_RUN_INDEX
	VpuWriteReg(BIT_RUN_COD_STD, 0x0); 	// BIT_RUN_COD_STD, H.264
	VpuWriteReg(BIT_RUN_AUX_STD, 0x0); 	// BIT_RUN_AUX_STD
	VpuWriteReg(BIT_FRM_DIS_FLG, 0x0);  // BIT_FRM_DIS_FLG

	VpuWriteReg(BIT_RUN_COMMAND, 0x4); // BIT_RUN_COMMAND: SET_FRAME_BUF

	// Wait until busyFlag==0
	while (VpuReadReg(BIT_BUSY_FLAG ) == 1);   // BIT_BUSY_FLAG = 0

	// decode processing
	while(1) {
		//ret= VpuReadReg(BIT_BIT_STREAM_PARAM);
		debug_printf("%s: decoding...%d\n", __func__, dec_num);
		//VpuWriteReg(BIT_BIT_STREAM_PARAM, 0);  // BIT_BIT_STREAM_PARAM
		//VpuWriteReg(BIT_WORK_BUF_ADDR, HD_ADDR_BIT_WORK);	  // BIT_WORK_BUF_ADDR
		/*
		if(dec_num==0 || dec_num==1){
			for(k=0;k<0x200;k+=4)
			debug_printf("0x%03X: 0x%08X\n",k,VpuReadReg(k));
		}*/

		//for BIT_RUN_COMMAND: PIC_RUN
		VpuWriteReg(CMD_DEC_PIC_OPTION, 0x0); //I search,B skip,user data report...sheen
		VpuWriteReg(BIT_BASE+0x198, 0x0); //CMD_DEC_FRAME_SKIP_NUM .sheen
		VpuWriteReg(CMD_DEC_PIC_ROT_MODE, 0x0); //rotation and mirroring.sheen

		VpuWriteReg(BIT_BUSY_FLAG, 0x1);// set busy.sheen
		VpuWriteReg(BIT_WORK_BUF_ADDR, HD_ADDR_BIT_WORK);
		VpuWriteReg(BIT_RUN_INDEX, 0x0); //instance or process index.sheen
		VpuWriteReg(BIT_RUN_COD_STD, 0x0); //codec index, H.264 =0
		VpuWriteReg(BIT_RUN_AUX_STD, 0x0); //auxiliary codec index. H.264/AVC =0

		VpuWriteReg(BIT_RUN_COMMAND , 0x3); // BIT_RUN_COMMAND :PIC_RUN

		// decode 1 frame complete
		// Wait until busyFlag==0
		while (VpuReadReg(BIT_BUSY_FLAG ) == 1);	// BIT_BUSY_FLAG = 0

		// Read output information
		//REG32(0x1000000c)= VpuReadReg(BIT_BASE+0x1c4);   // Display frame index
		DispFrameIdx = VpuReadReg(RET_DEC_PIC_DISPLAY_IDX);   // Display frame index
		//REG32(0x10000010) = VpuReadReg(BIT_BASE+0x1dc);   // Decoded frame index
		DecDecFrameIdx = VpuReadReg(RET_DEC_PIC_DECODED_IDX);

		DecPicType = VpuReadReg(RET_DEC_PIC_TYPE) & 7;   // REC_DEC_PIC_TYPE
		ret= VpuReadReg(RET_DEC_PIC_SUCCESS);//PIC_RUN result
		PicSize= VpuReadReg(BIT_RD_PTR)-data; //frame bytes size
		data= VpuReadReg(BIT_RD_PTR);//cur bitstream addr

		debug_printf("%s:ret 0x%x type %d disIDX %d decIDX %d BIT_RD_PTR 0x%x frmSz %d\n", __func__, ret, DecPicType,DispFrameIdx, DecDecFrameIdx,data,PicSize);

		if(ret != 1 || PicSize <=0){
			debug_printf("%s: One frame dec err!\n", __func__);
			break;
		}

		if (DispFrameIdx == -1) // -1, no output ,end of the sequence.
		{
			// Decode successful
			debug_printf("%s: EOS 1.\n", __func__);
			break;

		} else if (DispFrameIdx >=0 /*DispFrameIdx != -3 && DispFrameIdx != -2*/) { // -2,-3, no display frame
			// Host handle output picture, such as display, then clear display flag
			// For example, if host wants to display index0 frame, the Y, CB, CR addresses are // located in DpbLum[0]), DpbCb[0], DpbCr[0];
			//  By calling VpuWriteReg(BIT_BASE+0x150, 0x0) to clear display flag of index0 after displaying index0 frame
			//32 bits, each bit match one frame index.sheen
			DispFrameIdx = VpuReadReg(BIT_FRM_DIS_FLG) & (~(1<<DispFrameIdx));

			VpuWriteReg(BIT_FRM_DIS_FLG, DispFrameIdx);
		}

		//check md5
		if(md5_check && DecDecFrameIdx>=0){
			flush_dcache_range(DpbLum[DecDecFrameIdx],DpbLum[DecDecFrameIdx]+sizeX*sizeY*3/2);
			md5((unsigned char*)DpbLum[DecDecFrameIdx], sizeX*sizeY*3/2, md5Data);

			debug_printf("md5 digest[0-7]=[0x%x %x %x %x %x %x %x %x]\n",md5Data[0],md5Data[1],md5Data[2],md5Data[3],md5Data[4],md5Data[5],md5Data[6],md5Data[7]);
			debug_printf("md5 digest[8-15]=[0x%x %x %x %x %x %x %x %x]\n",md5Data[8],md5Data[9],md5Data[10],md5Data[11],md5Data[12],md5Data[13],md5Data[14],md5Data[15]);
			for(k=0;k<16;k++){
				if(md5Data[k]!=yuv_md5_digest[dec_num*16+k]){
					debug_printf("md5 check fail!!!\n");
					return -2;
				}
			}
			debug_printf("md5 check pass!!!\n");
		}


		// copying yuv data to a file
		// You should use your own method to output decoded frames to outside, such as using USB library.
		dec_num++;

		if(dec_num >= FRAME_TEST_NUM )
			break;

		if((data - HD_ADDR_BIT_STREAM) >= (bitStreamSize - 7)){
			debug_printf("%s: EOS 2.\n", __func__);
			break;
		}
	}

	debug_printf("%s: end seq.\n", __func__);
	VpuWriteReg(BIT_BUSY_FLAG, 0x1);// set busy.sheen
	VpuWriteReg(BIT_RUN_COMMAND , 0x2); // BIT_RUN_COMMAND :SEQ_END
	while (VpuReadReg(BIT_BUSY_FLAG ) == 1);	// BIT_BUSY_FLAG = 0

	// You can directly output these 32 frame buffers to outside using USB driver.
	printf("%s: test success finish.\n", __func__);
	return 0;
}