Skip to content

Commit a3c6a7a

Browse files
farzad64esmil
authored and committed
nvdla: add NVDLA driver
Additional update from Prashant Gaikwad <pgaikwad@nvidia.com>. Adapted for Linux 5.13 and the BeagleV Starlight board by <cybergaszcz@gmail.com>. kernel test robot: fix platform_no_drv_owner.cocci warnings. Geert: Use div_u64() in dla_get_time_us(). Signed-off-by: kernel test robot <lkp@intel.com> Link: https://lore.kernel.org/r/20220119060057.GA1143@7f39e361da8f Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org> Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2203090905560.780932@ramsan.of.borg Signed-off-by: Emil Renner Berthing <kernel@esmil.dk>
1 parent 0764cec commit a3c6a7a

34 files changed

+32589
-0
lines changed

drivers/Kconfig

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,4 +245,6 @@ source "drivers/hte/Kconfig"
245245

246246
source "drivers/cdx/Kconfig"
247247

248+
source "drivers/nvdla/Kconfig"
249+
248250
endmenu

drivers/Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,3 +196,4 @@ obj-$(CONFIG_PECI) += peci/
196196
obj-$(CONFIG_HTE) += hte/
197197
obj-$(CONFIG_DRM_ACCEL) += accel/
198198
obj-$(CONFIG_CDX_BUS) += cdx/
199+
obj-$(CONFIG_NVDLA) += nvdla/

drivers/nvdla/Kconfig

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
config NVDLA
2+
tristate "The NVIDIA Deep Learning Accelerator"
3+
depends on DRM
4+
select DRM_GEM_DMA_HELPER

drivers/nvdla/Makefile

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
2+
ccflags-y += -I$(srctree)/$(src)
3+
ccflags-y += -I$(srctree)/$(src)/include
4+
5+
nvdla-y := scheduler.o \
6+
engine.o \
7+
bdma.o \
8+
conv.o \
9+
sdp.o \
10+
cdp.o \
11+
pdp.o \
12+
rubik.o \
13+
cache.o \
14+
common.o \
15+
engine_data.o \
16+
engine_isr.o \
17+
engine_debug.o \
18+
nvdla_core_callbacks.o \
19+
nvdla_gem.o
20+
21+
obj-$(CONFIG_NVDLA) += nvdla.o

drivers/nvdla/bdma.c

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,280 @@
1+
/*
2+
* Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved.
3+
*
4+
* Redistribution and use in source and binary forms, with or without
5+
* modification, are permitted provided that the following conditions
6+
* are met:
7+
* * Redistributions of source code must retain the above copyright
8+
* notice, this list of conditions and the following disclaimer.
9+
* * Redistributions in binary form must reproduce the above copyright
10+
* notice, this list of conditions and the following disclaimer in the
11+
* documentation and/or other materials provided with the distribution.
12+
* * Neither the name of NVIDIA CORPORATION nor the names of its
13+
* contributors may be used to endorse or promote products derived
14+
* from this software without specific prior written permission.
15+
*
16+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
17+
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19+
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
20+
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21+
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22+
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23+
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
24+
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27+
*/
28+
29+
#include <opendla.h>
30+
#include <dla_debug.h>
31+
#include <dla_err.h>
32+
#include <dla_interface.h>
33+
34+
#include "dla_engine_internal.h"
35+
#include "engine_debug.h"
36+
37+
/*
 * Lookup table used by processor_bdma_program_slot() to turn the
 * source_type / destination_type of a BDMA surface descriptor into the
 * RAM-type field value written to the BDMA CFG_CMD register.
 *
 * Index 0 selects MC, index 1 selects CVSRAM. The same SRC_RAM_TYPE
 * enumerators are used for both the source and the destination field.
 * The table has exactly two entries, so any index above 1 is out of range.
 */
static const uint8_t map_mem[] = {
38+
FIELD_ENUM(BDMA_CFG_CMD_0, SRC_RAM_TYPE, MC),
39+
FIELD_ENUM(BDMA_CFG_CMD_0, SRC_RAM_TYPE, CVSRAM),
40+
};
41+
42+
#if STAT_ENABLE
43+
void
44+
dla_bdma_stat_data(struct dla_processor *processor,
45+
struct dla_processor_group *group)
46+
{
47+
uint64_t end_time = 0;
48+
struct dla_bdma_stat_desc *bdma_stat;
49+
50+
bdma_stat = &processor->stat_data_desc->bdma_stat;
51+
52+
end_time = dla_get_time_us();
53+
54+
if (group->id == (uint32_t)0) {
55+
bdma_stat->read_stall = bdma_reg_read(STATUS_GRP0_READ_STALL);
56+
bdma_stat->write_stall = bdma_reg_read(STATUS_GRP0_WRITE_STALL);
57+
} else {
58+
bdma_stat->read_stall = bdma_reg_read(STATUS_GRP1_READ_STALL);
59+
bdma_stat->write_stall = bdma_reg_read(STATUS_GRP1_WRITE_STALL);
60+
}
61+
bdma_stat->runtime = (uint32_t)(end_time - group->start_time);
62+
}
63+
64+
void
65+
dla_bdma_dump_stat(struct dla_processor *processor)
66+
{
67+
struct dla_bdma_stat_desc *bdma_stat;
68+
69+
bdma_stat = &processor->stat_data_desc->bdma_stat;
70+
71+
dla_debug_bdma_stats(bdma_stat);
72+
}
73+
#endif /* STAT_ENABLE */
74+
75+
/*
 * Intentionally a no-op.
 *
 * BDMA has no producer bit to program; the interrupt pointer decides
 * which outstanding request is used for a BDMA operation. Both
 * arguments are therefore ignored.
 */
void
dla_bdma_set_producer(int32_t group_id, int32_t rdma_group_id)
{
	(void)group_id;
	(void)rdma_group_id;
}
84+
85+
int
86+
dla_bdma_enable(struct dla_processor_group *group)
87+
{
88+
struct dla_engine *engine = dla_get_engine();
89+
90+
dla_debug("Enter: %s\n", __func__);
91+
92+
if (group->surface_desc->bdma_surface.num_transfers == (uint16_t)0) {
93+
group->events |= ((uint8_t)1 << DLA_EVENT_OP_COMPLETED);
94+
goto exit;
95+
}
96+
97+
if (engine->stat_enable == (uint32_t)1) {
98+
bdma_reg_write(CFG_STATUS, FIELD_ENUM(BDMA_CFG_STATUS_0,
99+
STALL_COUNT_EN, YES));
100+
group->start_time = dla_get_time_us();
101+
}
102+
103+
/*
104+
* Launch BDMA transfer
105+
*/
106+
if (group->id == 0)
107+
bdma_reg_write(CFG_LAUNCH0, FIELD_ENUM(BDMA_CFG_LAUNCH0_0,
108+
GRP0_LAUNCH, YES));
109+
else
110+
bdma_reg_write(CFG_LAUNCH1, FIELD_ENUM(BDMA_CFG_LAUNCH1_0,
111+
GRP1_LAUNCH, YES));
112+
113+
exit:
114+
dla_debug("Exit: %s\n", __func__);
115+
return 0;
116+
}
117+
118+
void
119+
dla_bdma_rdma_check(struct dla_processor_group *group)
120+
{
121+
group->is_rdma_needed = 0;
122+
}
123+
124+
/*
125+
* Program BDMA slot for transfer
126+
*/
127+
static int32_t
128+
processor_bdma_program_slot(struct dla_bdma_surface_desc *bdma_surface,
129+
struct dla_bdma_transfer_desc *transfer)
130+
{
131+
int32_t ret = 0;
132+
uint64_t source_addr = 0;
133+
uint64_t destination_addr = 0;
134+
uint32_t high, low, reg;
135+
uint8_t bdma_free_slots = 0;
136+
struct dla_engine *engine = dla_get_engine();
137+
138+
dla_debug("Enter: %s\n", __func__);
139+
140+
/* make sure there're enough free slots */
141+
if (bdma_free_slots <= 0) {
142+
do {
143+
reg = bdma_reg_read(STATUS);
144+
reg = (reg & MASK(BDMA_STATUS_0, FREE_SLOT)) >>
145+
SHIFT(BDMA_STATUS_0, FREE_SLOT);
146+
} while (reg == 0);
147+
bdma_free_slots = (uint8_t)reg;
148+
}
149+
150+
dla_get_dma_address(engine->driver_context, engine->task->task_data,
151+
transfer->source_address,
152+
(void *)&source_addr,
153+
DESTINATION_DMA);
154+
dla_get_dma_address(engine->driver_context, engine->task->task_data,
155+
transfer->destination_address,
156+
(void *)&destination_addr,
157+
DESTINATION_DMA);
158+
159+
ASSERT_GOTO((transfer->line_repeat <= 8192),
160+
ret, ERR(INVALID_INPUT), exit);
161+
ASSERT_GOTO((transfer->surface_repeat <= 8192),
162+
ret, ERR(INVALID_INPUT), exit);
163+
ASSERT_GOTO((transfer->line_size % 32) == 0,
164+
ret, ERR(INVALID_INPUT), exit);
165+
ASSERT_GOTO(transfer->source_line >= transfer->line_size,
166+
ret, ERR(INVALID_INPUT), exit);
167+
ASSERT_GOTO(transfer->destination_line >= transfer->line_size,
168+
ret, ERR(INVALID_INPUT), exit);
169+
ASSERT_GOTO(transfer->source_surface >=
170+
(transfer->source_line * transfer->line_repeat),
171+
ret, ERR(INVALID_INPUT), exit);
172+
ASSERT_GOTO(transfer->destination_surface >=
173+
(transfer->destination_line * transfer->line_repeat),
174+
ret, ERR(INVALID_INPUT), exit);
175+
176+
/* config registers */
177+
high = HIGH32BITS(source_addr);
178+
low = LOW32BITS(source_addr);
179+
bdma_reg_write(CFG_SRC_ADDR_LOW, low);
180+
bdma_reg_write(CFG_SRC_ADDR_HIGH, high);
181+
high = HIGH32BITS(destination_addr);
182+
low = LOW32BITS(destination_addr);
183+
bdma_reg_write(CFG_DST_ADDR_LOW, low);
184+
bdma_reg_write(CFG_DST_ADDR_HIGH, high);
185+
bdma_reg_write(CFG_LINE, (transfer->line_size >> 5) - 1);
186+
reg = (map_mem[bdma_surface->source_type] <<
187+
SHIFT(BDMA_CFG_CMD_0, SRC_RAM_TYPE)) |
188+
(map_mem[bdma_surface->destination_type] <<
189+
SHIFT(BDMA_CFG_CMD_0, DST_RAM_TYPE));
190+
bdma_reg_write(CFG_CMD, reg);
191+
bdma_reg_write(CFG_LINE_REPEAT, transfer->line_repeat - 1);
192+
bdma_reg_write(CFG_SRC_LINE, transfer->source_line);
193+
bdma_reg_write(CFG_DST_LINE, transfer->destination_line);
194+
bdma_reg_write(CFG_SURF_REPEAT, transfer->surface_repeat - 1);
195+
bdma_reg_write(CFG_SRC_SURF, transfer->source_surface);
196+
bdma_reg_write(CFG_DST_SURF, transfer->destination_surface);
197+
bdma_reg_write(CFG_OP, FIELD_ENUM(BDMA_CFG_OP_0, EN, ENABLE));
198+
199+
dla_debug("Exit: %s\n", __func__);
200+
201+
exit:
202+
RETURN(ret);
203+
}
204+
205+
int
206+
dla_bdma_is_ready(struct dla_processor *processor,
207+
struct dla_processor_group *group)
208+
{
209+
struct dla_processor_group *next_group;
210+
211+
next_group = &processor->groups[!group->id];
212+
213+
/*
214+
* If another group is already programmed but not active then
215+
* do not program this operation as BDMA does not really
216+
* have shadow copies for groups. It will end programming
217+
* same group. Wait for another group to get enabled.
218+
*/
219+
if ((processor->group_status & (1 << next_group->id)) &&
220+
!next_group->active)
221+
return 0;
222+
223+
return 1;
224+
}
225+
226+
void
227+
dla_bdma_dump_config(struct dla_processor_group *group)
228+
{
229+
struct dla_bdma_op_desc *bdma_op;
230+
struct dla_bdma_surface_desc *bdma_surface;
231+
232+
bdma_surface = &group->surface_desc->bdma_surface;
233+
bdma_op = &group->operation_desc->bdma_op;
234+
235+
dla_debug_bdma_surface_desc(bdma_surface, group->roi_index);
236+
dla_debug_bdma_op_desc(bdma_op, group->roi_index);
237+
}
238+
239+
int
240+
dla_bdma_program(struct dla_processor_group *group)
241+
{
242+
int32_t i;
243+
int32_t ret = 0;
244+
struct dla_bdma_surface_desc *bdma_surface;
245+
struct dla_engine *engine = dla_get_engine();
246+
247+
dla_debug("Enter: %s\n", __func__);
248+
249+
if (!engine->config_data->bdma_enable) {
250+
dla_error("BDMA is not supported for this configuration\n");
251+
ret = ERR(INVALID_INPUT);
252+
goto exit;
253+
}
254+
255+
bdma_surface = &group->surface_desc->bdma_surface;
256+
257+
dla_debug("Num of transfers %u\n", bdma_surface->num_transfers);
258+
if (bdma_surface->num_transfers == (uint16_t)0)
259+
goto exit;
260+
261+
if (bdma_surface->num_transfers > NUM_MAX_BDMA_OPS) {
262+
dla_error("Invalid number of transfers\n");
263+
ret = ERR(INVALID_INPUT);
264+
goto exit;
265+
}
266+
267+
for (i = 0; i < bdma_surface->num_transfers; i++) {
268+
ret = processor_bdma_program_slot(bdma_surface,
269+
&bdma_surface->transfers[i]);
270+
if (ret)
271+
goto exit;
272+
}
273+
274+
dla_enable_intr(MASK(GLB_S_INTR_MASK_0, BDMA_DONE_MASK1) |
275+
MASK(GLB_S_INTR_MASK_0, BDMA_DONE_MASK0));
276+
277+
exit:
278+
dla_debug("Exit: %s\n", __func__);
279+
RETURN(ret);
280+
}

0 commit comments

Comments
 (0)