diff --git a/Documentation/DocBook/gpu.tmpl b/Documentation/DocBook/gpu.tmpl
index a8669330b456e..1692c4dd54872 100644
--- a/Documentation/DocBook/gpu.tmpl
+++ b/Documentation/DocBook/gpu.tmpl
@@ -1816,7 +1816,7 @@ void intel_crt_init(struct drm_device *dev)
Description/Restrictions |
- DRM |
+ DRM |
Generic |
“rotation” |
BITMASK |
@@ -2068,7 +2068,7 @@ void intel_crt_init(struct drm_device *dev)
property to suggest an Y offset for a connector |
- Optional |
+ Optional |
“scaling mode” |
ENUM |
{ "None", "Full", "Center", "Full aspect" } |
@@ -2092,6 +2092,61 @@ void intel_crt_init(struct drm_device *dev)
TBD |
+ “DEGAMMA_LUT” |
+ BLOB |
+ 0 |
+ CRTC |
+ DRM property to set the degamma lookup table
+ (LUT) mapping pixel data from the framebuffer before it is
+ given to the transformation matrix. The data is an interpreted
+ as an array of struct drm_color_lut elements. Hardware might
+ choose not to use the full precision of the LUT elements nor
+ use all the elements of the LUT (for example the hardware
+ might choose to interpolate between LUT[0] and LUT[4]). |
+
+
+ “DEGAMMA_LUT_SIZE” |
+ RANGE | IMMUTABLE |
+ Min=0, Max=UINT_MAX |
+ CRTC |
+ DRM property to gives the size of the lookup
+ table to be set on the DEGAMMA_LUT property (the size depends
+ on the underlying hardware). |
+
+
+ “CTM” |
+ BLOB |
+ 0 |
+ CRTC |
+ DRM property to set the current
+ transformation matrix (CTM) apply to pixel data after the
+ lookup through the degamma LUT and before the lookup through
+ the gamma LUT. The data is an interpreted as a struct
+ drm_color_ctm. |
+
+
+ “GAMMA_LUT” |
+ BLOB |
+ 0 |
+ CRTC |
+ DRM property to set the gamma lookup table
+ (LUT) mapping pixel data after to the transformation matrix to
+ data sent to the connector. The data is an interpreted as an
+ array of struct drm_color_lut elements. Hardware might choose
+ not to use the full precision of the LUT elements nor use all
+ the elements of the LUT (for example the hardware might choose
+ to interpolate between LUT[0] and LUT[4]). |
+
+
+ “GAMMA_LUT_SIZE” |
+ RANGE | IMMUTABLE |
+ Min=0, Max=UINT_MAX |
+ CRTC |
+ DRM property to gives the size of the lookup
+ table to be set on the GAMMA_LUT property (the size depends on
+ the underlying hardware). |
+
+
i915 |
Generic |
"Broadcast RGB" |
@@ -2886,52 +2941,8 @@ void (*postclose) (struct drm_device *, struct drm_file *);
File Operations
- const struct file_operations *fops
- File operations for the DRM device node.
-
- Drivers must define the file operations structure that forms the DRM
- userspace API entry point, even though most of those operations are
- implemented in the DRM core. The open,
- release and ioctl
- operations are handled by
-
- .owner = THIS_MODULE,
- .open = drm_open,
- .release = drm_release,
- .unlocked_ioctl = drm_ioctl,
- #ifdef CONFIG_COMPAT
- .compat_ioctl = drm_compat_ioctl,
- #endif
-
-
-
- Drivers that implement private ioctls that requires 32/64bit
- compatibility support must provide their own
- compat_ioctl handler that processes private
- ioctls and calls drm_compat_ioctl for core ioctls.
-
-
- The read and poll
- operations provide support for reading DRM events and polling them. They
- are implemented by
-
- .poll = drm_poll,
- .read = drm_read,
- .llseek = no_llseek,
-
-
-
- The memory mapping implementation varies depending on how the driver
- manages memory. Pre-GEM drivers will use drm_mmap,
- while GEM-aware drivers will use drm_gem_mmap. See
- .
-
- .mmap = drm_gem_mmap,
-
-
-
- No other file operation is supported by the DRM API.
-
+!Pdrivers/gpu/drm/drm_fops.c file operations
+!Edrivers/gpu/drm/drm_fops.c
IOCTLs
@@ -3319,6 +3330,12 @@ int num_ioctls;
!Pdrivers/gpu/drm/i915/intel_csr.c csr support for dmc
!Idrivers/gpu/drm/i915/intel_csr.c
+
+ Video BIOS Table (VBT)
+!Pdrivers/gpu/drm/i915/intel_bios.c Video BIOS Table (VBT)
+!Idrivers/gpu/drm/i915/intel_bios.c
+!Idrivers/gpu/drm/i915/intel_bios.h
+
@@ -3460,6 +3477,7 @@ int num_ioctls;
Public constants
+!Finclude/linux/vga_switcheroo.h vga_switcheroo_handler_flags_t
!Finclude/linux/vga_switcheroo.h vga_switcheroo_client_id
!Finclude/linux/vga_switcheroo.h vga_switcheroo_state
@@ -3488,6 +3506,10 @@ int num_ioctls;
Backlight control
!Pdrivers/platform/x86/apple-gmux.c Backlight control
+
+ Public functions
+!Iinclude/linux/apple-gmux.h
+
diff --git a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt b/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
index 56a961aa50613..9f97df4d51524 100644
--- a/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
+++ b/Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt
@@ -35,6 +35,12 @@ Optional properties for HDMI:
as an interrupt/status bit in the HDMI controller
itself). See bindings/pinctrl/brcm,bcm2835-gpio.txt
+Required properties for V3D:
+- compatible: Should be "brcm,bcm2835-v3d"
+- reg: Physical base address and length of the V3D's registers
+- interrupts: The interrupt number
+ See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
+
Example:
pixelvalve@7e807000 {
compatible = "brcm,bcm2835-pixelvalve2";
@@ -60,6 +66,12 @@ hdmi: hdmi@7e902000 {
clock-names = "pixel", "hdmi";
};
+v3d: v3d@7ec00000 {
+ compatible = "brcm,bcm2835-v3d";
+ reg = <0x7ec00000 0x1000>;
+ interrupts = <1 10>;
+};
+
vc4: gpu {
compatible = "brcm,bcm2835-vc4";
};
diff --git a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
index 0e6f0c0248587..22756b3dede2a 100644
--- a/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
+++ b/Documentation/devicetree/bindings/display/exynos/exynos_dsim.txt
@@ -6,6 +6,7 @@ Required properties:
"samsung,exynos4210-mipi-dsi" /* for Exynos4 SoCs */
"samsung,exynos4415-mipi-dsi" /* for Exynos4415 SoC */
"samsung,exynos5410-mipi-dsi" /* for Exynos5410/5420/5440 SoCs */
+ "samsung,exynos5422-mipi-dsi" /* for Exynos5422/5800 SoCs */
"samsung,exynos5433-mipi-dsi" /* for Exynos5433 SoCs */
- reg: physical base address and length of the registers set for the device
- interrupts: should contain DSI interrupt
diff --git a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
index 27c3ce0db16a3..c7c6b9af87ac4 100644
--- a/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
+++ b/Documentation/devicetree/bindings/display/exynos/samsung-fimd.txt
@@ -12,7 +12,8 @@ Required properties:
"samsung,exynos3250-fimd"; /* for Exynos3250/3472 SoCs */
"samsung,exynos4210-fimd"; /* for Exynos4 SoCs */
"samsung,exynos4415-fimd"; /* for Exynos4415 SoC */
- "samsung,exynos5250-fimd"; /* for Exynos5 SoCs */
+ "samsung,exynos5250-fimd"; /* for Exynos5250 SoCs */
+ "samsung,exynos5420-fimd"; /* for Exynos5420/5422/5800 SoCs */
- reg: physical base address and length of the FIMD registers set.
diff --git a/Documentation/devicetree/bindings/display/msm/dsi.txt b/Documentation/devicetree/bindings/display/msm/dsi.txt
index e7423bea1424a..f5948c48b9a23 100644
--- a/Documentation/devicetree/bindings/display/msm/dsi.txt
+++ b/Documentation/devicetree/bindings/display/msm/dsi.txt
@@ -44,9 +44,34 @@ Optional properties:
- pinctrl-names: the pin control state names; should contain "default"
- pinctrl-0: the default pinctrl state (active)
- pinctrl-n: the "sleep" pinctrl state
-- port: DSI controller output port. This contains one endpoint subnode, with its
- remote-endpoint set to the phandle of the connected panel's endpoint.
- See Documentation/devicetree/bindings/graph.txt for device graph info.
+- port: DSI controller output port, containing one endpoint subnode.
+
+ DSI Endpoint properties:
+ - remote-endpoint: set to phandle of the connected panel's endpoint.
+ See Documentation/devicetree/bindings/graph.txt for device graph info.
+ - qcom,data-lane-map: this describes how the logical DSI lanes are mapped
+ to the physical lanes on the given platform. The value contained in
+ index n describes what logical data lane is mapped to the physical data
+ lane n (DATAn, where n lies between 0 and 3).
+
+ For example:
+
+ qcom,data-lane-map = <3 0 1 2>;
+
+ The above mapping describes that the logical data lane DATA3 is mapped to
+ the physical data lane DATA0, logical DATA0 to physical DATA1, logic DATA1
+ to phys DATA2 and logic DATA2 to phys DATA3.
+
+ There are only a limited number of physical to logical mappings possible:
+
+ "0123": Logic 0->Phys 0; Logic 1->Phys 1; Logic 2->Phys 2; Logic 3->Phys 3;
+ "3012": Logic 3->Phys 0; Logic 0->Phys 1; Logic 1->Phys 2; Logic 2->Phys 3;
+ "2301": Logic 2->Phys 0; Logic 3->Phys 1; Logic 0->Phys 2; Logic 1->Phys 3;
+ "1230": Logic 1->Phys 0; Logic 2->Phys 1; Logic 3->Phys 2; Logic 0->Phys 3;
+ "0321": Logic 0->Phys 0; Logic 3->Phys 1; Logic 2->Phys 2; Logic 1->Phys 3;
+ "1032": Logic 1->Phys 0; Logic 0->Phys 1; Logic 3->Phys 2; Logic 2->Phys 3;
+ "2103": Logic 2->Phys 0; Logic 1->Phys 1; Logic 0->Phys 2; Logic 3->Phys 3;
+ "3210": Logic 3->Phys 0; Logic 2->Phys 1; Logic 1->Phys 2; Logic 0->Phys 3;
DSI PHY:
Required properties:
@@ -131,6 +156,7 @@ Example:
port {
dsi0_out: endpoint {
remote-endpoint = <&panel_in>;
+ lanes = <0 1 2 3>;
};
};
};
diff --git a/Documentation/devicetree/bindings/display/msm/hdmi.txt b/Documentation/devicetree/bindings/display/msm/hdmi.txt
index 379ee2ea9a3d8..b63f614e0c045 100644
--- a/Documentation/devicetree/bindings/display/msm/hdmi.txt
+++ b/Documentation/devicetree/bindings/display/msm/hdmi.txt
@@ -11,6 +11,7 @@ Required properties:
- reg: Physical base address and length of the controller's registers
- reg-names: "core_physical"
- interrupts: The interrupt signal from the hdmi block.
+- power-domains: Should be <&mmcc MDSS_GDSC>.
- clocks: device clocks
See ../clocks/clock-bindings.txt for details.
- qcom,hdmi-tx-ddc-clk-gpio: ddc clk pin
@@ -18,6 +19,8 @@ Required properties:
- qcom,hdmi-tx-hpd-gpio: hpd pin
- core-vdda-supply: phandle to supply regulator
- hdmi-mux-supply: phandle to mux regulator
+- phys: the phandle for the HDMI PHY device
+- phy-names: the name of the corresponding PHY device
Optional properties:
- qcom,hdmi-tx-mux-en-gpio: hdmi mux enable pin
@@ -27,15 +30,38 @@ Optional properties:
- pinctrl-0: the default pinctrl state (active)
- pinctrl-1: the "sleep" pinctrl state
+HDMI PHY:
+Required properties:
+- compatible: Could be the following
+ * "qcom,hdmi-phy-8660"
+ * "qcom,hdmi-phy-8960"
+ * "qcom,hdmi-phy-8974"
+ * "qcom,hdmi-phy-8084"
+ * "qcom,hdmi-phy-8996"
+- #phy-cells: Number of cells in a PHY specifier; Should be 0.
+- reg: Physical base address and length of the registers of the PHY sub blocks.
+- reg-names: The names of register regions. The following regions are required:
+ * "hdmi_phy"
+ * "hdmi_pll"
+ For HDMI PHY on msm8996, these additional register regions are required:
+ * "hdmi_tx_l0"
+ * "hdmi_tx_l1"
+ * "hdmi_tx_l3"
+ * "hdmi_tx_l4"
+- power-domains: Should be <&mmcc MDSS_GDSC>.
+- clocks: device clocks
+ See Documentation/devicetree/bindings/clocks/clock-bindings.txt for details.
+- core-vdda-supply: phandle to vdda regulator device node
+
Example:
/ {
...
- hdmi: qcom,hdmi-tx-8960@4a00000 {
+ hdmi: hdmi@4a00000 {
compatible = "qcom,hdmi-tx-8960";
reg-names = "core_physical";
- reg = <0x04a00000 0x1000>;
+ reg = <0x04a00000 0x2f0>;
interrupts = ;
power-domains = <&mmcc MDSS_GDSC>;
clock-names =
@@ -54,5 +80,21 @@ Example:
pinctrl-names = "default", "sleep";
pinctrl-0 = <&hpd_active &ddc_active &cec_active>;
pinctrl-1 = <&hpd_suspend &ddc_suspend &cec_suspend>;
+
+ phys = <&hdmi_phy>;
+ phy-names = "hdmi_phy";
+ };
+
+ hdmi_phy: phy@4a00400 {
+ compatible = "qcom,hdmi-phy-8960";
+ reg-names = "hdmi_phy",
+ "hdmi_pll";
+ reg = <0x4a00400 0x60>,
+ <0x4a00500 0x100>;
+ #phy-cells = <0>;
+ power-domains = <&mmcc MDSS_GDSC>;
+ clock-names = "slave_iface_clk";
+ clocks = <&mmcc HDMI_S_AHB_CLK>;
+ core-vdda-supply = <&pm8921_hdmi_mvs>;
};
};
diff --git a/Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt b/Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt
new file mode 100644
index 0000000000000..8c5de692c55ce
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/lg,lp120up1.txt
@@ -0,0 +1,7 @@
+LG 12.0" (1920x1280 pixels) TFT LCD panel
+
+Required properties:
+- compatible: should be "lg,lp120up1"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt b/Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt
new file mode 100644
index 0000000000000..088a6cea5015f
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/panel/urt,umsh-8596md.txt
@@ -0,0 +1,16 @@
+United Radiant Technology UMSH-8596MD-xT 7.0" WVGA TFT LCD panel
+
+Supported are LVDS versions (-11T, -19T) and parallel ones
+(-T, -1T, -7T, -20T).
+
+Required properties:
+- compatible: should be one of:
+ "urt,umsh-8596md-t",
+ "urt,umsh-8596md-1t",
+ "urt,umsh-8596md-7t",
+ "urt,umsh-8596md-11t",
+ "urt,umsh-8596md-19t",
+ "urt,umsh-8596md-20t".
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
diff --git a/Documentation/devicetree/bindings/display/renesas,du.txt b/Documentation/devicetree/bindings/display/renesas,du.txt
index eccd4f4867b29..0d30e42e40bea 100644
--- a/Documentation/devicetree/bindings/display/renesas,du.txt
+++ b/Documentation/devicetree/bindings/display/renesas,du.txt
@@ -8,6 +8,7 @@ Required Properties:
- "renesas,du-r8a7791" for R8A7791 (R-Car M2-W) compatible DU
- "renesas,du-r8a7793" for R8A7793 (R-Car M2-N) compatible DU
- "renesas,du-r8a7794" for R8A7794 (R-Car E2) compatible DU
+ - "renesas,du-r8a7795" for R8A7795 (R-Car H3) compatible DU
- reg: A list of base address and length of each memory resource, one for
each entry in the reg-names property.
@@ -24,7 +25,7 @@ Required Properties:
- clock-names: Name of the clocks. This property is model-dependent.
- R8A7779 uses a single functional clock. The clock doesn't need to be
named.
- - R8A779[0134] use one functional clock per channel and one clock per LVDS
+ - R8A779[01345] use one functional clock per channel and one clock per LVDS
encoder (if available). The functional clocks must be named "du.x" with
"x" being the channel numerical index. The LVDS clocks must be named
"lvds.x" with "x" being the LVDS encoder numerical index.
@@ -41,13 +42,14 @@ bindings specified in Documentation/devicetree/bindings/graph.txt.
The following table lists for each supported model the port number
corresponding to each DU output.
- Port 0 Port1 Port2
+ Port 0 Port1 Port2 Port3
-----------------------------------------------------------------------------
- R8A7779 (H1) DPAD 0 DPAD 1 -
- R8A7790 (H2) DPAD LVDS 0 LVDS 1
- R8A7791 (M2-W) DPAD LVDS 0 -
- R8A7793 (M2-N) DPAD LVDS 0 -
- R8A7794 (E2) DPAD 0 DPAD 1 -
+ R8A7779 (H1) DPAD 0 DPAD 1 - -
+ R8A7790 (H2) DPAD LVDS 0 LVDS 1 -
+ R8A7791 (M2-W) DPAD LVDS 0 - -
+ R8A7793 (M2-N) DPAD LVDS 0 - -
+ R8A7794 (E2) DPAD 0 DPAD 1 - -
+ R8A7795 (H3) DPAD HDMI 0 HDMI 1 LVDS
Example: R8A7790 (R-Car H2) DU
diff --git a/Documentation/devicetree/bindings/display/rockchip/inno_hdmi-rockchip.txt b/Documentation/devicetree/bindings/display/rockchip/inno_hdmi-rockchip.txt
new file mode 100644
index 0000000000000..8096a29f9776a
--- /dev/null
+++ b/Documentation/devicetree/bindings/display/rockchip/inno_hdmi-rockchip.txt
@@ -0,0 +1,50 @@
+Rockchip specific extensions to the Innosilicon HDMI
+================================
+
+Required properties:
+- compatible:
+ "rockchip,rk3036-inno-hdmi";
+- reg:
+ Physical base address and length of the controller's registers.
+- clocks, clock-names:
+ Phandle to hdmi controller clock, name should be "pclk"
+- interrupts:
+ HDMI interrupt number
+- ports:
+ Contain one port node with endpoint definitions as defined in
+ Documentation/devicetree/bindings/graph.txt.
+- pinctrl-0, pinctrl-name:
+ Switch the iomux of HPD/CEC pins to HDMI function.
+
+Example:
+hdmi: hdmi@20034000 {
+ compatible = "rockchip,rk3036-inno-hdmi";
+ reg = <0x20034000 0x4000>;
+ interrupts = ;
+ clocks = <&cru PCLK_HDMI>;
+ clock-names = "pclk";
+ pinctrl-names = "default";
+ pinctrl-0 = <&hdmi_ctl>;
+ status = "disabled";
+
+ hdmi_in: port {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ hdmi_in_lcdc: endpoint@0 {
+ reg = <0>;
+ remote-endpoint = <&lcdc_out_hdmi>;
+ };
+ };
+};
+
+&pinctrl {
+ hdmi {
+ hdmi_ctl: hdmi-ctl {
+ rockchip,pins = <1 8 RK_FUNC_1 &pcfg_pull_none>,
+ <1 9 RK_FUNC_1 &pcfg_pull_none>,
+ <1 10 RK_FUNC_1 &pcfg_pull_none>,
+ <1 11 RK_FUNC_1 &pcfg_pull_none>;
+ };
+ };
+
+};
diff --git a/Documentation/dma-buf-sharing.txt b/Documentation/dma-buf-sharing.txt
index 480c8de3c2c44..32ac32e773e11 100644
--- a/Documentation/dma-buf-sharing.txt
+++ b/Documentation/dma-buf-sharing.txt
@@ -257,17 +257,15 @@ Access to a dma_buf from the kernel context involves three steps:
Interface:
int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
- size_t start, size_t len,
enum dma_data_direction direction)
This allows the exporter to ensure that the memory is actually available for
cpu access - the exporter might need to allocate or swap-in and pin the
backing storage. The exporter also needs to ensure that cpu access is
- coherent for the given range and access direction. The range and access
- direction can be used by the exporter to optimize the cache flushing, i.e.
- access outside of the range or with a different direction (read instead of
- write) might return stale or even bogus data (e.g. when the exporter needs to
- copy the data to temporary storage).
+ coherent for the access direction. The direction can be used by the exporter
+ to optimize the cache flushing, i.e. access with a different direction (read
+ instead of write) might return stale or even bogus data (e.g. when the
+ exporter needs to copy the data to temporary storage).
This step might fail, e.g. in oom conditions.
@@ -322,14 +320,13 @@ Access to a dma_buf from the kernel context involves three steps:
3. Finish access
- When the importer is done accessing the range specified in begin_cpu_access,
- it needs to announce this to the exporter (to facilitate cache flushing and
- unpinning of any pinned resources). The result of any dma_buf kmap calls
- after end_cpu_access is undefined.
+ When the importer is done accessing the CPU, it needs to announce this to
+ the exporter (to facilitate cache flushing and unpinning of any pinned
+ resources). The result of any dma_buf kmap calls after end_cpu_access is
+ undefined.
Interface:
void dma_buf_end_cpu_access(struct dma_buf *dma_buf,
- size_t start, size_t len,
enum dma_data_direction dir);
@@ -353,7 +350,26 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
handles, too). So it's beneficial to support this in a similar fashion on
dma-buf to have a good transition path for existing Android userspace.
- No special interfaces, userspace simply calls mmap on the dma-buf fd.
+ No special interfaces, userspace simply calls mmap on the dma-buf fd, making
+ sure that the cache synchronization ioctl (DMA_BUF_IOCTL_SYNC) is *always*
+ used when the access happens. This is discussed next paragraphs.
+
+ Some systems might need some sort of cache coherency management e.g. when
+ CPU and GPU domains are being accessed through dma-buf at the same time. To
+ circumvent this problem there are begin/end coherency markers, that forward
+ directly to existing dma-buf device drivers vfunc hooks. Userspace can make
+ use of those markers through the DMA_BUF_IOCTL_SYNC ioctl. The sequence
+ would be used like following:
+ - mmap dma-buf fd
+ - for each drawing/upload cycle in CPU 1. SYNC_START ioctl, 2. read/write
+ to mmap area 3. SYNC_END ioctl. This can be repeated as often as you
+ want (with the new data being consumed by the GPU or say scanout device)
+ - munmap once you don't need the buffer any more
+
+ Therefore, for correctness and optimal performance, systems with the memory
+ cache shared by the GPU and CPU i.e. the "coherent" and also the
+ "incoherent" are always required to use SYNC_START and SYNC_END before and
+ after, respectively, when accessing the mapped address.
2. Supporting existing mmap interfaces in importers
diff --git a/MAINTAINERS b/MAINTAINERS
index da636eac9258c..29d9779dc3d23 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -847,6 +847,12 @@ S: Maintained
F: drivers/net/arcnet/
F: include/uapi/linux/if_arcnet.h
+ARM HDLCD DRM DRIVER
+M: Liviu Dudau
+S: Supported
+F: drivers/gpu/drm/arm/
+F: Documentation/devicetree/bindings/display/arm,hdlcd.txt
+
ARM MFM AND FLOPPY DRIVERS
M: Ian Molton
S: Maintained
@@ -3754,7 +3760,7 @@ F: drivers/gpu/vga/
F: include/drm/
F: include/uapi/drm/
-RADEON DRM DRIVERS
+RADEON and AMDGPU DRM DRIVERS
M: Alex Deucher
M: Christian König
L: dri-devel@lists.freedesktop.org
@@ -3762,6 +3768,8 @@ T: git git://people.freedesktop.org/~agd5f/linux
S: Supported
F: drivers/gpu/drm/radeon/
F: include/uapi/drm/radeon*
+F: drivers/gpu/drm/amd/
+F: include/uapi/drm/amdgpu*
DRM PANEL DRIVERS
M: Thierry Reding
@@ -3806,7 +3814,7 @@ F: include/drm/exynos*
F: include/uapi/drm/exynos*
DRM DRIVERS FOR FREESCALE DCU
-M: Jianwei Wang
+M: Stefan Agner
M: Alison Wang
L: dri-devel@lists.freedesktop.org
S: Supported
diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
index 1341a94cc7793..aef87fdbd1879 100644
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@@ -555,8 +555,10 @@ static unsigned int intel_gtt_mappable_entries(void)
static void intel_gtt_teardown_scratch_page(void)
{
set_pages_wb(intel_private.scratch_page, 1);
- pci_unmap_page(intel_private.pcidev, intel_private.scratch_page_dma,
- PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+ if (intel_private.needs_dmar)
+ pci_unmap_page(intel_private.pcidev,
+ intel_private.scratch_page_dma,
+ PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
__free_page(intel_private.scratch_page);
}
@@ -1346,16 +1348,6 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
{
int i, mask;
- /*
- * Can be called from the fake agp driver but also directly from
- * drm/i915.ko. Hence we need to check whether everything is set up
- * already.
- */
- if (intel_private.driver) {
- intel_private.refcount++;
- return 1;
- }
-
for (i = 0; intel_gtt_chipsets[i].name != NULL; i++) {
if (gpu_pdev) {
if (gpu_pdev->device ==
@@ -1376,16 +1368,26 @@ int intel_gmch_probe(struct pci_dev *bridge_pdev, struct pci_dev *gpu_pdev,
if (!intel_private.driver)
return 0;
- intel_private.refcount++;
-
#if IS_ENABLED(CONFIG_AGP_INTEL)
if (bridge) {
+ if (INTEL_GTT_GEN > 1)
+ return 0;
+
bridge->driver = &intel_fake_agp_driver;
bridge->dev_private_data = &intel_private;
bridge->dev = bridge_pdev;
}
#endif
+
+ /*
+ * Can be called from the fake agp driver but also directly from
+ * drm/i915.ko. Hence we need to check whether everything is set up
+ * already.
+ */
+ if (intel_private.refcount++)
+ return 1;
+
intel_private.bridge_dev = pci_dev_get(bridge_pdev);
dev_info(&bridge_pdev->dev, "Intel %s Chipset\n", intel_gtt_chipsets[i].name);
@@ -1430,6 +1432,8 @@ void intel_gmch_remove(void)
if (--intel_private.refcount)
return;
+ if (intel_private.scratch_page)
+ intel_gtt_teardown_scratch_page();
if (intel_private.pcidev)
pci_dev_put(intel_private.pcidev);
if (intel_private.bridge_dev)
diff --git a/drivers/dma-buf/dma-buf.c b/drivers/dma-buf/dma-buf.c
index 155c1464948e7..9810d1df0691f 100644
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@@ -34,6 +34,8 @@
#include
#include
+#include
+
static inline int is_dma_buf_file(struct file *);
struct dma_buf_list {
@@ -251,11 +253,54 @@ static unsigned int dma_buf_poll(struct file *file, poll_table *poll)
return events;
}
+static long dma_buf_ioctl(struct file *file,
+ unsigned int cmd, unsigned long arg)
+{
+ struct dma_buf *dmabuf;
+ struct dma_buf_sync sync;
+ enum dma_data_direction direction;
+
+ dmabuf = file->private_data;
+
+ switch (cmd) {
+ case DMA_BUF_IOCTL_SYNC:
+ if (copy_from_user(&sync, (void __user *) arg, sizeof(sync)))
+ return -EFAULT;
+
+ if (sync.flags & ~DMA_BUF_SYNC_VALID_FLAGS_MASK)
+ return -EINVAL;
+
+ switch (sync.flags & DMA_BUF_SYNC_RW) {
+ case DMA_BUF_SYNC_READ:
+ direction = DMA_FROM_DEVICE;
+ break;
+ case DMA_BUF_SYNC_WRITE:
+ direction = DMA_TO_DEVICE;
+ break;
+ case DMA_BUF_SYNC_RW:
+ direction = DMA_BIDIRECTIONAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (sync.flags & DMA_BUF_SYNC_END)
+ dma_buf_end_cpu_access(dmabuf, direction);
+ else
+ dma_buf_begin_cpu_access(dmabuf, direction);
+
+ return 0;
+ default:
+ return -ENOTTY;
+ }
+}
+
static const struct file_operations dma_buf_fops = {
.release = dma_buf_release,
.mmap = dma_buf_mmap_internal,
.llseek = dma_buf_llseek,
.poll = dma_buf_poll,
+ .unlocked_ioctl = dma_buf_ioctl,
};
/*
@@ -539,13 +584,11 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
* preparations. Coherency is only guaranteed in the specified range for the
* specified access direction.
* @dmabuf: [in] buffer to prepare cpu access for.
- * @start: [in] start of range for cpu access.
- * @len: [in] length of range for cpu access.
* @direction: [in] length of range for cpu access.
*
* Can return negative error values, returns 0 on success.
*/
-int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
+int dma_buf_begin_cpu_access(struct dma_buf *dmabuf,
enum dma_data_direction direction)
{
int ret = 0;
@@ -554,8 +597,7 @@ int dma_buf_begin_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
return -EINVAL;
if (dmabuf->ops->begin_cpu_access)
- ret = dmabuf->ops->begin_cpu_access(dmabuf, start,
- len, direction);
+ ret = dmabuf->ops->begin_cpu_access(dmabuf, direction);
return ret;
}
@@ -567,19 +609,17 @@ EXPORT_SYMBOL_GPL(dma_buf_begin_cpu_access);
* actions. Coherency is only guaranteed in the specified range for the
* specified access direction.
* @dmabuf: [in] buffer to complete cpu access for.
- * @start: [in] start of range for cpu access.
- * @len: [in] length of range for cpu access.
* @direction: [in] length of range for cpu access.
*
* This call must always succeed.
*/
-void dma_buf_end_cpu_access(struct dma_buf *dmabuf, size_t start, size_t len,
+void dma_buf_end_cpu_access(struct dma_buf *dmabuf,
enum dma_data_direction direction)
{
WARN_ON(!dmabuf);
if (dmabuf->ops->end_cpu_access)
- dmabuf->ops->end_cpu_access(dmabuf, start, len, direction);
+ dmabuf->ops->end_cpu_access(dmabuf, direction);
}
EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 8ae7ab68cb978..f2a74d0b68ae6 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -25,6 +25,14 @@ config DRM_MIPI_DSI
bool
depends on DRM
+config DRM_DP_AUX_CHARDEV
+ bool "DRM DP AUX Interface"
+ depends on DRM
+ help
+ Choose this option to enable a /dev/drm_dp_auxN node that allows to
+ read and write values to arbitrary DPCD registers on the DP aux
+ channel.
+
config DRM_KMS_HELPER
tristate
depends on DRM
@@ -106,6 +114,8 @@ config DRM_TDFX
Choose this option if you have a 3dfx Banshee or Voodoo3 (or later),
graphics card. If M is selected, the module will be called tdfx.
+source "drivers/gpu/drm/arm/Kconfig"
+
config DRM_R128
tristate "ATI Rage 128"
depends on DRM && PCI
@@ -162,6 +172,8 @@ config DRM_AMDGPU
source "drivers/gpu/drm/amd/amdgpu/Kconfig"
source "drivers/gpu/drm/amd/powerplay/Kconfig"
+source "drivers/gpu/drm/amd/acp/Kconfig"
+
source "drivers/gpu/drm/nouveau/Kconfig"
config DRM_I810
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 61766dec6a8d3..6eb94fc561dc2 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -22,10 +22,13 @@ drm-$(CONFIG_OF) += drm_of.o
drm-$(CONFIG_AGP) += drm_agpsupport.o
drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
- drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o
+ drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
+ drm_kms_helper_common.o
+
drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
+drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
@@ -33,6 +36,7 @@ CFLAGS_drm_trace_points.o := -I$(src)
obj-$(CONFIG_DRM) += drm.o
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
+obj-$(CONFIG_DRM_ARM) += arm/
obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_TDFX) += tdfx/
obj-$(CONFIG_DRM_R128) += r128/
diff --git a/drivers/gpu/drm/amd/acp/Kconfig b/drivers/gpu/drm/amd/acp/Kconfig
new file mode 100644
index 0000000000000..0f734ee052743
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Kconfig
@@ -0,0 +1,10 @@
+menu "ACP Configuration"
+
+config DRM_AMD_ACP
+ bool "Enable ACP IP support"
+ select MFD_CORE
+ select PM_GENERIC_DOMAINS if PM
+ help
+ Choose this option to enable ACP IP support for AMD SOCs.
+
+endmenu
diff --git a/drivers/gpu/drm/amd/acp/Makefile b/drivers/gpu/drm/amd/acp/Makefile
new file mode 100644
index 0000000000000..8363cb57915b0
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the ACP, which is a sub-component
+# of AMDSOC/AMDGPU drm driver.
+# It provides the HW control for ACP related functionalities.
+
+subdir-ccflags-y += -I$(AMDACPPATH)/ -I$(AMDACPPATH)/include
+
+AMD_ACP_FILES := $(AMDACPPATH)/acp_hw.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c b/drivers/gpu/drm/amd/acp/acp_hw.c
similarity index 64%
rename from drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c
rename to drivers/gpu/drm/amd/acp/acp_hw.c
index 341dc560acbbf..7af83f142b4b8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm206.c
+++ b/drivers/gpu/drm/amd/acp/acp_hw.c
@@ -1,5 +1,5 @@
/*
- * Copyright 2015 Red Hat Inc.
+ * Copyright 2015 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@@ -19,30 +19,32 @@
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
- * Authors: Ben Skeggs
*/
-#include "gf100.h"
-#include "ctxgf100.h"
-
-#include
-
-static const struct gf100_gr_func
-gm206_gr = {
- .init = gm204_gr_init,
- .mmio = gm204_gr_pack_mmio,
- .ppc_nr = 2,
- .grctx = &gm206_grctx,
- .sclass = {
- { -1, -1, FERMI_TWOD_A },
- { -1, -1, KEPLER_INLINE_TO_MEMORY_B },
- { -1, -1, MAXWELL_B, &gf100_fermi },
- { -1, -1, MAXWELL_COMPUTE_B },
- {}
- }
-};
-
-int
-gm206_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
+
+#include
+#include
+#include
+#include
+#include
+
+#include "acp_gfx_if.h"
+
+#define ACP_MODE_I2S 0
+#define ACP_MODE_AZ 1
+
+#define mmACP_AZALIA_I2S_SELECT 0x51d4
+
+int amd_acp_hw_init(void *cgs_device,
+ unsigned acp_version_major, unsigned acp_version_minor)
{
- return gf100_gr_new_(&gm206_gr, device, index, pgr);
+ unsigned int acp_mode = ACP_MODE_I2S;
+
+ if ((acp_version_major == 2) && (acp_version_minor == 2))
+ acp_mode = cgs_read_register(cgs_device,
+ mmACP_AZALIA_I2S_SELECT);
+
+ if (acp_mode != ACP_MODE_I2S)
+ return -ENODEV;
+
+ return 0;
}
diff --git a/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
new file mode 100644
index 0000000000000..bccf47b638991
--- /dev/null
+++ b/drivers/gpu/drm/amd/acp/include/acp_gfx_if.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+*/
+
+#ifndef _ACP_GFX_IF_H
+#define _ACP_GFX_IF_H
+
+#include
+#include "cgs_linux.h"
+#include "cgs_common.h"
+
+int amd_acp_hw_init(void *cgs_device,
+ unsigned acp_version_major, unsigned acp_version_minor);
+
+#endif /* _ACP_GFX_IF_H */
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 20c9539abc36e..c7fcdcedaadbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -8,7 +8,8 @@ ccflags-y := -Iinclude/drm -I$(FULL_AMD_PATH)/include/asic_reg \
-I$(FULL_AMD_PATH)/include \
-I$(FULL_AMD_PATH)/amdgpu \
-I$(FULL_AMD_PATH)/scheduler \
- -I$(FULL_AMD_PATH)/powerplay/inc
+ -I$(FULL_AMD_PATH)/powerplay/inc \
+ -I$(FULL_AMD_PATH)/acp/include
amdgpu-y := amdgpu_drv.o
@@ -20,7 +21,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_fb.o amdgpu_gem.o amdgpu_ring.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
- atombios_encoders.o amdgpu_semaphore.o amdgpu_sa.o atombios_i2c.o \
+ atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o
@@ -92,7 +93,17 @@ amdgpu-y += amdgpu_cgs.o
amdgpu-y += \
../scheduler/gpu_scheduler.o \
../scheduler/sched_fence.o \
- amdgpu_sched.o
+ amdgpu_job.o
+
+# ACP componet
+ifneq ($(CONFIG_DRM_AMD_ACP),)
+amdgpu-y += amdgpu_acp.o
+
+AMDACPPATH := ../acp
+include $(FULL_AMD_PATH)/acp/Makefile
+
+amdgpu-y += $(AMD_ACP_FILES)
+endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5e7770f9a415b..c4a21c6428f5f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -53,6 +53,7 @@
#include "amdgpu_ucode.h"
#include "amdgpu_gds.h"
#include "amd_powerplay.h"
+#include "amdgpu_acp.h"
#include "gpu_scheduler.h"
@@ -74,7 +75,6 @@ extern int amdgpu_dpm;
extern int amdgpu_smc_load_fw;
extern int amdgpu_aspm;
extern int amdgpu_runtime_pm;
-extern int amdgpu_hard_reset;
extern unsigned amdgpu_ip_block_mask;
extern int amdgpu_bapm;
extern int amdgpu_deep_color;
@@ -82,10 +82,8 @@ extern int amdgpu_vm_size;
extern int amdgpu_vm_block_size;
extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
-extern int amdgpu_enable_scheduler;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
-extern int amdgpu_enable_semaphores;
extern int amdgpu_powerplay;
extern unsigned amdgpu_pcie_gen_cap;
extern unsigned amdgpu_pcie_lane_cap;
@@ -108,9 +106,6 @@ extern unsigned amdgpu_pcie_lane_cap;
/* max number of IP instances */
#define AMDGPU_MAX_SDMA_INSTANCES 2
-/* number of hw syncs before falling back on blocking */
-#define AMDGPU_NUM_SYNCS 4
-
/* hardcode that limit for now */
#define AMDGPU_VA_RESERVED_SIZE (8 << 20)
@@ -146,11 +141,9 @@ extern unsigned amdgpu_pcie_lane_cap;
#define CIK_CURSOR_HEIGHT 128
struct amdgpu_device;
-struct amdgpu_fence;
struct amdgpu_ib;
struct amdgpu_vm;
struct amdgpu_ring;
-struct amdgpu_semaphore;
struct amdgpu_cs_parser;
struct amdgpu_job;
struct amdgpu_irq_src;
@@ -248,7 +241,7 @@ struct amdgpu_vm_pte_funcs {
unsigned count);
/* write pte one entry at a time with addr mapping */
void (*write_pte)(struct amdgpu_ib *ib,
- uint64_t pe,
+ const dma_addr_t *pages_addr, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
/* for linear pte/pde updates without addr mapping */
@@ -256,8 +249,6 @@ struct amdgpu_vm_pte_funcs {
uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags);
- /* pad the indirect buffer to the necessary number of dw */
- void (*pad_ib)(struct amdgpu_ib *ib);
};
/* provided by the gmc block */
@@ -295,12 +286,11 @@ struct amdgpu_ring_funcs {
struct amdgpu_ib *ib);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags);
- bool (*emit_semaphore)(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait);
+ void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
uint64_t pd_addr);
void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+ void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
uint32_t gds_base, uint32_t gds_size,
uint32_t gws_base, uint32_t gws_size,
@@ -310,6 +300,8 @@ struct amdgpu_ring_funcs {
int (*test_ib)(struct amdgpu_ring *ring);
/* insert NOP packets */
void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
+ /* pad the indirect buffer to the necessary number of dw */
+ void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
};
/*
@@ -355,13 +347,15 @@ struct amdgpu_fence_driver {
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
- uint64_t sync_seq[AMDGPU_MAX_RINGS];
- atomic64_t last_seq;
+ uint32_t sync_seq;
+ atomic_t last_seq;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
struct timer_list fallback_timer;
- wait_queue_head_t fence_queue;
+ unsigned num_fences_mask;
+ spinlock_t lock;
+ struct fence **fences;
};
/* some special values for the owner field */
@@ -371,19 +365,6 @@ struct amdgpu_fence_driver {
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
-struct amdgpu_fence {
- struct fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
- uint64_t seq;
-
- /* filp or special value for fence creator */
- void *owner;
-
- wait_queue_t fence_wake;
-};
-
struct amdgpu_user_fence {
/* write-back bo */
struct amdgpu_bo *bo;
@@ -395,24 +376,18 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
- struct amdgpu_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
void amdgpu_fence_process(struct amdgpu_ring *ring);
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
-bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
- struct amdgpu_ring *ring);
-void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
- struct amdgpu_ring *ring);
-
/*
* TTM.
*/
@@ -431,6 +406,8 @@ struct amdgpu_mman {
/* buffer handling */
const struct amdgpu_buffer_funcs *buffer_funcs;
struct amdgpu_ring *buffer_funcs_ring;
+ /* Scheduler entity for buffer moves */
+ struct amd_sched_entity entity;
};
int amdgpu_copy_buffer(struct amdgpu_ring *ring,
@@ -445,9 +422,9 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo *robj;
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
- unsigned prefered_domains;
- unsigned allowed_domains;
uint32_t priority;
+ struct page **user_pages;
+ int user_invalidated;
};
struct amdgpu_bo_va_mapping {
@@ -459,7 +436,6 @@ struct amdgpu_bo_va_mapping {
/* bo virtual addresses in a specific vm */
struct amdgpu_bo_va {
- struct mutex mutex;
/* protected by bo being reserved */
struct list_head bo_list;
struct fence *last_pt_update;
@@ -483,7 +459,8 @@ struct amdgpu_bo {
/* Protected by gem.mutex */
struct list_head list;
/* Protected by tbo.reserved */
- u32 initial_domain;
+ u32 prefered_domains;
+ u32 allowed_domains;
struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
struct ttm_placement placement;
struct ttm_buffer_object tbo;
@@ -505,7 +482,6 @@ struct amdgpu_bo {
struct amdgpu_bo *parent;
struct ttm_bo_kmap_obj dma_buf_vmap;
- pid_t pid;
struct amdgpu_mn *mn;
struct list_head mn_list;
};
@@ -554,11 +530,14 @@ int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
* Assumption is that there won't be hole (all object on same
* alignment).
*/
+
+#define AMDGPU_SA_NUM_FENCE_LISTS 32
+
struct amdgpu_sa_manager {
wait_queue_head_t wq;
struct amdgpu_bo *bo;
struct list_head *hole;
- struct list_head flist[AMDGPU_MAX_RINGS];
+ struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
struct list_head olist;
unsigned size;
uint64_t gpu_addr;
@@ -580,13 +559,7 @@ struct amdgpu_sa_bo {
/*
* GEM objects.
*/
-struct amdgpu_gem {
- struct mutex mutex;
- struct list_head objects;
-};
-
-int amdgpu_gem_init(struct amdgpu_device *adev);
-void amdgpu_gem_fini(struct amdgpu_device *adev);
+void amdgpu_gem_force_release(struct amdgpu_device *adev);
int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
int alignment, u32 initial_domain,
u64 flags, bool kernel,
@@ -598,32 +571,10 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
int amdgpu_mode_dumb_mmap(struct drm_file *filp,
struct drm_device *dev,
uint32_t handle, uint64_t *offset_p);
-
-/*
- * Semaphores.
- */
-struct amdgpu_semaphore {
- struct amdgpu_sa_bo *sa_bo;
- signed waiters;
- uint64_t gpu_addr;
-};
-
-int amdgpu_semaphore_create(struct amdgpu_device *adev,
- struct amdgpu_semaphore **semaphore);
-bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore);
-bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore);
-void amdgpu_semaphore_free(struct amdgpu_device *adev,
- struct amdgpu_semaphore **semaphore,
- struct fence *fence);
-
/*
* Synchronization
*/
struct amdgpu_sync {
- struct amdgpu_semaphore *semaphores[AMDGPU_NUM_SYNCS];
- struct fence *sync_to[AMDGPU_MAX_RINGS];
DECLARE_HASHTABLE(fences, 4);
struct fence *last_vm_update;
};
@@ -635,12 +586,11 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
struct reservation_object *resv,
void *owner);
-int amdgpu_sync_rings(struct amdgpu_sync *sync,
- struct amdgpu_ring *ring);
struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_wait(struct amdgpu_sync *sync);
-void amdgpu_sync_free(struct amdgpu_device *adev, struct amdgpu_sync *sync,
- struct fence *fence);
+void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
/*
* GART structures, functions & helpers
@@ -758,6 +708,7 @@ struct amdgpu_flip_work {
struct fence *excl;
unsigned shared_count;
struct fence **shared;
+ struct fence_cb cb;
};
@@ -770,12 +721,11 @@ struct amdgpu_ib {
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
- struct amdgpu_ring *ring;
- struct amdgpu_fence *fence;
struct amdgpu_user_fence *user;
struct amdgpu_vm *vm;
+ unsigned vm_id;
+ uint64_t vm_pd_addr;
struct amdgpu_ctx *ctx;
- struct amdgpu_sync sync;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
@@ -794,13 +744,14 @@ enum amdgpu_ring_type {
extern struct amd_sched_backend_ops amdgpu_sched_ops;
-int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_ib *ibs,
- unsigned num_ibs,
- int (*free_job)(struct amdgpu_job *),
- void *owner,
- struct fence **fence);
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+ struct amdgpu_job **job);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+ struct amdgpu_job **job);
+void amdgpu_job_free(struct amdgpu_job *job);
+int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct amd_sched_entity *entity, void *owner,
+ struct fence **f);
struct amdgpu_ring {
struct amdgpu_device *adev;
@@ -809,7 +760,6 @@ struct amdgpu_ring {
struct amd_gpu_scheduler sched;
spinlock_t fence_lock;
- struct mutex *ring_lock;
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
unsigned rptr_offs;
@@ -818,7 +768,7 @@ struct amdgpu_ring {
unsigned wptr;
unsigned wptr_old;
unsigned ring_size;
- unsigned ring_free_dw;
+ unsigned max_dw;
int count_dw;
uint64_t gpu_addr;
uint32_t align_mask;
@@ -826,8 +776,6 @@ struct amdgpu_ring {
bool ready;
u32 nop;
u32 idx;
- u64 last_semaphore_signal_addr;
- u64 last_semaphore_wait_addr;
u32 me;
u32 pipe;
u32 queue;
@@ -840,7 +788,6 @@ struct amdgpu_ring {
struct amdgpu_ctx *current_ctx;
enum amdgpu_ring_type type;
char name[16];
- bool is_pte_ring;
};
/*
@@ -884,13 +831,14 @@ struct amdgpu_vm_pt {
};
struct amdgpu_vm_id {
- unsigned id;
- uint64_t pd_gpu_addr;
+ struct amdgpu_vm_manager_id *mgr_id;
+ uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
- struct fence *flushed_updates;
+ struct fence *flushed_updates;
};
struct amdgpu_vm {
+ /* tree of virtual addresses mapped */
struct rb_root va;
/* protecting invalidated */
@@ -915,30 +863,47 @@ struct amdgpu_vm {
/* for id and flush management per ring */
struct amdgpu_vm_id ids[AMDGPU_MAX_RINGS];
- /* for interval tree */
- spinlock_t it_lock;
+
/* protecting freed */
spinlock_t freed_lock;
+
+ /* Scheduler entity for page table updates */
+ struct amd_sched_entity entity;
+};
+
+struct amdgpu_vm_manager_id {
+ struct list_head list;
+ struct fence *active;
+ atomic_long_t owner;
+
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_base;
+ uint32_t oa_size;
};
struct amdgpu_vm_manager {
- struct {
- struct fence *active;
- atomic_long_t owner;
- } ids[AMDGPU_NUM_VM];
+ /* Handling of VMIDs */
+ struct mutex lock;
+ unsigned num_ids;
+ struct list_head ids_lru;
+ struct amdgpu_vm_manager_id ids[AMDGPU_NUM_VM];
uint32_t max_pfn;
- /* number of VMIDs */
- unsigned nvm;
/* vram base address for page table entry */
u64 vram_base_offset;
/* is vm enabled? */
bool enabled;
/* vm pte handling */
const struct amdgpu_vm_pte_funcs *vm_pte_funcs;
- struct amdgpu_ring *vm_pte_funcs_ring;
+ struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS];
+ unsigned vm_pte_num_rings;
+ atomic_t vm_pte_next_ring;
};
+void amdgpu_vm_manager_init(struct amdgpu_device *adev);
void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
@@ -949,14 +914,15 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync);
+ struct amdgpu_sync *sync, struct fence *fence,
+ unsigned *vm_id, uint64_t *vm_pd_addr);
void amdgpu_vm_flush(struct amdgpu_ring *ring,
- struct amdgpu_vm *vm,
- struct fence *updates);
-void amdgpu_vm_fence(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct fence *fence);
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr);
+ unsigned vm_id, uint64_t pd_addr,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
@@ -982,7 +948,6 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
uint64_t addr);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
-int amdgpu_vm_free_job(struct amdgpu_job *job);
/*
* context related structures
@@ -1010,10 +975,6 @@ struct amdgpu_ctx_mgr {
struct idr ctx_handles;
};
-int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
- struct amdgpu_ctx *ctx);
-void amdgpu_ctx_fini(struct amdgpu_ctx *ctx);
-
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id);
int amdgpu_ctx_put(struct amdgpu_ctx *ctx);
@@ -1048,13 +1009,15 @@ struct amdgpu_bo_list {
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
- bool has_userptr;
+ unsigned first_userptr;
unsigned num_entries;
struct amdgpu_bo_list_entry *array;
};
struct amdgpu_bo_list *
amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+ struct list_head *validated);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
@@ -1128,6 +1091,7 @@ struct amdgpu_gca_config {
unsigned multi_gpu_tile_size;
unsigned mc_arb_ramcfg;
unsigned gb_addr_config;
+ unsigned num_rbs;
uint32_t tile_mode_array[32];
uint32_t macrotile_mode_array[16];
@@ -1170,23 +1134,20 @@ struct amdgpu_gfx {
unsigned ce_ram_size;
};
-int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib);
-int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_ib *ib, void *owner);
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ib, struct fence *last_vm_update,
+ struct fence **f);
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
int amdgpu_ib_ring_tests(struct amdgpu_device *adev);
-/* Ring access between begin & end cannot sleep */
-void amdgpu_ring_free_size(struct amdgpu_ring *ring);
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
-int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw);
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_ring_commit(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring);
void amdgpu_ring_undo(struct amdgpu_ring *ring);
-void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring);
unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
uint32_t **data);
int amdgpu_ring_restore(struct amdgpu_ring *ring,
@@ -1196,7 +1157,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src, unsigned irq_type,
enum amdgpu_ring_type ring_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
/*
* CS.
@@ -1205,47 +1165,58 @@ struct amdgpu_cs_chunk {
uint32_t chunk_id;
uint32_t length_dw;
uint32_t *kdata;
- void __user *user_ptr;
};
struct amdgpu_cs_parser {
struct amdgpu_device *adev;
struct drm_file *filp;
struct amdgpu_ctx *ctx;
- struct amdgpu_bo_list *bo_list;
+
/* chunks */
unsigned nchunks;
struct amdgpu_cs_chunk *chunks;
- /* relocations */
- struct amdgpu_bo_list_entry vm_pd;
- struct list_head validated;
- struct fence *fence;
- struct amdgpu_ib *ibs;
- uint32_t num_ibs;
+ /* scheduler job object */
+ struct amdgpu_job *job;
- struct ww_acquire_ctx ticket;
+ /* buffer objects */
+ struct ww_acquire_ctx ticket;
+ struct amdgpu_bo_list *bo_list;
+ struct amdgpu_bo_list_entry vm_pd;
+ struct list_head validated;
+ struct fence *fence;
+ uint64_t bytes_moved_threshold;
+ uint64_t bytes_moved;
/* user fence */
- struct amdgpu_user_fence uf;
struct amdgpu_bo_list_entry uf_entry;
};
struct amdgpu_job {
struct amd_sched_job base;
struct amdgpu_device *adev;
+ struct amdgpu_ring *ring;
+ struct amdgpu_sync sync;
struct amdgpu_ib *ibs;
+ struct fence *fence; /* the hw fence */
uint32_t num_ibs;
void *owner;
struct amdgpu_user_fence uf;
- int (*free_job)(struct amdgpu_job *job);
};
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
-static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p, uint32_t ib_idx, int idx)
+static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
+ uint32_t ib_idx, int idx)
+{
+ return p->job->ibs[ib_idx].ptr[idx];
+}
+
+static inline void amdgpu_set_ib_value(struct amdgpu_cs_parser *p,
+ uint32_t ib_idx, int idx,
+ uint32_t value)
{
- return p->ibs[ib_idx].ptr[idx];
+ p->job->ibs[ib_idx].ptr[idx] = value;
}
/*
@@ -1497,6 +1468,7 @@ enum amdgpu_dpm_forced_level {
AMDGPU_DPM_FORCED_LEVEL_AUTO = 0,
AMDGPU_DPM_FORCED_LEVEL_LOW = 1,
AMDGPU_DPM_FORCED_LEVEL_HIGH = 2,
+ AMDGPU_DPM_FORCED_LEVEL_MANUAL = 3,
};
struct amdgpu_vce_state {
@@ -1626,6 +1598,7 @@ struct amdgpu_uvd {
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
bool address_64_bit;
+ struct amd_sched_entity entity;
};
/*
@@ -1650,6 +1623,7 @@ struct amdgpu_vce {
struct amdgpu_ring ring[AMDGPU_MAX_VCE_RINGS];
struct amdgpu_irq_src irq;
unsigned harvest_config;
+ struct amd_sched_entity entity;
};
/*
@@ -1883,6 +1857,18 @@ void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
void amdgpu_cgs_destroy_device(void *cgs_device);
+/*
+ * CGS
+ */
+void *amdgpu_cgs_create_device(struct amdgpu_device *adev);
+void amdgpu_cgs_destroy_device(void *cgs_device);
+
+
+/* GPU virtualization */
+struct amdgpu_virtualization {
+ bool supports_sr_iov;
+};
+
/*
* Core structure, functions and helpers.
*/
@@ -1903,6 +1889,10 @@ struct amdgpu_device {
struct drm_device *ddev;
struct pci_dev *pdev;
+#ifdef CONFIG_DRM_AMD_ACP
+ struct amdgpu_acp acp;
+#endif
+
/* ASIC */
enum amd_asic_type asic_type;
uint32_t family;
@@ -1979,7 +1969,6 @@ struct amdgpu_device {
/* memory management */
struct amdgpu_mman mman;
- struct amdgpu_gem gem;
struct amdgpu_vram_scratch vram_scratch;
struct amdgpu_wb wb;
atomic64_t vram_usage;
@@ -1997,7 +1986,6 @@ struct amdgpu_device {
/* rings */
unsigned fence_context;
- struct mutex ring_lock;
unsigned num_rings;
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
bool ib_pool_ready;
@@ -2009,6 +1997,7 @@ struct amdgpu_device {
/* powerplay */
struct amd_powerplay powerplay;
bool pp_enabled;
+ bool pp_force_state_enabled;
/* dpm */
struct amdgpu_pm pm;
@@ -2025,7 +2014,6 @@ struct amdgpu_device {
struct amdgpu_sdma sdma;
/* uvd */
- bool has_uvd;
struct amdgpu_uvd uvd;
/* vce */
@@ -2050,8 +2038,7 @@ struct amdgpu_device {
/* amdkfd interface */
struct kfd_dev *kfd;
- /* kernel conext for IB submission */
- struct amdgpu_ctx kernel_ctx;
+ struct amdgpu_virtualization virtualization;
};
bool amdgpu_device_is_px(struct drm_device *dev);
@@ -2072,20 +2059,6 @@ void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v);
u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
-/*
- * Cast helper
- */
-extern const struct fence_ops amdgpu_fence_ops;
-static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
-{
- struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
-
- if (__f->base.ops == &amdgpu_fence_ops)
- return __f;
-
- return NULL;
-}
-
/*
* Registers read & write functions.
*/
@@ -2156,7 +2129,6 @@ static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
ring->ring[ring->wptr++] = v;
ring->wptr &= ring->ptr_mask;
ring->count_dw--;
- ring->ring_free_dw--;
}
static inline struct amdgpu_sdma_instance *
@@ -2192,9 +2164,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_gart_flush_gpu_tlb(adev, vmid) (adev)->gart.gart_funcs->flush_gpu_tlb((adev), (vmid))
#define amdgpu_gart_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gart.gart_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags))
#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count)))
-#define amdgpu_vm_write_pte(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (addr), (count), (incr), (flags)))
+#define amdgpu_vm_write_pte(adev, ib, pa, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pa), (pe), (addr), (count), (incr), (flags)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
-#define amdgpu_vm_pad_ib(adev, ib) ((adev)->vm_manager.vm_pte_funcs->pad_ib((ib)))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
@@ -2202,11 +2173,13 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
-#define amdgpu_ring_emit_semaphore(r, semaphore, emit_wait) (r)->funcs->emit_semaphore((r), (semaphore), (emit_wait))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
+#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
@@ -2298,6 +2271,21 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_dpm_get_performance_level(adev) \
(adev)->powerplay.pp_funcs->get_performance_level((adev)->powerplay.pp_handle)
+#define amdgpu_dpm_get_pp_num_states(adev, data) \
+ (adev)->powerplay.pp_funcs->get_pp_num_states((adev)->powerplay.pp_handle, data)
+
+#define amdgpu_dpm_get_pp_table(adev, table) \
+ (adev)->powerplay.pp_funcs->get_pp_table((adev)->powerplay.pp_handle, table)
+
+#define amdgpu_dpm_set_pp_table(adev, buf, size) \
+ (adev)->powerplay.pp_funcs->set_pp_table((adev)->powerplay.pp_handle, buf, size)
+
+#define amdgpu_dpm_print_clock_levels(adev, type, buf) \
+ (adev)->powerplay.pp_funcs->print_clock_levels((adev)->powerplay.pp_handle, type, buf)
+
+#define amdgpu_dpm_force_clock_level(adev, type, level) \
+ (adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)
+
#define amdgpu_dpm_dispatch_task(adev, event_id, input, output) \
(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))
@@ -2308,7 +2296,6 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev);
void amdgpu_pci_config_reset(struct amdgpu_device *adev);
bool amdgpu_card_posted(struct amdgpu_device *adev);
void amdgpu_update_display_priority(struct amdgpu_device *adev);
-bool amdgpu_boot_test_post_card(struct amdgpu_device *adev);
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
@@ -2316,11 +2303,15 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
struct amdgpu_ring **out_ring);
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags);
bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+ int *last_invalidated);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
new file mode 100644
index 0000000000000..d6b0bff510aaa
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -0,0 +1,500 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#include
+#include
+#include
+#include
+#include
+
+#include "amdgpu.h"
+#include "atom.h"
+#include "amdgpu_acp.h"
+
+#include "acp_gfx_if.h"
+
+#define ACP_TILE_ON_MASK 0x03
+#define ACP_TILE_OFF_MASK 0x02
+#define ACP_TILE_ON_RETAIN_REG_MASK 0x1f
+#define ACP_TILE_OFF_RETAIN_REG_MASK 0x20
+
+#define ACP_TILE_P1_MASK 0x3e
+#define ACP_TILE_P2_MASK 0x3d
+#define ACP_TILE_DSP0_MASK 0x3b
+#define ACP_TILE_DSP1_MASK 0x37
+
+#define ACP_TILE_DSP2_MASK 0x2f
+
+#define ACP_DMA_REGS_END 0x146c0
+#define ACP_I2S_PLAY_REGS_START 0x14840
+#define ACP_I2S_PLAY_REGS_END 0x148b4
+#define ACP_I2S_CAP_REGS_START 0x148b8
+#define ACP_I2S_CAP_REGS_END 0x1496c
+
+#define ACP_I2S_COMP1_CAP_REG_OFFSET 0xac
+#define ACP_I2S_COMP2_CAP_REG_OFFSET 0xa8
+#define ACP_I2S_COMP1_PLAY_REG_OFFSET 0x6c
+#define ACP_I2S_COMP2_PLAY_REG_OFFSET 0x68
+
+#define mmACP_PGFSM_RETAIN_REG 0x51c9
+#define mmACP_PGFSM_CONFIG_REG 0x51ca
+#define mmACP_PGFSM_READ_REG_0 0x51cc
+
+#define mmACP_MEM_SHUT_DOWN_REQ_LO 0x51f8
+#define mmACP_MEM_SHUT_DOWN_REQ_HI 0x51f9
+#define mmACP_MEM_SHUT_DOWN_STS_LO 0x51fa
+#define mmACP_MEM_SHUT_DOWN_STS_HI 0x51fb
+
+#define ACP_TIMEOUT_LOOP 0x000000FF
+#define ACP_DEVS 3
+#define ACP_SRC_ID 162
+
+enum {
+ ACP_TILE_P1 = 0,
+ ACP_TILE_P2,
+ ACP_TILE_DSP0,
+ ACP_TILE_DSP1,
+ ACP_TILE_DSP2,
+};
+
+static int acp_sw_init(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ adev->acp.parent = adev->dev;
+
+ adev->acp.cgs_device =
+ amdgpu_cgs_create_device(adev);
+ if (!adev->acp.cgs_device)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int acp_sw_fini(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (adev->acp.cgs_device)
+ amdgpu_cgs_destroy_device(adev->acp.cgs_device);
+
+ return 0;
+}
+
+/* power off a tile/block within ACP */
+static int acp_suspend_tile(void *cgs_dev, int tile)
+{
+ u32 val = 0;
+ u32 count = 0;
+
+ if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+ pr_err("Invalid ACP tile : %d to suspend\n", tile);
+ return -1;
+ }
+
+ val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+ val &= ACP_TILE_ON_MASK;
+
+ if (val == 0x0) {
+ val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+ val = val | (1 << tile);
+ cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+ cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+ 0x500 + tile);
+
+ count = ACP_TIMEOUT_LOOP;
+ while (true) {
+ val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ + tile);
+ val = val & ACP_TILE_ON_MASK;
+ if (val == ACP_TILE_OFF_MASK)
+ break;
+ if (--count == 0) {
+ pr_err("Timeout reading ACP PGFSM status\n");
+ return -ETIMEDOUT;
+ }
+ udelay(100);
+ }
+
+ val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+
+ val |= ACP_TILE_OFF_RETAIN_REG_MASK;
+ cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+ }
+ return 0;
+}
+
+/* power on a tile/block within ACP */
+static int acp_resume_tile(void *cgs_dev, int tile)
+{
+ u32 val = 0;
+ u32 count = 0;
+
+ if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) {
+ pr_err("Invalid ACP tile to resume\n");
+ return -1;
+ }
+
+ val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile);
+ val = val & ACP_TILE_ON_MASK;
+
+ if (val != 0x0) {
+ cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG,
+ 0x600 + tile);
+ count = ACP_TIMEOUT_LOOP;
+ while (true) {
+ val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0
+ + tile);
+ val = val & ACP_TILE_ON_MASK;
+ if (val == 0x0)
+ break;
+ if (--count == 0) {
+ pr_err("Timeout reading ACP PGFSM status\n");
+ return -ETIMEDOUT;
+ }
+ udelay(100);
+ }
+ val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG);
+ if (tile == ACP_TILE_P1)
+ val = val & (ACP_TILE_P1_MASK);
+ else if (tile == ACP_TILE_P2)
+ val = val & (ACP_TILE_P2_MASK);
+
+ cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val);
+ }
+ return 0;
+}
+
+struct acp_pm_domain {
+ void *cgs_dev;
+ struct generic_pm_domain gpd;
+};
+
+static int acp_poweroff(struct generic_pm_domain *genpd)
+{
+ int i, ret;
+ struct acp_pm_domain *apd;
+
+ apd = container_of(genpd, struct acp_pm_domain, gpd);
+ if (apd != NULL) {
+ /* Donot return abruptly if any of power tile fails to suspend.
+ * Log it and continue powering off other tile
+ */
+ for (i = 4; i >= 0 ; i--) {
+ ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+ if (ret)
+ pr_err("ACP tile %d tile suspend failed\n", i);
+ }
+ }
+ return 0;
+}
+
+static int acp_poweron(struct generic_pm_domain *genpd)
+{
+ int i, ret;
+ struct acp_pm_domain *apd;
+
+ apd = container_of(genpd, struct acp_pm_domain, gpd);
+ if (apd != NULL) {
+ for (i = 0; i < 2; i++) {
+ ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+ if (ret) {
+ pr_err("ACP tile %d resume failed\n", i);
+ break;
+ }
+ }
+
+ /* Disable DSPs which are not going to be used */
+ for (i = 0; i < 3; i++) {
+ ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i);
+ /* Continue suspending other DSP, even if one fails */
+ if (ret)
+ pr_err("ACP DSP %d suspend failed\n", i);
+ }
+ }
+ return 0;
+}
+
+static struct device *get_mfd_cell_dev(const char *device_name, int r)
+{
+ char auto_dev_name[25];
+ struct device *dev;
+
+ snprintf(auto_dev_name, sizeof(auto_dev_name),
+ "%s.%d.auto", device_name, r);
+ dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name);
+ dev_info(dev, "device %s added to pm domain\n", auto_dev_name);
+
+ return dev;
+}
+
+/**
+ * acp_hw_init - start and test ACP block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static int acp_hw_init(void *handle)
+{
+ int r, i;
+ uint64_t acp_base;
+ struct device *dev;
+ struct i2s_platform_data *i2s_pdata;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ const struct amdgpu_ip_block_version *ip_version =
+ amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_ACP);
+
+ if (!ip_version)
+ return -EINVAL;
+
+ r = amd_acp_hw_init(adev->acp.cgs_device,
+ ip_version->major, ip_version->minor);
+ /* -ENODEV means board uses AZ rather than ACP */
+ if (r == -ENODEV)
+ return 0;
+ else if (r)
+ return r;
+
+ r = cgs_get_pci_resource(adev->acp.cgs_device, CGS_RESOURCE_TYPE_MMIO,
+ 0x5289, 0, &acp_base);
+ if (r == -ENODEV)
+ return 0;
+ else if (r)
+ return r;
+
+ adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL);
+ if (adev->acp.acp_genpd == NULL)
+ return -ENOMEM;
+
+ adev->acp.acp_genpd->gpd.name = "ACP_AUDIO";
+ adev->acp.acp_genpd->gpd.power_off = acp_poweroff;
+ adev->acp.acp_genpd->gpd.power_on = acp_poweron;
+
+
+ adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device;
+
+ pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false);
+
+ adev->acp.acp_cell = kzalloc(sizeof(struct mfd_cell) * ACP_DEVS,
+ GFP_KERNEL);
+
+ if (adev->acp.acp_cell == NULL)
+ return -ENOMEM;
+
+ adev->acp.acp_res = kzalloc(sizeof(struct resource) * 4, GFP_KERNEL);
+
+ if (adev->acp.acp_res == NULL) {
+ kfree(adev->acp.acp_cell);
+ return -ENOMEM;
+ }
+
+ i2s_pdata = kzalloc(sizeof(struct i2s_platform_data) * 2, GFP_KERNEL);
+ if (i2s_pdata == NULL) {
+ kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_cell);
+ return -ENOMEM;
+ }
+
+ i2s_pdata[0].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET;
+ i2s_pdata[0].cap = DWC_I2S_PLAY;
+ i2s_pdata[0].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[0].i2s_reg_comp1 = ACP_I2S_COMP1_PLAY_REG_OFFSET;
+ i2s_pdata[0].i2s_reg_comp2 = ACP_I2S_COMP2_PLAY_REG_OFFSET;
+
+ i2s_pdata[1].quirks = DW_I2S_QUIRK_COMP_REG_OFFSET |
+ DW_I2S_QUIRK_COMP_PARAM1;
+ i2s_pdata[1].cap = DWC_I2S_RECORD;
+ i2s_pdata[1].snd_rates = SNDRV_PCM_RATE_8000_96000;
+ i2s_pdata[1].i2s_reg_comp1 = ACP_I2S_COMP1_CAP_REG_OFFSET;
+ i2s_pdata[1].i2s_reg_comp2 = ACP_I2S_COMP2_CAP_REG_OFFSET;
+
+ adev->acp.acp_res[0].name = "acp2x_dma";
+ adev->acp.acp_res[0].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[0].start = acp_base;
+ adev->acp.acp_res[0].end = acp_base + ACP_DMA_REGS_END;
+
+ adev->acp.acp_res[1].name = "acp2x_dw_i2s_play";
+ adev->acp.acp_res[1].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[1].start = acp_base + ACP_I2S_PLAY_REGS_START;
+ adev->acp.acp_res[1].end = acp_base + ACP_I2S_PLAY_REGS_END;
+
+ adev->acp.acp_res[2].name = "acp2x_dw_i2s_cap";
+ adev->acp.acp_res[2].flags = IORESOURCE_MEM;
+ adev->acp.acp_res[2].start = acp_base + ACP_I2S_CAP_REGS_START;
+ adev->acp.acp_res[2].end = acp_base + ACP_I2S_CAP_REGS_END;
+
+ adev->acp.acp_res[3].name = "acp2x_dma_irq";
+ adev->acp.acp_res[3].flags = IORESOURCE_IRQ;
+ adev->acp.acp_res[3].start = amdgpu_irq_create_mapping(adev, 162);
+ adev->acp.acp_res[3].end = adev->acp.acp_res[3].start;
+
+ adev->acp.acp_cell[0].name = "acp_audio_dma";
+ adev->acp.acp_cell[0].num_resources = 4;
+ adev->acp.acp_cell[0].resources = &adev->acp.acp_res[0];
+
+ adev->acp.acp_cell[1].name = "designware-i2s";
+ adev->acp.acp_cell[1].num_resources = 1;
+ adev->acp.acp_cell[1].resources = &adev->acp.acp_res[1];
+ adev->acp.acp_cell[1].platform_data = &i2s_pdata[0];
+ adev->acp.acp_cell[1].pdata_size = sizeof(struct i2s_platform_data);
+
+ adev->acp.acp_cell[2].name = "designware-i2s";
+ adev->acp.acp_cell[2].num_resources = 1;
+ adev->acp.acp_cell[2].resources = &adev->acp.acp_res[2];
+ adev->acp.acp_cell[2].platform_data = &i2s_pdata[1];
+ adev->acp.acp_cell[2].pdata_size = sizeof(struct i2s_platform_data);
+
+ r = mfd_add_hotplug_devices(adev->acp.parent, adev->acp.acp_cell,
+ ACP_DEVS);
+ if (r)
+ return r;
+
+ for (i = 0; i < ACP_DEVS ; i++) {
+ dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+ r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev);
+ if (r) {
+ dev_err(dev, "Failed to add dev to genpd\n");
+ return r;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * acp_hw_fini - stop the hardware block
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+static int acp_hw_fini(void *handle)
+{
+ int i, ret;
+ struct device *dev;
+
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ for (i = 0; i < ACP_DEVS ; i++) {
+ dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i);
+ ret = pm_genpd_remove_device(&adev->acp.acp_genpd->gpd, dev);
+ /* If removal fails, dont giveup and try rest */
+ if (ret)
+ dev_err(dev, "remove dev from genpd failed\n");
+ }
+
+ mfd_remove_devices(adev->acp.parent);
+ kfree(adev->acp.acp_res);
+ kfree(adev->acp.acp_genpd);
+ kfree(adev->acp.acp_cell);
+
+ return 0;
+}
+
+static int acp_suspend(void *handle)
+{
+ return 0;
+}
+
+static int acp_resume(void *handle)
+{
+ int i, ret;
+ struct acp_pm_domain *apd;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ /* SMU block will power on ACP irrespective of ACP runtime status.
+ * Power off explicitly based on genpd ACP runtime status so that ACP
+ * hw and ACP-genpd status are in sync.
+ * 'suspend_power_off' represents "Power status before system suspend"
+ */
+ if (adev->acp.acp_genpd->gpd.suspend_power_off == true) {
+ apd = container_of(&adev->acp.acp_genpd->gpd,
+ struct acp_pm_domain, gpd);
+
+ for (i = 4; i >= 0 ; i--) {
+ ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i);
+ if (ret)
+ pr_err("ACP tile %d tile suspend failed\n", i);
+ }
+ }
+ return 0;
+}
+
+static int acp_early_init(void *handle)
+{
+ return 0;
+}
+
+static bool acp_is_idle(void *handle)
+{
+ return true;
+}
+
+static int acp_wait_for_idle(void *handle)
+{
+ return 0;
+}
+
+static int acp_soft_reset(void *handle)
+{
+ return 0;
+}
+
+static void acp_print_status(void *handle)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ dev_info(adev->dev, "ACP STATUS\n");
+}
+
+static int acp_set_clockgating_state(void *handle,
+ enum amd_clockgating_state state)
+{
+ return 0;
+}
+
+static int acp_set_powergating_state(void *handle,
+ enum amd_powergating_state state)
+{
+ return 0;
+}
+
+const struct amd_ip_funcs acp_ip_funcs = {
+ .early_init = acp_early_init,
+ .late_init = NULL,
+ .sw_init = acp_sw_init,
+ .sw_fini = acp_sw_fini,
+ .hw_init = acp_hw_init,
+ .hw_fini = acp_hw_fini,
+ .suspend = acp_suspend,
+ .resume = acp_resume,
+ .is_idle = acp_is_idle,
+ .wait_for_idle = acp_wait_for_idle,
+ .soft_reset = acp_soft_reset,
+ .print_status = acp_print_status,
+ .set_clockgating_state = acp_set_clockgating_state,
+ .set_powergating_state = acp_set_powergating_state,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
new file mode 100644
index 0000000000000..f6e32a6391074
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef __AMDGPU_ACP_H__
+#define __AMDGPU_ACP_H__
+
+#include
+
+struct amdgpu_acp {
+ struct device *parent;
+ void *cgs_device;
+ struct amd_acp_private *private;
+ struct mfd_cell *acp_cell;
+ struct resource *acp_res;
+ struct acp_pm_domain *acp_genpd;
+};
+
+extern const struct amd_ip_funcs acp_ip_funcs;
+
+#endif /* __AMDGPU_ACP_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 84d68d658f8a0..32809f749903c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -30,25 +30,38 @@ const struct kfd2kgd_calls *kfd2kgd;
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-bool amdgpu_amdkfd_init(void)
+int amdgpu_amdkfd_init(void)
{
+ int ret;
+
#if defined(CONFIG_HSA_AMD_MODULE)
- bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+ int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
if (kgd2kfd_init_p == NULL)
- return false;
+ return -ENOENT;
+
+ ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
+ if (ret) {
+ symbol_put(kgd2kfd_init);
+ kgd2kfd = NULL;
+ }
+
+#elif defined(CONFIG_HSA_AMD)
+ ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
+ if (ret)
+ kgd2kfd = NULL;
+
+#else
+ ret = -ENOENT;
#endif
- return true;
+
+ return ret;
}
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
{
-#if defined(CONFIG_HSA_AMD_MODULE)
- bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-#endif
-
switch (rdev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
@@ -62,35 +75,7 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
return false;
}
-#if defined(CONFIG_HSA_AMD_MODULE)
- kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
- if (kgd2kfd_init_p == NULL) {
- kfd2kgd = NULL;
- return false;
- }
-
- if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
- symbol_put(kgd2kfd_init);
- kfd2kgd = NULL;
- kgd2kfd = NULL;
-
- return false;
- }
-
return true;
-#elif defined(CONFIG_HSA_AMD)
- if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
- kfd2kgd = NULL;
- kgd2kfd = NULL;
- return false;
- }
-
- return true;
-#else
- kfd2kgd = NULL;
- return false;
-#endif
}
void amdgpu_amdkfd_fini(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a8be765542e64..de530f68d4e39 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -36,7 +36,7 @@ struct kgd_mem {
void *cpu_ptr;
};
-bool amdgpu_amdkfd_init(void);
+int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index 9416e0f5c1db2..84b0ce39ee14d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -1514,6 +1514,19 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
return -EINVAL;
}
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev)
+{
+ int index = GetIndexIntoMasterTable(DATA, GPUVirtualizationInfo);
+ u8 frev, crev;
+ u16 data_offset, size;
+
+ if (amdgpu_atom_parse_data_header(adev->mode_info.atom_context, index, &size,
+ &frev, &crev, &data_offset))
+ return true;
+
+ return false;
+}
+
void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock)
{
uint32_t bios_6_scratch;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 0ebb959ea4358..9e1442053fe4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -196,6 +196,8 @@ int amdgpu_atombios_init_mc_reg_table(struct amdgpu_device *adev,
u8 module_index,
struct atom_mc_reg_table *reg_table);
+bool amdgpu_atombios_has_gpu_virtualization_table(struct amdgpu_device *adev);
+
void amdgpu_atombios_scratch_regs_lock(struct amdgpu_device *adev, bool lock);
void amdgpu_atombios_scratch_regs_init(struct amdgpu_device *adev);
void amdgpu_atombios_scratch_regs_save(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index 3c895863fcf50..0020a0ea43ffb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -63,6 +63,10 @@ bool amdgpu_has_atpx(void) {
return amdgpu_atpx_priv.atpx_detected;
}
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+ return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
/**
* amdgpu_atpx_call - call an ATPX method
*
@@ -142,10 +146,6 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
*/
static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
{
- /* make sure required functions are enabled */
- /* dGPU power control is required */
- atpx->functions.power_cntl = true;
-
if (atpx->functions.px_params) {
union acpi_object *info;
struct atpx_px_params output;
@@ -552,13 +552,14 @@ static bool amdgpu_atpx_detect(void)
void amdgpu_register_atpx_handler(void)
{
bool r;
+ enum vga_switcheroo_handler_flags_t handler_flags = 0;
/* detect if we have any ATPX + 2 VGA in the system */
r = amdgpu_atpx_detect();
if (!r)
return;
- vga_switcheroo_register_handler(&amdgpu_atpx_handler);
+ vga_switcheroo_register_handler(&amdgpu_atpx_handler, handler_flags);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index f82a2dd83874d..eacd810fc09b1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -32,6 +32,9 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
+#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
+#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
+
static int amdgpu_bo_list_create(struct amdgpu_fpriv *fpriv,
struct amdgpu_bo_list **result,
int *id)
@@ -88,8 +91,9 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo;
struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo;
- bool has_userptr = false;
+ unsigned last_entry = 0, first_userptr = num_entries;
unsigned i;
+ int r;
array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
if (!array)
@@ -97,33 +101,43 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
for (i = 0; i < num_entries; ++i) {
- struct amdgpu_bo_list_entry *entry = &array[i];
+ struct amdgpu_bo_list_entry *entry;
struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
+ struct mm_struct *usermm;
gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
- if (!gobj)
+ if (!gobj) {
+ r = -ENOENT;
goto error_free;
+ }
- entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
drm_gem_object_unreference_unlocked(gobj);
- entry->priority = info[i].bo_priority;
- entry->prefered_domains = entry->robj->initial_domain;
- entry->allowed_domains = entry->prefered_domains;
- if (entry->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
- entry->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
- if (amdgpu_ttm_tt_has_userptr(entry->robj->tbo.ttm)) {
- has_userptr = true;
- entry->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
- entry->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+
+ usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
+ if (usermm) {
+ if (usermm != current->mm) {
+ amdgpu_bo_unref(&bo);
+ r = -EPERM;
+ goto error_free;
+ }
+ entry = &array[--first_userptr];
+ } else {
+ entry = &array[last_entry++];
}
+
+ entry->robj = bo;
+ entry->priority = min(info[i].bo_priority,
+ AMDGPU_BO_LIST_MAX_PRIORITY);
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
- if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
+ if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GDS)
gds_obj = entry->robj;
- if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
+ if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_GWS)
gws_obj = entry->robj;
- if (entry->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
+ if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
oa_obj = entry->robj;
trace_amdgpu_bo_list_set(list, entry->robj);
@@ -137,15 +151,17 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
list->gds_obj = gds_obj;
list->gws_obj = gws_obj;
list->oa_obj = oa_obj;
- list->has_userptr = has_userptr;
+ list->first_userptr = first_userptr;
list->array = array;
list->num_entries = num_entries;
return 0;
error_free:
+ while (i--)
+ amdgpu_bo_unref(&array[i].robj);
drm_free_large(array);
- return -ENOENT;
+ return r;
}
struct amdgpu_bo_list *
@@ -161,6 +177,37 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
return result;
}
+void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
+ struct list_head *validated)
+{
+ /* This is based on the bucket sort with O(n) time complexity.
+ * An item with priority "i" is added to bucket[i]. The lists are then
+ * concatenated in descending order.
+ */
+ struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
+ unsigned i;
+
+ for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
+ INIT_LIST_HEAD(&bucket[i]);
+
+ /* Since buffers which appear sooner in the relocation list are
+ * likely to be used more often than buffers which appear later
+ * in the list, the sort mustn't change the ordering of buffers
+ * with the same priority, i.e. it must be stable.
+ */
+ for (i = 0; i < list->num_entries; i++) {
+ unsigned priority = list->array[i].priority;
+
+ list_add_tail(&list->array[i].tv.head,
+ &bucket[priority]);
+ list->array[i].user_pages = NULL;
+ }
+
+ /* Connect the sorted buckets in the output list. */
+ for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
+ list_splice(&bucket[i], validated);
+}
+
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
mutex_unlock(&list->lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index b882e8175615f..9392e50a7ba48 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -25,52 +25,12 @@
* Jerome Glisse
*/
#include
+#include
#include
#include
#include "amdgpu.h"
#include "amdgpu_trace.h"
-#define AMDGPU_CS_MAX_PRIORITY 32u
-#define AMDGPU_CS_NUM_BUCKETS (AMDGPU_CS_MAX_PRIORITY + 1)
-
-/* This is based on the bucket sort with O(n) time complexity.
- * An item with priority "i" is added to bucket[i]. The lists are then
- * concatenated in descending order.
- */
-struct amdgpu_cs_buckets {
- struct list_head bucket[AMDGPU_CS_NUM_BUCKETS];
-};
-
-static void amdgpu_cs_buckets_init(struct amdgpu_cs_buckets *b)
-{
- unsigned i;
-
- for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++)
- INIT_LIST_HEAD(&b->bucket[i]);
-}
-
-static void amdgpu_cs_buckets_add(struct amdgpu_cs_buckets *b,
- struct list_head *item, unsigned priority)
-{
- /* Since buffers which appear sooner in the relocation list are
- * likely to be used more often than buffers which appear later
- * in the list, the sort mustn't change the ordering of buffers
- * with the same priority, i.e. it must be stable.
- */
- list_add_tail(item, &b->bucket[min(priority, AMDGPU_CS_MAX_PRIORITY)]);
-}
-
-static void amdgpu_cs_buckets_get_list(struct amdgpu_cs_buckets *b,
- struct list_head *out_list)
-{
- unsigned i;
-
- /* Connect the sorted buckets in the output list. */
- for (i = 0; i < AMDGPU_CS_NUM_BUCKETS; i++) {
- list_splice(&b->bucket[i], out_list);
- }
-}
-
int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
u32 ip_instance, u32 ring,
struct amdgpu_ring **out_ring)
@@ -128,6 +88,7 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
}
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
+ struct amdgpu_user_fence *uf,
struct drm_amdgpu_cs_chunk_fence *fence_data)
{
struct drm_gem_object *gobj;
@@ -139,20 +100,19 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
if (gobj == NULL)
return -EINVAL;
- p->uf.bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
- p->uf.offset = fence_data->offset;
+ uf->bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ uf->offset = fence_data->offset;
- if (amdgpu_ttm_tt_has_userptr(p->uf.bo->tbo.ttm)) {
+ if (amdgpu_ttm_tt_get_usermm(uf->bo->tbo.ttm)) {
drm_gem_object_unreference_unlocked(gobj);
return -EINVAL;
}
- p->uf_entry.robj = amdgpu_bo_ref(p->uf.bo);
- p->uf_entry.prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
- p->uf_entry.allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
+ p->uf_entry.robj = amdgpu_bo_ref(uf->bo);
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
p->uf_entry.tv.shared = true;
+ p->uf_entry.user_pages = NULL;
drm_gem_object_unreference_unlocked(gobj);
return 0;
@@ -160,11 +120,12 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
{
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
- struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- unsigned size;
+ struct amdgpu_user_fence uf = {};
+ unsigned size, num_ibs = 0;
int i;
int ret;
@@ -181,15 +142,12 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto free_chunk;
}
- p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
-
/* get chunks */
- INIT_LIST_HEAD(&p->validated);
chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) {
ret = -EFAULT;
- goto put_bo_list;
+ goto put_ctx;
}
p->nchunks = cs->in.num_chunks;
@@ -197,7 +155,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
GFP_KERNEL);
if (!p->chunks) {
ret = -ENOMEM;
- goto put_bo_list;
+ goto put_ctx;
}
for (i = 0; i < p->nchunks; i++) {
@@ -217,7 +175,6 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
size = p->chunks[i].length_dw;
cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
- p->chunks[i].user_ptr = cdata;
p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
if (p->chunks[i].kdata == NULL) {
@@ -233,7 +190,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
switch (p->chunks[i].chunk_id) {
case AMDGPU_CHUNK_ID_IB:
- p->num_ibs++;
+ ++num_ibs;
break;
case AMDGPU_CHUNK_ID_FENCE:
@@ -243,7 +200,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
goto free_partial_kdata;
}
- ret = amdgpu_cs_user_fence_chunk(p, (void *)p->chunks[i].kdata);
+ ret = amdgpu_cs_user_fence_chunk(p, &uf, (void *)p->chunks[i].kdata);
if (ret)
goto free_partial_kdata;
@@ -258,12 +215,11 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
}
-
- p->ibs = kcalloc(p->num_ibs, sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!p->ibs) {
- ret = -ENOMEM;
+ ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job);
+ if (ret)
goto free_all_kdata;
- }
+
+ p->job->uf = uf;
kfree(chunk_array);
return 0;
@@ -274,9 +230,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
for (; i >= 0; i--)
drm_free_large(p->chunks[i].kdata);
kfree(p->chunks);
-put_bo_list:
- if (p->bo_list)
- amdgpu_bo_list_put(p->bo_list);
+put_ctx:
amdgpu_ctx_put(p->ctx);
free_chunk:
kfree(chunk_array);
@@ -336,96 +290,198 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
return max(bytes_moved_threshold, 1024*1024ull);
}
-int amdgpu_cs_list_validate(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
+int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
struct list_head *validated)
{
struct amdgpu_bo_list_entry *lobj;
- struct amdgpu_bo *bo;
- u64 bytes_moved = 0, initial_bytes_moved;
- u64 bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(adev);
+ u64 initial_bytes_moved;
int r;
list_for_each_entry(lobj, validated, tv.head) {
- bo = lobj->robj;
- if (!bo->pin_count) {
- u32 domain = lobj->prefered_domains;
- u32 current_domain =
- amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
-
- /* Check if this buffer will be moved and don't move it
- * if we have moved too many buffers for this IB already.
- *
- * Note that this allows moving at least one buffer of
- * any size, because it doesn't take the current "bo"
- * into account. We don't want to disallow buffer moves
- * completely.
- */
- if ((lobj->allowed_domains & current_domain) != 0 &&
- (domain & current_domain) == 0 && /* will be moved */
- bytes_moved > bytes_moved_threshold) {
- /* don't move it */
- domain = current_domain;
- }
+ struct amdgpu_bo *bo = lobj->robj;
+ bool binding_userptr = false;
+ struct mm_struct *usermm;
+ uint32_t domain;
+
+ usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
+ if (usermm && usermm != current->mm)
+ return -EPERM;
+
+ /* Check if we have user pages and nobody bound the BO already */
+ if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
+ size_t size = sizeof(struct page *);
+
+ size *= bo->tbo.ttm->num_pages;
+ memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+ binding_userptr = true;
+ }
- retry:
- amdgpu_ttm_placement_from_domain(bo, domain);
- initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
- r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
- bytes_moved += atomic64_read(&adev->num_bytes_moved) -
- initial_bytes_moved;
-
- if (unlikely(r)) {
- if (r != -ERESTARTSYS && domain != lobj->allowed_domains) {
- domain = lobj->allowed_domains;
- goto retry;
- }
- return r;
+ if (bo->pin_count)
+ continue;
+
+ /* Avoid moving this one if we have moved too many buffers
+ * for this IB already.
+ *
+ * Note that this allows moving at least one buffer of
+ * any size, because it doesn't take the current "bo"
+ * into account. We don't want to disallow buffer moves
+ * completely.
+ */
+ if (p->bytes_moved <= p->bytes_moved_threshold)
+ domain = bo->prefered_domains;
+ else
+ domain = bo->allowed_domains;
+
+ retry:
+ amdgpu_ttm_placement_from_domain(bo, domain);
+ initial_bytes_moved = atomic64_read(&bo->adev->num_bytes_moved);
+ r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+ p->bytes_moved += atomic64_read(&bo->adev->num_bytes_moved) -
+ initial_bytes_moved;
+
+ if (unlikely(r)) {
+ if (r != -ERESTARTSYS && domain != bo->allowed_domains) {
+ domain = bo->allowed_domains;
+ goto retry;
}
+ return r;
+ }
+
+ if (binding_userptr) {
+ drm_free_large(lobj->user_pages);
+ lobj->user_pages = NULL;
}
- lobj->bo_va = amdgpu_vm_bo_find(vm, bo);
}
return 0;
}
-static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
+static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_cs_buckets buckets;
+ struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
bool need_mmap_lock = false;
- int i, r;
+ unsigned i, tries = 10;
+ int r;
- if (p->bo_list) {
- need_mmap_lock = p->bo_list->has_userptr;
- amdgpu_cs_buckets_init(&buckets);
- for (i = 0; i < p->bo_list->num_entries; i++)
- amdgpu_cs_buckets_add(&buckets, &p->bo_list->array[i].tv.head,
- p->bo_list->array[i].priority);
+ INIT_LIST_HEAD(&p->validated);
- amdgpu_cs_buckets_get_list(&buckets, &p->validated);
+ p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
+ if (p->bo_list) {
+ need_mmap_lock = p->bo_list->first_userptr !=
+ p->bo_list->num_entries;
+ amdgpu_bo_list_get_list(p->bo_list, &p->validated);
}
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
- if (p->uf.bo)
+ if (p->job->uf.bo)
list_add(&p->uf_entry.tv.head, &p->validated);
if (need_mmap_lock)
down_read(¤t->mm->mmap_sem);
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
- if (unlikely(r != 0))
- goto error_reserve;
+ while (1) {
+ struct list_head need_pages;
+ unsigned i;
+
+ r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+ &duplicates);
+ if (unlikely(r != 0))
+ goto error_free_pages;
+
+ /* Without a BO list we don't have userptr BOs */
+ if (!p->bo_list)
+ break;
+
+ INIT_LIST_HEAD(&need_pages);
+ for (i = p->bo_list->first_userptr;
+ i < p->bo_list->num_entries; ++i) {
+
+ e = &p->bo_list->array[i];
+
+ if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+ &e->user_invalidated) && e->user_pages) {
+
+ /* We acquired a page array, but somebody
+ * invalidated it. Free it an try again
+ */
+ release_pages(e->user_pages,
+ e->robj->tbo.ttm->num_pages,
+ false);
+ drm_free_large(e->user_pages);
+ e->user_pages = NULL;
+ }
+
+ if (e->robj->tbo.ttm->state != tt_bound &&
+ !e->user_pages) {
+ list_del(&e->tv.head);
+ list_add(&e->tv.head, &need_pages);
+
+ amdgpu_bo_unreserve(e->robj);
+ }
+ }
+
+ if (list_empty(&need_pages))
+ break;
+
+ /* Unreserve everything again. */
+ ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+ /* We tried to often, just abort */
+ if (!--tries) {
+ r = -EDEADLK;
+ goto error_free_pages;
+ }
+
+ /* Fill the page arrays for all useptrs. */
+ list_for_each_entry(e, &need_pages, tv.head) {
+ struct ttm_tt *ttm = e->robj->tbo.ttm;
+
+ e->user_pages = drm_calloc_large(ttm->num_pages,
+ sizeof(struct page*));
+ if (!e->user_pages) {
+ r = -ENOMEM;
+ goto error_free_pages;
+ }
+
+ r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
+ if (r) {
+ drm_free_large(e->user_pages);
+ e->user_pages = NULL;
+ goto error_free_pages;
+ }
+ }
+
+ /* And try again. */
+ list_splice(&need_pages, &p->validated);
+ }
amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
- r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &duplicates);
+ p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+ p->bytes_moved = 0;
+
+ r = amdgpu_cs_list_validate(p, &duplicates);
+ if (r)
+ goto error_validate;
+
+ r = amdgpu_cs_list_validate(p, &p->validated);
if (r)
goto error_validate;
- r = amdgpu_cs_list_validate(p->adev, &fpriv->vm, &p->validated);
+ if (p->bo_list) {
+ struct amdgpu_vm *vm = &fpriv->vm;
+ unsigned i;
+
+ for (i = 0; i < p->bo_list->num_entries; i++) {
+ struct amdgpu_bo *bo = p->bo_list->array[i].robj;
+
+ p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
+ }
+ }
error_validate:
if (r) {
@@ -433,10 +489,26 @@ static int amdgpu_cs_parser_relocs(struct amdgpu_cs_parser *p)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
}
-error_reserve:
+error_free_pages:
+
if (need_mmap_lock)
up_read(¤t->mm->mmap_sem);
+ if (p->bo_list) {
+ for (i = p->bo_list->first_userptr;
+ i < p->bo_list->num_entries; ++i) {
+ e = &p->bo_list->array[i];
+
+ if (!e->user_pages)
+ continue;
+
+ release_pages(e->user_pages,
+ e->robj->tbo.ttm->num_pages,
+ false);
+ drm_free_large(e->user_pages);
+ }
+ }
+
return r;
}
@@ -447,7 +519,7 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
list_for_each_entry(e, &p->validated, tv.head) {
struct reservation_object *resv = e->robj->tbo.resv;
- r = amdgpu_sync_resv(p->adev, &p->ibs[0].sync, resv, p->filp);
+ r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp);
if (r)
return r;
@@ -510,11 +582,8 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, bo
for (i = 0; i < parser->nchunks; i++)
drm_free_large(parser->chunks[i].kdata);
kfree(parser->chunks);
- if (parser->ibs)
- for (i = 0; i < parser->num_ibs; i++)
- amdgpu_ib_free(parser->adev, &parser->ibs[i]);
- kfree(parser->ibs);
- amdgpu_bo_unref(&parser->uf.bo);
+ if (parser->job)
+ amdgpu_job_free(parser->job);
amdgpu_bo_unref(&parser->uf_entry.robj);
}
@@ -530,7 +599,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
if (r)
return r;
- r = amdgpu_sync_fence(adev, &p->ibs[0].sync, vm->page_directory_fence);
+ r = amdgpu_sync_fence(adev, &p->job->sync, vm->page_directory_fence);
if (r)
return r;
@@ -556,14 +625,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
return r;
f = bo_va->last_pt_update;
- r = amdgpu_sync_fence(adev, &p->ibs[0].sync, f);
+ r = amdgpu_sync_fence(adev, &p->job->sync, f);
if (r)
return r;
}
}
- r = amdgpu_vm_clear_invalids(adev, vm, &p->ibs[0].sync);
+ r = amdgpu_vm_clear_invalids(adev, vm, &p->job->sync);
if (amdgpu_vm_debug && p->bo_list) {
/* Invalidate all BOs to test for userspace bugs */
@@ -581,29 +650,25 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p,
}
static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
- struct amdgpu_cs_parser *parser)
+ struct amdgpu_cs_parser *p)
{
- struct amdgpu_fpriv *fpriv = parser->filp->driver_priv;
+ struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
- struct amdgpu_ring *ring;
+ struct amdgpu_ring *ring = p->job->ring;
int i, r;
- if (parser->num_ibs == 0)
- return 0;
-
/* Only for UVD/VCE VM emulation */
- for (i = 0; i < parser->num_ibs; i++) {
- ring = parser->ibs[i].ring;
- if (ring->funcs->parse_cs) {
- r = amdgpu_ring_parse_cs(ring, parser, i);
+ if (ring->funcs->parse_cs) {
+ for (i = 0; i < p->job->num_ibs; i++) {
+ r = amdgpu_ring_parse_cs(ring, p, i);
if (r)
return r;
}
}
- r = amdgpu_bo_vm_update_pte(parser, vm);
+ r = amdgpu_bo_vm_update_pte(p, vm);
if (!r)
- amdgpu_cs_sync_rings(parser);
+ amdgpu_cs_sync_rings(p);
return r;
}
@@ -626,14 +691,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
int i, j;
int r;
- for (i = 0, j = 0; i < parser->nchunks && j < parser->num_ibs; i++) {
+ for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) {
struct amdgpu_cs_chunk *chunk;
struct amdgpu_ib *ib;
struct drm_amdgpu_cs_chunk_ib *chunk_ib;
struct amdgpu_ring *ring;
chunk = &parser->chunks[i];
- ib = &parser->ibs[j];
+ ib = &parser->job->ibs[j];
chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
@@ -645,6 +710,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (r)
return r;
+ if (parser->job->ring && parser->job->ring != ring)
+ return -EINVAL;
+
+ parser->job->ring = ring;
+
if (ring->funcs->parse_cs) {
struct amdgpu_bo_va_mapping *m;
struct amdgpu_bo *aobj = NULL;
@@ -673,7 +743,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
kptr += chunk_ib->va_start - offset;
- r = amdgpu_ib_get(ring, NULL, chunk_ib->ib_bytes, ib);
+ r = amdgpu_ib_get(adev, NULL, chunk_ib->ib_bytes, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -682,7 +752,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
} else {
- r = amdgpu_ib_get(ring, vm, 0, ib);
+ r = amdgpu_ib_get(adev, vm, 0, ib);
if (r) {
DRM_ERROR("Failed to get ib !\n");
return r;
@@ -697,15 +767,12 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
j++;
}
- if (!parser->num_ibs)
- return 0;
-
/* add GDS resources to first IB */
if (parser->bo_list) {
struct amdgpu_bo *gds = parser->bo_list->gds_obj;
struct amdgpu_bo *gws = parser->bo_list->gws_obj;
struct amdgpu_bo *oa = parser->bo_list->oa_obj;
- struct amdgpu_ib *ib = &parser->ibs[0];
+ struct amdgpu_ib *ib = &parser->job->ibs[0];
if (gds) {
ib->gds_base = amdgpu_bo_gpu_offset(gds);
@@ -721,15 +788,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
}
}
/* wrap the last IB with user fence */
- if (parser->uf.bo) {
- struct amdgpu_ib *ib = &parser->ibs[parser->num_ibs - 1];
+ if (parser->job->uf.bo) {
+ struct amdgpu_ib *ib = &parser->job->ibs[parser->job->num_ibs - 1];
/* UVD & VCE fw doesn't support user fences */
- if (ib->ring->type == AMDGPU_RING_TYPE_UVD ||
- ib->ring->type == AMDGPU_RING_TYPE_VCE)
+ if (parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
+ parser->job->ring->type == AMDGPU_RING_TYPE_VCE)
return -EINVAL;
- ib->user = &parser->uf;
+ ib->user = &parser->job->uf;
}
return 0;
@@ -739,14 +806,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
struct amdgpu_cs_parser *p)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
- struct amdgpu_ib *ib;
int i, j, r;
- if (!p->num_ibs)
- return 0;
-
- /* Add dependencies to first IB */
- ib = &p->ibs[0];
for (i = 0; i < p->nchunks; ++i) {
struct drm_amdgpu_cs_chunk_dep *deps;
struct amdgpu_cs_chunk *chunk;
@@ -784,7 +845,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
return r;
} else if (fence) {
- r = amdgpu_sync_fence(adev, &ib->sync, fence);
+ r = amdgpu_sync_fence(adev, &p->job->sync,
+ fence);
fence_put(fence);
amdgpu_ctx_put(ctx);
if (r)
@@ -796,15 +858,36 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
return 0;
}
-static int amdgpu_cs_free_job(struct amdgpu_job *job)
+static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
+ union drm_amdgpu_cs *cs)
{
- int i;
- if (job->ibs)
- for (i = 0; i < job->num_ibs; i++)
- amdgpu_ib_free(job->adev, &job->ibs[i]);
- kfree(job->ibs);
- if (job->uf.bo)
- amdgpu_bo_unref(&job->uf.bo);
+ struct amdgpu_ring *ring = p->job->ring;
+ struct amd_sched_fence *fence;
+ struct amdgpu_job *job;
+
+ job = p->job;
+ p->job = NULL;
+
+ job->base.sched = &ring->sched;
+ job->base.s_entity = &p->ctx->rings[ring->idx].entity;
+ job->owner = p->filp;
+
+ fence = amd_sched_fence_create(job->base.s_entity, p->filp);
+ if (!fence) {
+ amdgpu_job_free(job);
+ return -ENOMEM;
+ }
+
+ job->base.s_fence = fence;
+ p->fence = fence_get(&fence->base);
+
+ cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring,
+ &fence->base);
+ job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
+
+ trace_amdgpu_cs_ioctl(job);
+ amd_sched_entity_push_job(&job->base);
+
return 0;
}
@@ -829,7 +912,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
r = amdgpu_cs_handle_lockup(adev, r);
return r;
}
- r = amdgpu_cs_parser_relocs(&parser);
+ r = amdgpu_cs_parser_bos(&parser, data);
if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n");
else if (r && r != -ERESTARTSYS)
@@ -848,68 +931,14 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r)
goto out;
- for (i = 0; i < parser.num_ibs; i++)
+ for (i = 0; i < parser.job->num_ibs; i++)
trace_amdgpu_cs(&parser, i);
r = amdgpu_cs_ib_vm_chunk(adev, &parser);
if (r)
goto out;
- if (amdgpu_enable_scheduler && parser.num_ibs) {
- struct amdgpu_ring * ring = parser.ibs->ring;
- struct amd_sched_fence *fence;
- struct amdgpu_job *job;
-
- job = kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
- if (!job) {
- r = -ENOMEM;
- goto out;
- }
-
- job->base.sched = &ring->sched;
- job->base.s_entity = &parser.ctx->rings[ring->idx].entity;
- job->adev = parser.adev;
- job->owner = parser.filp;
- job->free_job = amdgpu_cs_free_job;
-
- job->ibs = parser.ibs;
- job->num_ibs = parser.num_ibs;
- parser.ibs = NULL;
- parser.num_ibs = 0;
-
- if (job->ibs[job->num_ibs - 1].user) {
- job->uf = parser.uf;
- job->ibs[job->num_ibs - 1].user = &job->uf;
- parser.uf.bo = NULL;
- }
-
- fence = amd_sched_fence_create(job->base.s_entity,
- parser.filp);
- if (!fence) {
- r = -ENOMEM;
- amdgpu_cs_free_job(job);
- kfree(job);
- goto out;
- }
- job->base.s_fence = fence;
- parser.fence = fence_get(&fence->base);
-
- cs->out.handle = amdgpu_ctx_add_fence(parser.ctx, ring,
- &fence->base);
- job->ibs[job->num_ibs - 1].sequence = cs->out.handle;
-
- trace_amdgpu_cs_ioctl(job);
- amd_sched_entity_push_job(&job->base);
-
- } else {
- struct amdgpu_fence *fence;
-
- r = amdgpu_ib_schedule(adev, parser.num_ibs, parser.ibs,
- parser.filp);
- fence = parser.ibs[parser.num_ibs - 1].fence;
- parser.fence = fence_get(&fence->base);
- cs->out.handle = parser.ibs[parser.num_ibs - 1].sequence;
- }
+ r = amdgpu_cs_submit(&parser, cs);
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
@@ -980,30 +1009,36 @@ struct amdgpu_bo_va_mapping *
amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
uint64_t addr, struct amdgpu_bo **bo)
{
- struct amdgpu_bo_list_entry *reloc;
struct amdgpu_bo_va_mapping *mapping;
+ unsigned i;
+
+ if (!parser->bo_list)
+ return NULL;
addr /= AMDGPU_GPU_PAGE_SIZE;
- list_for_each_entry(reloc, &parser->validated, tv.head) {
- if (!reloc->bo_va)
+ for (i = 0; i < parser->bo_list->num_entries; i++) {
+ struct amdgpu_bo_list_entry *lobj;
+
+ lobj = &parser->bo_list->array[i];
+ if (!lobj->bo_va)
continue;
- list_for_each_entry(mapping, &reloc->bo_va->valids, list) {
+ list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
if (mapping->it.start > addr ||
addr > mapping->it.last)
continue;
- *bo = reloc->bo_va->bo;
+ *bo = lobj->bo_va->bo;
return mapping;
}
- list_for_each_entry(mapping, &reloc->bo_va->invalids, list) {
+ list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
if (mapping->it.start > addr ||
addr > mapping->it.last)
continue;
- *bo = reloc->bo_va->bo;
+ *bo = lobj->bo_va->bo;
return mapping;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 17d1fb12128a2..17e13621fae96 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -25,8 +25,7 @@
#include
#include "amdgpu.h"
-int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
- struct amdgpu_ctx *ctx)
+static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx)
{
unsigned i, j;
int r;
@@ -35,44 +34,38 @@ int amdgpu_ctx_init(struct amdgpu_device *adev, enum amd_sched_priority pri,
ctx->adev = adev;
kref_init(&ctx->refcount);
spin_lock_init(&ctx->ring_lock);
- ctx->fences = kzalloc(sizeof(struct fence *) * amdgpu_sched_jobs *
- AMDGPU_MAX_RINGS, GFP_KERNEL);
+ ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
+ sizeof(struct fence*), GFP_KERNEL);
if (!ctx->fences)
return -ENOMEM;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
ctx->rings[i].sequence = 1;
- ctx->rings[i].fences = (void *)ctx->fences + sizeof(struct fence *) *
- amdgpu_sched_jobs * i;
+ ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
}
- if (amdgpu_enable_scheduler) {
- /* create context entity for each ring */
- for (i = 0; i < adev->num_rings; i++) {
- struct amd_sched_rq *rq;
- if (pri >= AMD_SCHED_MAX_PRIORITY) {
- kfree(ctx->fences);
- return -EINVAL;
- }
- rq = &adev->rings[i]->sched.sched_rq[pri];
- r = amd_sched_entity_init(&adev->rings[i]->sched,
- &ctx->rings[i].entity,
- rq, amdgpu_sched_jobs);
- if (r)
- break;
- }
-
- if (i < adev->num_rings) {
- for (j = 0; j < i; j++)
- amd_sched_entity_fini(&adev->rings[j]->sched,
- &ctx->rings[j].entity);
- kfree(ctx->fences);
- return r;
- }
+ /* create context entity for each ring */
+ for (i = 0; i < adev->num_rings; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ struct amd_sched_rq *rq;
+
+ rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+ r = amd_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
+ rq, amdgpu_sched_jobs);
+ if (r)
+ break;
+ }
+
+ if (i < adev->num_rings) {
+ for (j = 0; j < i; j++)
+ amd_sched_entity_fini(&adev->rings[j]->sched,
+ &ctx->rings[j].entity);
+ kfree(ctx->fences);
+ return r;
}
return 0;
}
-void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
+static void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
{
struct amdgpu_device *adev = ctx->adev;
unsigned i, j;
@@ -85,11 +78,9 @@ void amdgpu_ctx_fini(struct amdgpu_ctx *ctx)
fence_put(ctx->rings[i].fences[j]);
kfree(ctx->fences);
- if (amdgpu_enable_scheduler) {
- for (i = 0; i < adev->num_rings; i++)
- amd_sched_entity_fini(&adev->rings[i]->sched,
- &ctx->rings[i].entity);
- }
+ for (i = 0; i < adev->num_rings; i++)
+ amd_sched_entity_fini(&adev->rings[i]->sched,
+ &ctx->rings[i].entity);
}
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
@@ -112,7 +103,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
return r;
}
*id = (uint32_t)r;
- r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_NORMAL, ctx);
+ r = amdgpu_ctx_init(adev, ctx);
if (r) {
idr_remove(&mgr->ctx_handles, *id);
*id = 0;
@@ -200,18 +191,18 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
id = args->in.ctx_id;
switch (args->in.op) {
- case AMDGPU_CTX_OP_ALLOC_CTX:
- r = amdgpu_ctx_alloc(adev, fpriv, &id);
- args->out.alloc.ctx_id = id;
- break;
- case AMDGPU_CTX_OP_FREE_CTX:
- r = amdgpu_ctx_free(fpriv, id);
- break;
- case AMDGPU_CTX_OP_QUERY_STATE:
- r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
- break;
- default:
- return -EINVAL;
+ case AMDGPU_CTX_OP_ALLOC_CTX:
+ r = amdgpu_ctx_alloc(adev, fpriv, &id);
+ args->out.alloc.ctx_id = id;
+ break;
+ case AMDGPU_CTX_OP_FREE_CTX:
+ r = amdgpu_ctx_free(fpriv, id);
+ break;
+ case AMDGPU_CTX_OP_QUERY_STATE:
+ r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
+ break;
+ default:
+ return -EINVAL;
}
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 51bfc114584ed..612117478b570 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -62,6 +62,12 @@ static const char *amdgpu_asic_name[] = {
"LAST",
};
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
bool amdgpu_device_is_px(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
@@ -635,31 +641,6 @@ bool amdgpu_card_posted(struct amdgpu_device *adev)
}
-/**
- * amdgpu_boot_test_post_card - check and possibly initialize the hw
- *
- * @adev: amdgpu_device pointer
- *
- * Check if the asic is initialized and if not, attempt to initialize
- * it (all asics).
- * Returns true if initialized or false if not.
- */
-bool amdgpu_boot_test_post_card(struct amdgpu_device *adev)
-{
- if (amdgpu_card_posted(adev))
- return true;
-
- if (adev->bios) {
- DRM_INFO("GPU not posted. posting now...\n");
- if (adev->is_atom_bios)
- amdgpu_atom_asic_init(adev->mode_info.atom_context);
- return true;
- } else {
- dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
- return false;
- }
-}
-
/**
* amdgpu_dummy_page_init - init dummy page used by the driver
*
@@ -959,12 +940,6 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
amdgpu_sched_jobs);
amdgpu_sched_jobs = roundup_pow_of_two(amdgpu_sched_jobs);
}
- /* vramlimit must be a power of two */
- if (!amdgpu_check_pot_argument(amdgpu_vram_limit)) {
- dev_warn(adev->dev, "vram limit (%d) must be a power of 2\n",
- amdgpu_vram_limit);
- amdgpu_vram_limit = 0;
- }
if (amdgpu_gart_size != -1) {
/* gtt size must be power of two and greater or equal to 32M */
@@ -1434,7 +1409,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
adev->mman.buffer_funcs = NULL;
adev->mman.buffer_funcs_ring = NULL;
adev->vm_manager.vm_pte_funcs = NULL;
- adev->vm_manager.vm_pte_funcs_ring = NULL;
+ adev->vm_manager.vm_pte_num_rings = 0;
adev->gart.gart_funcs = NULL;
adev->fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
@@ -1455,9 +1430,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* mutex initialization are all done here so we
* can recall function without having locking issues */
- mutex_init(&adev->ring_lock);
+ mutex_init(&adev->vm_manager.lock);
atomic_set(&adev->irq.ih.lock, 0);
- mutex_init(&adev->gem.mutex);
mutex_init(&adev->pm.mutex);
mutex_init(&adev->gfx.gpu_clock_mutex);
mutex_init(&adev->srbm_mutex);
@@ -1511,7 +1485,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_runtime_pm == 1)
runtime = true;
- if (amdgpu_device_is_px(ddev))
+ if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
runtime = true;
vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
if (runtime)
@@ -1531,8 +1505,13 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
+ /* See if the asic supports SR-IOV */
+ adev->virtualization.supports_sr_iov =
+ amdgpu_atombios_has_gpu_virtualization_table(adev);
+
/* Post card if necessary */
- if (!amdgpu_card_posted(adev)) {
+ if (!amdgpu_card_posted(adev) ||
+ adev->virtualization.supports_sr_iov) {
if (!adev->bios) {
dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
return -EINVAL;
@@ -1577,11 +1556,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
return r;
}
- r = amdgpu_ctx_init(adev, AMD_SCHED_PRIORITY_KERNEL, &adev->kernel_ctx);
- if (r) {
- dev_err(adev->dev, "failed to create kernel context (%d).\n", r);
- return r;
- }
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
@@ -1645,7 +1619,6 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->shutdown = true;
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
- amdgpu_ctx_fini(&adev->kernel_ctx);
amdgpu_ib_pool_fini(adev);
amdgpu_fence_driver_fini(adev);
amdgpu_fbdev_fini(adev);
@@ -1894,6 +1867,9 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
retry:
r = amdgpu_asic_reset(adev);
+ /* post card */
+ amdgpu_atom_asic_init(adev->mode_info.atom_context);
+
if (!r) {
dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
r = amdgpu_resume(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 1846d65b72859..f0ed974bd4e09 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -35,32 +35,30 @@
#include
#include
-static void amdgpu_flip_wait_fence(struct amdgpu_device *adev,
- struct fence **f)
+static void amdgpu_flip_callback(struct fence *f, struct fence_cb *cb)
{
- struct amdgpu_fence *fence;
- long r;
+ struct amdgpu_flip_work *work =
+ container_of(cb, struct amdgpu_flip_work, cb);
- if (*f == NULL)
- return;
+ fence_put(f);
+ schedule_work(&work->flip_work);
+}
- fence = to_amdgpu_fence(*f);
- if (fence) {
- r = fence_wait(&fence->base, false);
- if (r == -EDEADLK)
- r = amdgpu_gpu_reset(adev);
- } else
- r = fence_wait(*f, false);
+static bool amdgpu_flip_handle_fence(struct amdgpu_flip_work *work,
+ struct fence **f)
+{
+ struct fence *fence= *f;
- if (r)
- DRM_ERROR("failed to wait on page flip fence (%ld)!\n", r);
+ if (fence == NULL)
+ return false;
- /* We continue with the page flip even if we failed to wait on
- * the fence, otherwise the DRM core and userspace will be
- * confused about which BO the CRTC is scanning out
- */
- fence_put(*f);
*f = NULL;
+
+ if (!fence_add_callback(fence, &work->cb, amdgpu_flip_callback))
+ return true;
+
+ fence_put(*f);
+ return false;
}
static void amdgpu_flip_work_func(struct work_struct *__work)
@@ -76,9 +74,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
int vpos, hpos, stat, min_udelay = 0;
struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
- amdgpu_flip_wait_fence(adev, &work->excl);
+ if (amdgpu_flip_handle_fence(work, &work->excl))
+ return;
+
for (i = 0; i < work->shared_count; ++i)
- amdgpu_flip_wait_fence(adev, &work->shared[i]);
+ if (amdgpu_flip_handle_fence(work, &work->shared[i]))
+ return;
/* We borrow the event spin lock for protecting flip_status */
spin_lock_irqsave(&crtc->dev->event_lock, flags);
@@ -130,12 +131,12 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
vblank->framedur_ns / 1000,
vblank->linedur_ns / 1000, stat, vpos, hpos);
- /* do the flip (mmio) */
- adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
/* set the flip status */
amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
-
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+
+ /* Do the flip (mmio) */
+ adev->mode_info.funcs->page_flip(adev, work->crtc_id, work->base);
}
/*
@@ -254,7 +255,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
/* update crtc fb */
crtc->primary->fb = fb;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
- queue_work(amdgpu_crtc->pflip_queue, &work->flip_work);
+ amdgpu_flip_work_func(&work->flip_work);
return 0;
vblank_cleanup:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 9ef1db87cf260..f1e17d60055a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -69,7 +69,6 @@ int amdgpu_dpm = -1;
int amdgpu_smc_load_fw = 1;
int amdgpu_aspm = -1;
int amdgpu_runtime_pm = -1;
-int amdgpu_hard_reset = 0;
unsigned amdgpu_ip_block_mask = 0xffffffff;
int amdgpu_bapm = -1;
int amdgpu_deep_color = 0;
@@ -78,10 +77,8 @@ int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop = 0;
int amdgpu_vm_debug = 0;
int amdgpu_exp_hw_support = 0;
-int amdgpu_enable_scheduler = 1;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
-int amdgpu_enable_semaphores = 0;
int amdgpu_powerplay = -1;
unsigned amdgpu_pcie_gen_cap = 0;
unsigned amdgpu_pcie_lane_cap = 0;
@@ -128,9 +125,6 @@ module_param_named(aspm, amdgpu_aspm, int, 0444);
MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
-MODULE_PARM_DESC(hard_reset, "PCI config reset (1 = force enable, 0 = disable (default))");
-module_param_named(hard_reset, amdgpu_hard_reset, int, 0444);
-
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
@@ -155,18 +149,12 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
-MODULE_PARM_DESC(enable_scheduler, "enable SW GPU scheduler (1 = enable (default), 0 = disable)");
-module_param_named(enable_scheduler, amdgpu_enable_scheduler, int, 0444);
-
MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
-MODULE_PARM_DESC(enable_semaphores, "Enable semaphores (1 = enable, 0 = disable (default))");
-module_param_named(enable_semaphores, amdgpu_enable_semaphores, int, 0644);
-
#ifdef CONFIG_DRM_AMD_POWERPLAY
MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(powerplay, amdgpu_powerplay, int, 0444);
@@ -330,6 +318,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+ /*
+ * Initialize amdkfd before starting radeon. If it was not loaded yet,
+ * defer radeon probing
+ */
+ ret = amdgpu_amdkfd_init();
+ if (ret == -EPROBE_DEFER)
+ return ret;
+
/* Get rid of things like offb */
ret = amdgpu_kick_out_firmware_fb(pdev);
if (ret)
@@ -559,6 +555,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
static int __init amdgpu_init(void)
{
+ amdgpu_sync_init();
#ifdef CONFIG_VGA_CONSOLE
if (vgacon_text_force()) {
DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
@@ -572,8 +569,6 @@ static int __init amdgpu_init(void)
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
- amdgpu_amdkfd_init();
-
/* let modprobe override vga console setting */
return drm_pci_init(driver, pdriver);
}
@@ -583,6 +578,7 @@ static void __exit amdgpu_exit(void)
amdgpu_amdkfd_fini();
drm_pci_exit(driver, pdriver);
amdgpu_unregister_atpx_handler();
+ amdgpu_sync_fini();
}
module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 3671f9f220bd4..4303b447efe89 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,9 +47,30 @@
* that the the relevant GPU caches have been flushed.
*/
+struct amdgpu_fence {
+ struct fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+};
+
static struct kmem_cache *amdgpu_fence_slab;
static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+/*
+ * Cast helper
+ */
+static const struct fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+{
+ struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+ if (__f->base.ops == &amdgpu_fence_ops)
+ return __f;
+
+ return NULL;
+}
+
/**
* amdgpu_fence_write - write a fence value
*
@@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
if (drv->cpu_addr)
seq = le32_to_cpu(*drv->cpu_addr);
else
- seq = lower_32_bits(atomic64_read(&drv->last_seq));
+ seq = atomic_read(&drv->last_seq);
return seq;
}
@@ -91,32 +112,41 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
- * @owner: creator of the fence
- * @fence: amdgpu fence object
+ * @f: resulting fence object
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
- struct amdgpu_fence **fence)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_fence *fence;
+ struct fence **ptr;
+ uint32_t seq;
- /* we are protected by the ring emission mutex */
- *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
- if ((*fence) == NULL) {
+ fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+ if (fence == NULL)
return -ENOMEM;
- }
- (*fence)->seq = ++ring->fence_drv.sync_seq[ring->idx];
- (*fence)->ring = ring;
- (*fence)->owner = owner;
- fence_init(&(*fence)->base, &amdgpu_fence_ops,
- &ring->fence_drv.fence_queue.lock,
- adev->fence_context + ring->idx,
- (*fence)->seq);
+
+ seq = ++ring->fence_drv.sync_seq;
+ fence->ring = ring;
+ fence_init(&fence->base, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx,
+ seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
- (*fence)->seq,
- AMDGPU_FENCE_FLAG_INT);
+ seq, AMDGPU_FENCE_FLAG_INT);
+
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+ BUG_ON(rcu_dereference_protected(*ptr, 1));
+
+ rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
+ *f = &fence->base;
+
return 0;
}
@@ -134,89 +164,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
}
/**
- * amdgpu_fence_activity - check for fence activity
+ * amdgpu_fence_process - check for fence activity
*
* @ring: pointer to struct amdgpu_ring
*
* Checks the current fence value and calculates the last
- * signalled fence value. Returns true if activity occured
- * on the ring, and the fence_queue should be waken up.
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
*/
-static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
{
- uint64_t seq, last_seq, last_emitted;
- unsigned count_loop = 0;
- bool wake = false;
-
- /* Note there is a scenario here for an infinite loop but it's
- * very unlikely to happen. For it to happen, the current polling
- * process need to be interrupted by another process and another
- * process needs to update the last_seq btw the atomic read and
- * xchg of the current process.
- *
- * More over for this to go in infinite loop there need to be
- * continuously new fence signaled ie amdgpu_fence_read needs
- * to return a different value each time for both the currently
- * polling process and the other process that xchg the last_seq
- * btw atomic read and xchg of the current process. And the
- * value the other process set as last seq must be higher than
- * the seq value we just read. Which means that current process
- * need to be interrupted after amdgpu_fence_read and before
- * atomic xchg.
- *
- * To be even more safe we count the number of time we loop and
- * we bail after 10 loop just accepting the fact that we might
- * have temporarly set the last_seq not to the true real last
- * seq but to an older one.
- */
- last_seq = atomic64_read(&ring->fence_drv.last_seq);
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ uint32_t seq, last_seq;
+ int r;
+
do {
- last_emitted = ring->fence_drv.sync_seq[ring->idx];
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
seq = amdgpu_fence_read(ring);
- seq |= last_seq & 0xffffffff00000000LL;
- if (seq < last_seq) {
- seq &= 0xffffffff;
- seq |= last_emitted & 0xffffffff00000000LL;
- }
- if (seq <= last_seq || seq > last_emitted) {
- break;
- }
- /* If we loop over we don't want to return without
- * checking if a fence is signaled as it means that the
- * seq we just read is different from the previous on.
- */
- wake = true;
- last_seq = seq;
- if ((count_loop++) > 10) {
- /* We looped over too many time leave with the
- * fact that we might have set an older fence
- * seq then the current real last seq as signaled
- * by the hw.
- */
- break;
- }
- } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (seq < last_emitted)
+ if (seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
- return wake;
-}
+ while (last_seq != seq) {
+ struct fence *fence, **ptr;
-/**
- * amdgpu_fence_process - process a fence
- *
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
- *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
- */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
-{
- if (amdgpu_fence_activity(ring))
- wake_up_all(&ring->fence_drv.fence_queue);
+ ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+ /* There is always exactly one thread signaling this fence slot */
+ fence = rcu_dereference_protected(*ptr, 1);
+ rcu_assign_pointer(*ptr, NULL);
+
+ BUG_ON(!fence);
+
+ r = fence_signal(fence);
+ if (!r)
+ FENCE_TRACE(fence, "signaled from irq context\n");
+ else
+ BUG();
+
+ fence_put(fence);
+ }
}
/**
@@ -233,83 +222,6 @@ static void amdgpu_fence_fallback(unsigned long arg)
amdgpu_fence_process(ring);
}
-/**
- * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
- *
- * @ring: ring the fence is associated with
- * @seq: sequence number
- *
- * Check if the last signaled fence sequnce number is >= the requested
- * sequence number (all asics).
- * Returns true if the fence has signaled (current fence value
- * is >= requested value) or false if it has not (current fence
- * value is < the requested value. Helper function for
- * amdgpu_fence_signaled().
- */
-static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
-{
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- /* poll new last sequence at least once */
- amdgpu_fence_process(ring);
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- return false;
-}
-
-/*
- * amdgpu_ring_wait_seq_timeout - wait for seq of the specific ring to signal
- * @ring: ring to wait on for the seq number
- * @seq: seq number wait for
- *
- * return value:
- * 0: seq signaled, and gpu not hang
- * -EDEADL: GPU hang detected
- * -EINVAL: some paramter is not valid
- */
-static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
-{
- bool signaled = false;
-
- BUG_ON(!ring);
- if (seq > ring->fence_drv.sync_seq[ring->idx])
- return -EINVAL;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return 0;
-
- amdgpu_fence_schedule_fallback(ring);
- wait_event(ring->fence_drv.fence_queue, (
- (signaled = amdgpu_fence_seq_signaled(ring, seq))));
-
- if (signaled)
- return 0;
- else
- return -EDEADLK;
-}
-
-/**
- * amdgpu_fence_wait_next - wait for the next fence to signal
- *
- * @adev: amdgpu device pointer
- * @ring: ring index the fence is associated with
- *
- * Wait for the next fence on the requested ring to signal (all asics).
- * Returns 0 if the next fence has passed, error for all other cases.
- * Caller must hold ring lock.
- */
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
-{
- uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
-
- if (seq >= ring->fence_drv.sync_seq[ring->idx])
- return -ENOENT;
-
- return amdgpu_fence_ring_wait_seq(ring, seq);
-}
-
/**
* amdgpu_fence_wait_empty - wait for all fences to signal
*
@@ -318,16 +230,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
*
* Wait for all fences on the requested ring to signal (all asics).
* Returns 0 if the fences have passed, error for all other cases.
- * Caller must hold ring lock.
*/
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
- uint64_t seq = ring->fence_drv.sync_seq[ring->idx];
+ uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
+ struct fence *fence, **ptr;
+ int r;
if (!seq)
return 0;
- return amdgpu_fence_ring_wait_seq(ring, seq);
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ rcu_read_lock();
+ fence = rcu_dereference(*ptr);
+ if (!fence || !fence_get_rcu(fence)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ r = fence_wait(fence, false);
+ fence_put(fence);
+ return r;
}
/**
@@ -347,75 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
* but it's ok to report slightly wrong fence count here.
*/
amdgpu_fence_process(ring);
- emitted = ring->fence_drv.sync_seq[ring->idx]
- - atomic64_read(&ring->fence_drv.last_seq);
- /* to avoid 32bits warp around */
- if (emitted > 0x10000000)
- emitted = 0x10000000;
-
- return (unsigned)emitted;
-}
-
-/**
- * amdgpu_fence_need_sync - do we need a semaphore
- *
- * @fence: amdgpu fence object
- * @dst_ring: which ring to check against
- *
- * Check if the fence needs to be synced against another ring
- * (all asics). If so, we need to emit a semaphore.
- * Returns true if we need to sync with another ring, false if
- * not.
- */
-bool amdgpu_fence_need_sync(struct amdgpu_fence *fence,
- struct amdgpu_ring *dst_ring)
-{
- struct amdgpu_fence_driver *fdrv;
-
- if (!fence)
- return false;
-
- if (fence->ring == dst_ring)
- return false;
-
- /* we are protected by the ring mutex */
- fdrv = &dst_ring->fence_drv;
- if (fence->seq <= fdrv->sync_seq[fence->ring->idx])
- return false;
-
- return true;
-}
-
-/**
- * amdgpu_fence_note_sync - record the sync point
- *
- * @fence: amdgpu fence object
- * @dst_ring: which ring to check against
- *
- * Note the sequence number at which point the fence will
- * be synced with the requested ring (all asics).
- */
-void amdgpu_fence_note_sync(struct amdgpu_fence *fence,
- struct amdgpu_ring *dst_ring)
-{
- struct amdgpu_fence_driver *dst, *src;
- unsigned i;
-
- if (!fence)
- return;
-
- if (fence->ring == dst_ring)
- return;
-
- /* we are protected by the ring mutex */
- src = &fence->ring->fence_drv;
- dst = &dst_ring->fence_drv;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- if (i == dst_ring->idx)
- continue;
-
- dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
- }
+ emitted = 0x100000000ull;
+ emitted -= atomic_read(&ring->fence_drv.last_seq);
+ emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+ return lower_32_bits(emitted);
}
/**
@@ -447,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
}
- amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);
ring->fence_drv.irq_src = irq_src;
@@ -465,47 +324,55 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
+ * @num_hw_submission: number of entries on the hardware queue
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission)
{
- int i, r;
+ long timeout;
+ int r;
+
+ /* Check that num_hw_submission is a power of two */
+ if ((num_hw_submission & (num_hw_submission - 1)) != 0)
+ return -EINVAL;
ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- ring->fence_drv.sync_seq[i] = 0;
-
- atomic64_set(&ring->fence_drv.last_seq, 0);
+ ring->fence_drv.sync_seq = 0;
+ atomic_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
(unsigned long)ring);
- init_waitqueue_head(&ring->fence_drv.fence_queue);
-
- if (amdgpu_enable_scheduler) {
- long timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
- if (timeout == 0) {
- /*
- * FIXME:
- * Delayed workqueue cannot use it directly,
- * so the scheduler will not use delayed workqueue if
- * MAX_SCHEDULE_TIMEOUT is set.
- * Currently keep it simple and silly.
- */
- timeout = MAX_SCHEDULE_TIMEOUT;
- }
- r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
- amdgpu_sched_hw_submission,
- timeout, ring->name);
- if (r) {
- DRM_ERROR("Failed to create scheduler on ring %s.\n",
- ring->name);
- return r;
- }
+ ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+ spin_lock_init(&ring->fence_drv.lock);
+ ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
+ GFP_KERNEL);
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
+
+ timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
+ if (timeout == 0) {
+ /*
+ * FIXME:
+ * Delayed workqueue cannot use it directly,
+ * so the scheduler will not use delayed workqueue if
+ * MAX_SCHEDULE_TIMEOUT is set.
+ * Currently keep it simple and silly.
+ */
+ timeout = MAX_SCHEDULE_TIMEOUT;
+ }
+ r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
+ num_hw_submission,
+ timeout, ring->name);
+ if (r) {
+ DRM_ERROR("Failed to create scheduler on ring %s.\n",
+ ring->name);
+ return r;
}
return 0;
@@ -548,11 +415,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
- int i, r;
+ unsigned i, j;
+ int r;
- if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
- kmem_cache_destroy(amdgpu_fence_slab);
- mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -563,14 +428,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
/* no need to trigger GPU reset as we are unloading */
amdgpu_fence_driver_force_completion(adev);
}
- wake_up_all(&ring->fence_drv.fence_queue);
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
amd_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+ fence_put(ring->fence_drv.fences[i]);
+ kfree(ring->fence_drv.fences);
ring->fence_drv.initialized = false;
}
- mutex_unlock(&adev->ring_lock);
+
+ if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+ kmem_cache_destroy(amdgpu_fence_slab);
}
/**
@@ -585,7 +454,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
{
int i, r;
- mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized)
@@ -602,7 +470,6 @@ void amdgpu_fence_driver_suspend(struct amdgpu_device *adev)
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
- mutex_unlock(&adev->ring_lock);
}
/**
@@ -621,7 +488,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
{
int i;
- mutex_lock(&adev->ring_lock);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->fence_drv.initialized)
@@ -631,7 +497,6 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev)
amdgpu_irq_get(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
}
- mutex_unlock(&adev->ring_lock);
}
/**
@@ -651,7 +516,7 @@ void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev)
if (!ring || !ring->fence_drv.initialized)
continue;
- amdgpu_fence_write(ring, ring->fence_drv.sync_seq[i]);
+ amdgpu_fence_write(ring, ring->fence_drv.sync_seq);
}
}
@@ -671,103 +536,57 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
}
/**
- * amdgpu_fence_is_signaled - test if fence is signaled
- *
- * @f: fence to test
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
*
- * Test the fence sequence number if it is already signaled. If it isn't
- * signaled start fence processing. Returns True if the fence is signaled.
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
*/
-static bool amdgpu_fence_is_signaled(struct fence *f)
+static bool amdgpu_fence_enable_signaling(struct fence *f)
{
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_ring *ring = fence->ring;
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
-
- amdgpu_fence_process(ring);
+ if (!timer_pending(&ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(ring);
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
+ FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
- return false;
+ return true;
}
/**
- * amdgpu_fence_check_signaled - callback from fence_queue
+ * amdgpu_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
*
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
+ * Free up the fence memory after the RCU grace period.
*/
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static void amdgpu_fence_free(struct rcu_head *rcu)
{
- struct amdgpu_fence *fence;
- struct amdgpu_device *adev;
- u64 seq;
- int ret;
-
- fence = container_of(wait, struct amdgpu_fence, fence_wake);
- adev = fence->ring->adev;
-
- /*
- * We cannot use amdgpu_fence_process here because we're already
- * in the waitqueue, in a call from wake_up_all.
- */
- seq = atomic64_read(&fence->ring->fence_drv.last_seq);
- if (seq >= fence->seq) {
- ret = fence_signal_locked(&fence->base);
- if (!ret)
- FENCE_TRACE(&fence->base, "signaled from irq context\n");
- else
- FENCE_TRACE(&fence->base, "was already signaled\n");
-
- __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_put(&fence->base);
- } else
- FENCE_TRACE(&fence->base, "pending\n");
- return 0;
+ struct fence *f = container_of(rcu, struct fence, rcu);
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ kmem_cache_free(amdgpu_fence_slab, fence);
}
/**
- * amdgpu_fence_enable_signaling - enable signalling on fence
+ * amdgpu_fence_release - callback that fence can be freed
+ *
* @fence: fence
*
- * This function is called with fence_queue lock held, and adds a callback
- * to fence_queue that checks if this fence is signaled, and if so it
- * signals the fence and removes itself.
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
*/
-static bool amdgpu_fence_enable_signaling(struct fence *f)
-{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- struct amdgpu_ring *ring = fence->ring;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return false;
-
- fence->fence_wake.flags = 0;
- fence->fence_wake.private = NULL;
- fence->fence_wake.func = amdgpu_fence_check_signaled;
- __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_get(f);
- if (!timer_pending(&ring->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(ring);
- FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
- return true;
-}
-
static void amdgpu_fence_release(struct fence *f)
{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- kmem_cache_free(amdgpu_fence_slab, fence);
+ call_rcu(&f->rcu, amdgpu_fence_free);
}
-const struct fence_ops amdgpu_fence_ops = {
+static const struct fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
- .signaled = amdgpu_fence_is_signaled,
.wait = fence_default_wait,
.release = amdgpu_fence_release,
};
@@ -781,7 +600,7 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
- int i, j;
+ int i;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -791,31 +610,41 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
amdgpu_fence_process(ring);
seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
- seq_printf(m, "Last signaled fence 0x%016llx\n",
- (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
- seq_printf(m, "Last emitted 0x%016llx\n",
- ring->fence_drv.sync_seq[i]);
-
- for (j = 0; j < AMDGPU_MAX_RINGS; ++j) {
- struct amdgpu_ring *other = adev->rings[j];
- if (i != j && other && other->fence_drv.initialized &&
- ring->fence_drv.sync_seq[j])
- seq_printf(m, "Last sync to ring %d 0x%016llx\n",
- j, ring->fence_drv.sync_seq[j]);
- }
+ seq_printf(m, "Last signaled fence 0x%08x\n",
+ atomic_read(&ring->fence_drv.last_seq));
+ seq_printf(m, "Last emitted 0x%08x\n",
+ ring->fence_drv.sync_seq);
}
return 0;
}
+/**
+ * amdgpu_debugfs_gpu_reset - manually trigger a gpu reset
+ *
+ * Manually trigger a gpu reset at the next fence wait.
+ */
+static int amdgpu_debugfs_gpu_reset(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct amdgpu_device *adev = dev->dev_private;
+
+ seq_printf(m, "gpu reset\n");
+ amdgpu_gpu_reset(adev);
+
+ return 0;
+}
+
static struct drm_info_list amdgpu_debugfs_fence_list[] = {
{"amdgpu_fence_info", &amdgpu_debugfs_fence_info, 0, NULL},
+ {"amdgpu_gpu_reset", &amdgpu_debugfs_gpu_reset, 0, NULL}
};
#endif
int amdgpu_debugfs_fence_init(struct amdgpu_device *adev)
{
#if defined(CONFIG_DEBUG_FS)
- return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 1);
+ return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_fence_list, 2);
#else
return 0;
#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index d20c2a8929cbd..fa6a27bff298b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -26,6 +26,7 @@
* Jerome Glisse
*/
#include
+#include
#include
#include
#include "amdgpu.h"
@@ -83,24 +84,32 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
return r;
}
*obj = &robj->gem_base;
- robj->pid = task_pid_nr(current);
-
- mutex_lock(&adev->gem.mutex);
- list_add_tail(&robj->list, &adev->gem.objects);
- mutex_unlock(&adev->gem.mutex);
return 0;
}
-int amdgpu_gem_init(struct amdgpu_device *adev)
+void amdgpu_gem_force_release(struct amdgpu_device *adev)
{
- INIT_LIST_HEAD(&adev->gem.objects);
- return 0;
-}
+ struct drm_device *ddev = adev->ddev;
+ struct drm_file *file;
-void amdgpu_gem_fini(struct amdgpu_device *adev)
-{
- amdgpu_bo_force_delete(adev);
+ mutex_lock(&ddev->struct_mutex);
+
+ list_for_each_entry(file, &ddev->filelist, lhead) {
+ struct drm_gem_object *gobj;
+ int handle;
+
+ WARN_ONCE(1, "Still active user space clients!\n");
+ spin_lock(&file->table_lock);
+ idr_for_each_entry(&file->object_idr, gobj, handle) {
+ WARN_ONCE(1, "And also active allocations!\n");
+ drm_gem_object_unreference(gobj);
+ }
+ idr_destroy(&file->object_idr);
+ spin_unlock(&file->table_lock);
+ }
+
+ mutex_unlock(&ddev->struct_mutex);
}
/*
@@ -132,25 +141,40 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv)
{
- struct amdgpu_bo *rbo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = rbo->adev;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = bo->adev;
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct amdgpu_bo_list_entry vm_pd;
+ struct list_head list, duplicates;
+ struct ttm_validate_buffer tv;
+ struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
int r;
- r = amdgpu_bo_reserve(rbo, true);
+
+ INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&duplicates);
+
+ tv.bo = &bo->tbo;
+ tv.shared = true;
+ list_add(&tv.head, &list);
+
+ amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+
+ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
}
- bo_va = amdgpu_vm_bo_find(vm, rbo);
+ bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
}
}
- amdgpu_bo_unreserve(rbo);
+ ttm_eu_backoff_reservation(&ticket, &list);
}
static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
@@ -235,12 +259,10 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_USERPTR_REGISTER))
return -EINVAL;
- if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
- !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
- !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
+ if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) &&
+ !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
- /* if we want to write to it we must require anonymous
- memory and install a MMU notifier */
+ /* if we want to write to it we must install a MMU notifier */
return -EACCES;
}
@@ -252,6 +274,8 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
goto handle_lockup;
bo = gem_to_amdgpu_bo(gobj);
+ bo->prefered_domains = AMDGPU_GEM_DOMAIN_GTT;
+ bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT;
r = amdgpu_ttm_tt_set_userptr(bo->tbo.ttm, args->addr, args->flags);
if (r)
goto release_object;
@@ -264,18 +288,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
down_read(¤t->mm->mmap_sem);
+
+ r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+ bo->tbo.ttm->pages);
+ if (r)
+ goto unlock_mmap_sem;
+
r = amdgpu_bo_reserve(bo, true);
- if (r) {
- up_read(¤t->mm->mmap_sem);
- goto release_object;
- }
+ if (r)
+ goto free_pages;
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
amdgpu_bo_unreserve(bo);
- up_read(¤t->mm->mmap_sem);
if (r)
- goto release_object;
+ goto free_pages;
+
+ up_read(¤t->mm->mmap_sem);
}
r = drm_gem_handle_create(filp, gobj, &handle);
@@ -287,6 +316,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
args->handle = handle;
return 0;
+free_pages:
+ release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
+
+unlock_mmap_sem:
+ up_read(¤t->mm->mmap_sem);
+
release_object:
drm_gem_object_unreference_unlocked(gobj);
@@ -308,7 +343,7 @@ int amdgpu_mode_dumb_mmap(struct drm_file *filp,
return -ENOENT;
}
robj = gem_to_amdgpu_bo(gobj);
- if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm) ||
+ if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm) ||
(robj->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)) {
drm_gem_object_unreference_unlocked(gobj);
return -EPERM;
@@ -559,11 +594,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
tv.shared = true;
list_add(&tv.head, &list);
- if (args->operation == AMDGPU_VA_OP_MAP) {
- tv_pd.bo = &fpriv->vm.page_directory->tbo;
- tv_pd.shared = true;
- list_add(&tv_pd.head, &list);
- }
+ tv_pd.bo = &fpriv->vm.page_directory->tbo;
+ tv_pd.shared = true;
+ list_add(&tv_pd.head, &list);
+
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r) {
drm_gem_object_unreference_unlocked(gobj);
@@ -629,7 +663,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
info.bo_size = robj->gem_base.size;
info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
- info.domains = robj->initial_domain;
+ info.domains = robj->prefered_domains;
info.domain_flags = robj->flags;
amdgpu_bo_unreserve(robj);
if (copy_to_user(out, &info, sizeof(info)))
@@ -637,14 +671,18 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
break;
}
case AMDGPU_GEM_OP_SET_PLACEMENT:
- if (amdgpu_ttm_tt_has_userptr(robj->tbo.ttm)) {
+ if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
r = -EPERM;
amdgpu_bo_unreserve(robj);
break;
}
- robj->initial_domain = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT |
- AMDGPU_GEM_DOMAIN_CPU);
+ robj->prefered_domains = args->value & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU);
+ robj->allowed_domains = robj->prefered_domains;
+ if (robj->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+ robj->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
+
amdgpu_bo_unreserve(robj);
break;
default:
@@ -689,38 +727,73 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
}
#if defined(CONFIG_DEBUG_FS)
+static int amdgpu_debugfs_gem_bo_info(int id, void *ptr, void *data)
+{
+ struct drm_gem_object *gobj = ptr;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
+ struct seq_file *m = data;
+
+ unsigned domain;
+ const char *placement;
+ unsigned pin_count;
+
+ domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
+ switch (domain) {
+ case AMDGPU_GEM_DOMAIN_VRAM:
+ placement = "VRAM";
+ break;
+ case AMDGPU_GEM_DOMAIN_GTT:
+ placement = " GTT";
+ break;
+ case AMDGPU_GEM_DOMAIN_CPU:
+ default:
+ placement = " CPU";
+ break;
+ }
+ seq_printf(m, "\t0x%08x: %12ld byte %s @ 0x%010Lx",
+ id, amdgpu_bo_size(bo), placement,
+ amdgpu_bo_gpu_offset(bo));
+
+ pin_count = ACCESS_ONCE(bo->pin_count);
+ if (pin_count)
+ seq_printf(m, " pin count %d", pin_count);
+ seq_printf(m, "\n");
+
+ return 0;
+}
+
static int amdgpu_debugfs_gem_info(struct seq_file *m, void *data)
{
struct drm_info_node *node = (struct drm_info_node *)m->private;
struct drm_device *dev = node->minor->dev;
- struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_bo *rbo;
- unsigned i = 0;
+ struct drm_file *file;
+ int r;
+
+ r = mutex_lock_interruptible(&dev->struct_mutex);
+ if (r)
+ return r;
- mutex_lock(&adev->gem.mutex);
- list_for_each_entry(rbo, &adev->gem.objects, list) {
- unsigned domain;
- const char *placement;
+ list_for_each_entry(file, &dev->filelist, lhead) {
+ struct task_struct *task;
- domain = amdgpu_mem_type_to_domain(rbo->tbo.mem.mem_type);
- switch (domain) {
- case AMDGPU_GEM_DOMAIN_VRAM:
- placement = "VRAM";
- break;
- case AMDGPU_GEM_DOMAIN_GTT:
- placement = " GTT";
- break;
- case AMDGPU_GEM_DOMAIN_CPU:
- default:
- placement = " CPU";
- break;
- }
- seq_printf(m, "bo[0x%08x] %8ldkB %8ldMB %s pid %8ld\n",
- i, amdgpu_bo_size(rbo) >> 10, amdgpu_bo_size(rbo) >> 20,
- placement, (unsigned long)rbo->pid);
- i++;
+ /*
+ * Although we have a valid reference on file->pid, that does
+ * not guarantee that the task_struct who called get_pid() is
+ * still alive (e.g. get_pid(current) => fork() => exit()).
+ * Therefore, we need to protect this ->comm access using RCU.
+ */
+ rcu_read_lock();
+ task = pid_task(file->pid, PIDTYPE_PID);
+ seq_printf(m, "pid %8d command %s:\n", pid_nr(file->pid),
+ task ? task->comm : "");
+ rcu_read_unlock();
+
+ spin_lock(&file->table_lock);
+ idr_for_each(&file->object_idr, amdgpu_debugfs_gem_bo_info, m);
+ spin_unlock(&file->table_lock);
}
- mutex_unlock(&adev->gem.mutex);
+
+ mutex_unlock(&dev->struct_mutex);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 9e25edafa7210..8443cea6821ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -55,10 +55,9 @@ static int amdgpu_debugfs_sa_init(struct amdgpu_device *adev);
* suballocator.
* Returns 0 on success, error on failure.
*/
-int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
+int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib)
{
- struct amdgpu_device *adev = ring->adev;
int r;
if (size) {
@@ -75,10 +74,8 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
ib->gpu_addr = amdgpu_sa_bo_gpu_addr(ib->sa_bo);
}
- amdgpu_sync_create(&ib->sync);
-
- ib->ring = ring;
ib->vm = vm;
+ ib->vm_id = 0;
return 0;
}
@@ -88,15 +85,13 @@ int amdgpu_ib_get(struct amdgpu_ring *ring, struct amdgpu_vm *vm,
*
* @adev: amdgpu_device pointer
* @ib: IB object to free
+ * @f: the fence SA bo need wait on for the ib alloation
*
* Free an IB (all asics).
*/
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f)
{
- amdgpu_sync_free(adev, &ib->sync, &ib->fence->base);
- amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
- if (ib->fence)
- fence_put(&ib->fence->base);
+ amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
}
/**
@@ -105,7 +100,7 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* @adev: amdgpu_device pointer
* @num_ibs: number of IBs to schedule
* @ibs: IB objects to schedule
- * @owner: owner for creating the fences
+ * @f: fence created during this submission
*
* Schedule an IB on the associated ring (all asics).
* Returns 0 on success, error on failure.
@@ -120,20 +115,21 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* a CONST_IB), it will be put on the ring prior to the DE IB. Prior
* to SI there was just a DE IB.
*/
-int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
- struct amdgpu_ib *ibs, void *owner)
+int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
+ struct amdgpu_ib *ibs, struct fence *last_vm_update,
+ struct fence **f)
{
+ struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
- struct amdgpu_ring *ring;
struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm;
+ struct fence *hwf;
unsigned i;
int r = 0;
if (num_ibs == 0)
return -EINVAL;
- ring = ibs->ring;
ctx = ibs->ctx;
vm = ibs->vm;
@@ -141,42 +137,24 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
dev_err(adev->dev, "couldn't schedule ib\n");
return -EINVAL;
}
- r = amdgpu_sync_wait(&ibs->sync);
- if (r) {
- dev_err(adev->dev, "IB sync failed (%d).\n", r);
- return r;
- }
- r = amdgpu_ring_lock(ring, (256 + AMDGPU_NUM_SYNCS * 8) * num_ibs);
- if (r) {
- dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
- return r;
- }
- if (vm) {
- /* grab a vm id if necessary */
- r = amdgpu_vm_grab_id(ibs->vm, ibs->ring, &ibs->sync);
- if (r) {
- amdgpu_ring_unlock_undo(ring);
- return r;
- }
+ if (vm && !ibs->vm_id) {
+ dev_err(adev->dev, "VM IB without ID\n");
+ return -EINVAL;
}
- r = amdgpu_sync_rings(&ibs->sync, ring);
+ r = amdgpu_ring_alloc(ring, 256 * num_ibs);
if (r) {
- amdgpu_ring_unlock_undo(ring);
- dev_err(adev->dev, "failed to sync rings (%d)\n", r);
+ dev_err(adev->dev, "scheduling IB failed (%d).\n", r);
return r;
}
if (vm) {
/* do context switch */
- amdgpu_vm_flush(ring, vm, ib->sync.last_vm_update);
-
- if (ring->funcs->emit_gds_switch)
- amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
- ib->gds_base, ib->gds_size,
- ib->gws_base, ib->gws_size,
- ib->oa_base, ib->oa_size);
+ amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
+ ib->gds_base, ib->gds_size,
+ ib->gws_base, ib->gws_size,
+ ib->oa_base, ib->oa_size);
if (ring->funcs->emit_hdp_flush)
amdgpu_ring_emit_hdp_flush(ring);
@@ -186,27 +164,32 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
for (i = 0; i < num_ibs; ++i) {
ib = &ibs[i];
- if (ib->ring != ring || ib->ctx != ctx || ib->vm != vm) {
+ if (ib->ctx != ctx || ib->vm != vm) {
ring->current_ctx = old_ctx;
- amdgpu_ring_unlock_undo(ring);
+ if (ib->vm_id)
+ amdgpu_vm_reset_id(adev, ib->vm_id);
+ amdgpu_ring_undo(ring);
return -EINVAL;
}
amdgpu_ring_emit_ib(ring, ib);
ring->current_ctx = ctx;
}
- r = amdgpu_fence_emit(ring, owner, &ib->fence);
+ if (vm) {
+ if (ring->funcs->emit_hdp_invalidate)
+ amdgpu_ring_emit_hdp_invalidate(ring);
+ }
+
+ r = amdgpu_fence_emit(ring, &hwf);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
ring->current_ctx = old_ctx;
- amdgpu_ring_unlock_undo(ring);
+ if (ib->vm_id)
+ amdgpu_vm_reset_id(adev, ib->vm_id);
+ amdgpu_ring_undo(ring);
return r;
}
- if (!amdgpu_enable_scheduler && ib->ctx)
- ib->sequence = amdgpu_ctx_add_fence(ib->ctx, ring,
- &ib->fence->base);
-
/* wrap the last IB with fence */
if (ib->user) {
uint64_t addr = amdgpu_bo_gpu_offset(ib->user->bo);
@@ -215,10 +198,10 @@ int amdgpu_ib_schedule(struct amdgpu_device *adev, unsigned num_ibs,
AMDGPU_FENCE_FLAG_64BIT);
}
- if (ib->vm)
- amdgpu_vm_fence(adev, ib->vm, &ib->fence->base);
+ if (f)
+ *f = fence_get(hwf);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
new file mode 100644
index 0000000000000..9c9b19e2f353b
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -0,0 +1,171 @@
+/*
+ * Copyright 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ *
+ */
+#include
+#include
+#include
+#include
+#include "amdgpu.h"
+#include "amdgpu_trace.h"
+
+int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
+ struct amdgpu_job **job)
+{
+ size_t size = sizeof(struct amdgpu_job);
+
+ if (num_ibs == 0)
+ return -EINVAL;
+
+ size += sizeof(struct amdgpu_ib) * num_ibs;
+
+ *job = kzalloc(size, GFP_KERNEL);
+ if (!*job)
+ return -ENOMEM;
+
+ (*job)->adev = adev;
+ (*job)->ibs = (void *)&(*job)[1];
+ (*job)->num_ibs = num_ibs;
+
+ amdgpu_sync_create(&(*job)->sync);
+
+ return 0;
+}
+
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
+ struct amdgpu_job **job)
+{
+ int r;
+
+ r = amdgpu_job_alloc(adev, 1, job);
+ if (r)
+ return r;
+
+ r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]);
+ if (r)
+ kfree(*job);
+
+ return r;
+}
+
+void amdgpu_job_free(struct amdgpu_job *job)
+{
+ unsigned i;
+ struct fence *f;
+ /* use sched fence if available */
+ f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
+
+ for (i = 0; i < job->num_ibs; ++i)
+ amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
+ fence_put(job->fence);
+
+ amdgpu_bo_unref(&job->uf.bo);
+ amdgpu_sync_free(&job->sync);
+ kfree(job);
+}
+
+int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
+ struct amd_sched_entity *entity, void *owner,
+ struct fence **f)
+{
+ job->ring = ring;
+ job->base.sched = &ring->sched;
+ job->base.s_entity = entity;
+ job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
+ if (!job->base.s_fence)
+ return -ENOMEM;
+
+ *f = fence_get(&job->base.s_fence->base);
+
+ job->owner = owner;
+ amd_sched_entity_push_job(&job->base);
+
+ return 0;
+}
+
+static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
+{
+ struct amdgpu_job *job = to_amdgpu_job(sched_job);
+ struct amdgpu_vm *vm = job->ibs->vm;
+
+ struct fence *fence = amdgpu_sync_get_fence(&job->sync);
+
+ if (fence == NULL && vm && !job->ibs->vm_id) {
+ struct amdgpu_ring *ring = job->ring;
+ unsigned i, vm_id;
+ uint64_t vm_pd_addr;
+ int r;
+
+ r = amdgpu_vm_grab_id(vm, ring, &job->sync,
+ &job->base.s_fence->base,
+ &vm_id, &vm_pd_addr);
+ if (r)
+ DRM_ERROR("Error getting VM ID (%d)\n", r);
+ else {
+ for (i = 0; i < job->num_ibs; ++i) {
+ job->ibs[i].vm_id = vm_id;
+ job->ibs[i].vm_pd_addr = vm_pd_addr;
+ }
+ }
+
+ fence = amdgpu_sync_get_fence(&job->sync);
+ }
+
+ return fence;
+}
+
+static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
+{
+ struct fence *fence = NULL;
+ struct amdgpu_job *job;
+ int r;
+
+ if (!sched_job) {
+ DRM_ERROR("job is null\n");
+ return NULL;
+ }
+ job = to_amdgpu_job(sched_job);
+
+ r = amdgpu_sync_wait(&job->sync);
+ if (r) {
+ DRM_ERROR("failed to sync wait (%d)\n", r);
+ return NULL;
+ }
+
+ trace_amdgpu_sched_run_job(job);
+ r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
+ job->sync.last_vm_update, &fence);
+ if (r) {
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
+ goto err;
+ }
+
+err:
+ job->fence = fence;
+ amdgpu_job_free(job);
+ return fence;
+}
+
+struct amd_sched_backend_ops amdgpu_sched_ops = {
+ .dependency = amdgpu_job_dependency,
+ .run_job = amdgpu_job_run,
+};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index e23843f4d877b..7805a8706af7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -447,8 +447,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.max_memory_clock = adev->pm.default_mclk * 10;
}
dev_info.enabled_rb_pipes_mask = adev->gfx.config.backend_enable_mask;
- dev_info.num_rb_pipes = adev->gfx.config.max_backends_per_se *
- adev->gfx.config.max_shader_engines;
+ dev_info.num_rb_pipes = adev->gfx.config.num_rbs;
dev_info.num_hw_gfx_contexts = adev->gfx.config.max_hw_contexts;
dev_info._pad = 0;
dev_info.ids_flags = 0;
@@ -727,6 +726,12 @@ int amdgpu_get_vblank_timestamp_kms(struct drm_device *dev, unsigned int pipe,
/* Get associated drm_crtc: */
crtc = &adev->mode_info.crtcs[pipe]->base;
+ if (!crtc) {
+ /* This can occur on driver load if some component fails to
+ * initialize completely and driver is unloaded */
+ DRM_ERROR("Uninitialized crtc %d\n", pipe);
+ return -EINVAL;
+ }
/* Helper routine in DRM core does all the work: */
return drm_calc_vbltimestamp_from_scanoutpos(dev, pipe, max_error,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index d4e2780c07966..d7ec9bd6755fc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -48,8 +48,7 @@ struct amdgpu_mn {
/* protected by adev->mn_lock */
struct hlist_node node;
- /* objects protected by lock */
- struct mutex lock;
+ /* objects protected by mm->mmap_sem */
struct rb_root objects;
};
@@ -73,21 +72,19 @@ static void amdgpu_mn_destroy(struct work_struct *work)
struct amdgpu_bo *bo, *next_bo;
mutex_lock(&adev->mn_lock);
- mutex_lock(&rmn->lock);
+ down_write(&rmn->mm->mmap_sem);
hash_del(&rmn->node);
rbtree_postorder_for_each_entry_safe(node, next_node, &rmn->objects,
it.rb) {
-
- interval_tree_remove(&node->it, &rmn->objects);
list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
bo->mn = NULL;
list_del_init(&bo->mn_list);
}
kfree(node);
}
- mutex_unlock(&rmn->lock);
+ up_write(&rmn->mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
- mmu_notifier_unregister(&rmn->mn, rmn->mm);
+ mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
kfree(rmn);
}
@@ -129,8 +126,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
/* notification is exclusive, but interval is inclusive */
end -= 1;
- mutex_lock(&rmn->lock);
-
it = interval_tree_iter_first(&rmn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
@@ -165,8 +160,6 @@ static void amdgpu_mn_invalidate_range_start(struct mmu_notifier *mn,
amdgpu_bo_unreserve(bo);
}
}
-
- mutex_unlock(&rmn->lock);
}
static const struct mmu_notifier_ops amdgpu_mn_ops = {
@@ -187,8 +180,8 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
struct amdgpu_mn *rmn;
int r;
- down_write(&mm->mmap_sem);
mutex_lock(&adev->mn_lock);
+ down_write(&mm->mmap_sem);
hash_for_each_possible(adev->mn_hash, rmn, node, (unsigned long)mm)
if (rmn->mm == mm)
@@ -203,7 +196,6 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
rmn->adev = adev;
rmn->mm = mm;
rmn->mn.ops = &amdgpu_mn_ops;
- mutex_init(&rmn->lock);
rmn->objects = RB_ROOT;
r = __mmu_notifier_register(&rmn->mn, mm);
@@ -213,14 +205,14 @@ static struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev)
hash_add(adev->mn_hash, &rmn->node, (unsigned long)mm);
release_locks:
- mutex_unlock(&adev->mn_lock);
up_write(&mm->mmap_sem);
+ mutex_unlock(&adev->mn_lock);
return rmn;
free_rmn:
- mutex_unlock(&adev->mn_lock);
up_write(&mm->mmap_sem);
+ mutex_unlock(&adev->mn_lock);
kfree(rmn);
return ERR_PTR(r);
@@ -250,7 +242,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
INIT_LIST_HEAD(&bos);
- mutex_lock(&rmn->lock);
+ down_write(&rmn->mm->mmap_sem);
while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
kfree(node);
@@ -264,7 +256,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
if (!node) {
node = kmalloc(sizeof(struct amdgpu_mn_node), GFP_KERNEL);
if (!node) {
- mutex_unlock(&rmn->lock);
+ up_write(&rmn->mm->mmap_sem);
return -ENOMEM;
}
}
@@ -279,7 +271,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
interval_tree_insert(&node->it, &rmn->objects);
- mutex_unlock(&rmn->lock);
+ up_write(&rmn->mm->mmap_sem);
return 0;
}
@@ -298,13 +290,15 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
struct list_head *head;
mutex_lock(&adev->mn_lock);
+
rmn = bo->mn;
if (rmn == NULL) {
mutex_unlock(&adev->mn_lock);
return;
}
- mutex_lock(&rmn->lock);
+ down_write(&rmn->mm->mmap_sem);
+
/* save the next list entry for later */
head = bo->mn_list.next;
@@ -318,6 +312,6 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
kfree(node);
}
- mutex_unlock(&rmn->lock);
+ up_write(&rmn->mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index fdc1be8550da6..8d432e6901af5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -390,7 +390,6 @@ struct amdgpu_crtc {
struct drm_display_mode native_mode;
u32 pll_id;
/* page flipping */
- struct workqueue_struct *pflip_queue;
struct amdgpu_flip_work *pflip_works;
enum amdgpu_flip_status pflip_status;
int deferred_flip_completion;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index b8fbbd7699e45..151a2d42c639c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -97,9 +97,6 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
amdgpu_update_memory_usage(bo->adev, &bo->tbo.mem, NULL);
- mutex_lock(&bo->adev->gem.mutex);
- list_del_init(&bo->list);
- mutex_unlock(&bo->adev->gem.mutex);
drm_gem_object_release(&bo->gem_base);
amdgpu_bo_unref(&bo->parent);
kfree(bo->metadata);
@@ -254,12 +251,15 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
bo->adev = adev;
INIT_LIST_HEAD(&bo->list);
INIT_LIST_HEAD(&bo->va);
- bo->initial_domain = domain & (AMDGPU_GEM_DOMAIN_VRAM |
- AMDGPU_GEM_DOMAIN_GTT |
- AMDGPU_GEM_DOMAIN_CPU |
- AMDGPU_GEM_DOMAIN_GDS |
- AMDGPU_GEM_DOMAIN_GWS |
- AMDGPU_GEM_DOMAIN_OA);
+ bo->prefered_domains = domain & (AMDGPU_GEM_DOMAIN_VRAM |
+ AMDGPU_GEM_DOMAIN_GTT |
+ AMDGPU_GEM_DOMAIN_CPU |
+ AMDGPU_GEM_DOMAIN_GDS |
+ AMDGPU_GEM_DOMAIN_GWS |
+ AMDGPU_GEM_DOMAIN_OA);
+ bo->allowed_domains = bo->prefered_domains;
+ if (!kernel && bo->allowed_domains == AMDGPU_GEM_DOMAIN_VRAM)
+ bo->allowed_domains |= AMDGPU_GEM_DOMAIN_GTT;
bo->flags = flags;
@@ -308,7 +308,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
bool is_iomem;
- int r;
+ long r;
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
return -EPERM;
@@ -319,14 +319,20 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
}
return 0;
}
+
+ r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
- if (r) {
+ if (r)
return r;
- }
+
bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
- if (ptr) {
+ if (ptr)
*ptr = bo->kptr;
- }
+
return 0;
}
@@ -367,7 +373,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
int r, i;
unsigned fpfn, lpfn;
- if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return -EPERM;
if (WARN_ON_ONCE(min_offset > max_offset))
@@ -470,26 +476,6 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM);
}
-void amdgpu_bo_force_delete(struct amdgpu_device *adev)
-{
- struct amdgpu_bo *bo, *n;
-
- if (list_empty(&adev->gem.objects)) {
- return;
- }
- dev_err(adev->dev, "Userspace still has active objects !\n");
- list_for_each_entry_safe(bo, n, &adev->gem.objects, list) {
- dev_err(adev->dev, "%p %p %lu %lu force free\n",
- &bo->gem_base, bo, (unsigned long)bo->gem_base.size,
- *((unsigned long *)&bo->gem_base.refcount));
- mutex_lock(&bo->adev->gem.mutex);
- list_del_init(&bo->list);
- mutex_unlock(&bo->adev->gem.mutex);
- /* this should unref the ttm bo */
- drm_gem_object_unreference_unlocked(&bo->gem_base);
- }
-}
-
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* Add an MTRR for the VRAM */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 5107fb291bdbe..acc08018c6cc1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -149,7 +149,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 *gpu_addr);
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
-void amdgpu_bo_force_delete(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);
void amdgpu_bo_fini(struct amdgpu_device *adev);
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 95a4a25d8df9e..ff9597ce268c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -123,7 +123,9 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
level = amdgpu_dpm_get_performance_level(adev);
return snprintf(buf, PAGE_SIZE, "%s\n",
(level == AMD_DPM_FORCED_LEVEL_AUTO) ? "auto" :
- (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" : "high");
+ (level == AMD_DPM_FORCED_LEVEL_LOW) ? "low" :
+ (level == AMD_DPM_FORCED_LEVEL_HIGH) ? "high" :
+ (level == AMD_DPM_FORCED_LEVEL_MANUAL) ? "manual" : "unknown");
} else {
enum amdgpu_dpm_forced_level level;
@@ -155,6 +157,8 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
level = AMDGPU_DPM_FORCED_LEVEL_HIGH;
} else if (strncmp("auto", buf, strlen("auto")) == 0) {
level = AMDGPU_DPM_FORCED_LEVEL_AUTO;
+ } else if (strncmp("manual", buf, strlen("manual")) == 0) {
+ level = AMDGPU_DPM_FORCED_LEVEL_MANUAL;
} else {
count = -EINVAL;
goto fail;
@@ -180,10 +184,293 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
return count;
}
+static ssize_t amdgpu_get_pp_num_states(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ struct pp_states_info data;
+ int i, buf_len;
+
+ if (adev->pp_enabled)
+ amdgpu_dpm_get_pp_num_states(adev, &data);
+
+ buf_len = snprintf(buf, PAGE_SIZE, "states: %d\n", data.nums);
+ for (i = 0; i < data.nums; i++)
+ buf_len += snprintf(buf + buf_len, PAGE_SIZE, "%d %s\n", i,
+ (data.states[i] == POWER_STATE_TYPE_INTERNAL_BOOT) ? "boot" :
+ (data.states[i] == POWER_STATE_TYPE_BATTERY) ? "battery" :
+ (data.states[i] == POWER_STATE_TYPE_BALANCED) ? "balanced" :
+ (data.states[i] == POWER_STATE_TYPE_PERFORMANCE) ? "performance" : "default");
+
+ return buf_len;
+}
+
+static ssize_t amdgpu_get_pp_cur_state(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ struct pp_states_info data;
+ enum amd_pm_state_type pm = 0;
+ int i = 0;
+
+ if (adev->pp_enabled) {
+
+ pm = amdgpu_dpm_get_current_power_state(adev);
+ amdgpu_dpm_get_pp_num_states(adev, &data);
+
+ for (i = 0; i < data.nums; i++) {
+ if (pm == data.states[i])
+ break;
+ }
+
+ if (i == data.nums)
+ i = -EINVAL;
+ }
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", i);
+}
+
+static ssize_t amdgpu_get_pp_force_state(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ struct pp_states_info data;
+ enum amd_pm_state_type pm = 0;
+ int i;
+
+ if (adev->pp_force_state_enabled && adev->pp_enabled) {
+ pm = amdgpu_dpm_get_current_power_state(adev);
+ amdgpu_dpm_get_pp_num_states(adev, &data);
+
+ for (i = 0; i < data.nums; i++) {
+ if (pm == data.states[i])
+ break;
+ }
+
+ if (i == data.nums)
+ i = -EINVAL;
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", i);
+
+ } else
+ return snprintf(buf, PAGE_SIZE, "\n");
+}
+
+static ssize_t amdgpu_set_pp_force_state(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ enum amd_pm_state_type state = 0;
+ long idx;
+ int ret;
+
+ if (strlen(buf) == 1)
+ adev->pp_force_state_enabled = false;
+ else {
+ ret = kstrtol(buf, 0, &idx);
+
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+
+ if (adev->pp_enabled) {
+ struct pp_states_info data;
+ amdgpu_dpm_get_pp_num_states(adev, &data);
+ state = data.states[idx];
+ /* only set user selected power states */
+ if (state != POWER_STATE_TYPE_INTERNAL_BOOT &&
+ state != POWER_STATE_TYPE_DEFAULT) {
+ amdgpu_dpm_dispatch_task(adev,
+ AMD_PP_EVENT_ENABLE_USER_STATE, &state, NULL);
+ adev->pp_force_state_enabled = true;
+ }
+ }
+ }
+fail:
+ return count;
+}
+
+static ssize_t amdgpu_get_pp_table(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ char *table = NULL;
+ int size, i;
+
+ if (adev->pp_enabled)
+ size = amdgpu_dpm_get_pp_table(adev, &table);
+ else
+ return 0;
+
+ if (size >= PAGE_SIZE)
+ size = PAGE_SIZE - 1;
+
+ for (i = 0; i < size; i++) {
+ sprintf(buf + i, "%02x", table[i]);
+ }
+ sprintf(buf + i, "\n");
+
+ return size;
+}
+
+static ssize_t amdgpu_set_pp_table(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+
+ if (adev->pp_enabled)
+ amdgpu_dpm_set_pp_table(adev, buf, count);
+
+ return count;
+}
+
+static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size = 0;
+
+ if (adev->pp_enabled)
+ size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+
+ return size;
+}
+
+static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int ret;
+ long level;
+
+ ret = kstrtol(buf, 0, &level);
+
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+
+ if (adev->pp_enabled)
+ amdgpu_dpm_force_clock_level(adev, PP_SCLK, level);
+fail:
+ return count;
+}
+
+static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size = 0;
+
+ if (adev->pp_enabled)
+ size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+
+ return size;
+}
+
+static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int ret;
+ long level;
+
+ ret = kstrtol(buf, 0, &level);
+
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+
+ if (adev->pp_enabled)
+ amdgpu_dpm_force_clock_level(adev, PP_MCLK, level);
+fail:
+ return count;
+}
+
+static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ ssize_t size = 0;
+
+ if (adev->pp_enabled)
+ size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+
+ return size;
+}
+
+static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ struct drm_device *ddev = dev_get_drvdata(dev);
+ struct amdgpu_device *adev = ddev->dev_private;
+ int ret;
+ long level;
+
+ ret = kstrtol(buf, 0, &level);
+
+ if (ret) {
+ count = -EINVAL;
+ goto fail;
+ }
+
+ if (adev->pp_enabled)
+ amdgpu_dpm_force_clock_level(adev, PP_PCIE, level);
+fail:
+ return count;
+}
+
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
amdgpu_set_dpm_forced_performance_level);
+static DEVICE_ATTR(pp_num_states, S_IRUGO, amdgpu_get_pp_num_states, NULL);
+static DEVICE_ATTR(pp_cur_state, S_IRUGO, amdgpu_get_pp_cur_state, NULL);
+static DEVICE_ATTR(pp_force_state, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_force_state,
+ amdgpu_set_pp_force_state);
+static DEVICE_ATTR(pp_table, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_table,
+ amdgpu_set_pp_table);
+static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_dpm_sclk,
+ amdgpu_set_pp_dpm_sclk);
+static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_dpm_mclk,
+ amdgpu_set_pp_dpm_mclk);
+static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
+ amdgpu_get_pp_dpm_pcie,
+ amdgpu_set_pp_dpm_pcie);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@@ -637,14 +924,12 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
amdgpu_dpm_print_power_state(adev, adev->pm.dpm.requested_ps);
}
- mutex_lock(&adev->ring_lock);
-
/* update whether vce is active */
ps->vce_active = adev->pm.dpm.vce_active;
ret = amdgpu_dpm_pre_set_power_state(adev);
if (ret)
- goto done;
+ return;
/* update display watermarks based on new power state */
amdgpu_display_bandwidth_update(adev);
@@ -682,9 +967,6 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
amdgpu_dpm_force_performance_level(adev, adev->pm.dpm.forced_level);
}
}
-
-done:
- mutex_unlock(&adev->ring_lock);
}
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
@@ -785,6 +1067,44 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file for dpm state\n");
return ret;
}
+
+ if (adev->pp_enabled) {
+ ret = device_create_file(adev->dev, &dev_attr_pp_num_states);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_num_states\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev, &dev_attr_pp_cur_state);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_cur_state\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev, &dev_attr_pp_force_state);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_force_state\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev, &dev_attr_pp_table);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_table\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_dpm_sclk\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_dpm_mclk\n");
+ return ret;
+ }
+ ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
+ if (ret) {
+ DRM_ERROR("failed to create device file pp_dpm_pcie\n");
+ return ret;
+ }
+ }
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@@ -802,6 +1122,15 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
hwmon_device_unregister(adev->pm.int_hwmon_dev);
device_remove_file(adev->dev, &dev_attr_power_dpm_state);
device_remove_file(adev->dev, &dev_attr_power_dpm_force_performance_level);
+ if (adev->pp_enabled) {
+ device_remove_file(adev->dev, &dev_attr_pp_num_states);
+ device_remove_file(adev->dev, &dev_attr_pp_cur_state);
+ device_remove_file(adev->dev, &dev_attr_pp_force_state);
+ device_remove_file(adev->dev, &dev_attr_pp_table);
+ device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
+ device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+ device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
+ }
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@@ -817,13 +1146,11 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
int i = 0;
amdgpu_display_bandwidth_update(adev);
- mutex_lock(&adev->ring_lock);
- for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
- struct amdgpu_ring *ring = adev->rings[i];
- if (ring && ring->ready)
- amdgpu_fence_wait_empty(ring);
- }
- mutex_unlock(&adev->ring_lock);
+ for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
+ struct amdgpu_ring *ring = adev->rings[i];
+ if (ring && ring->ready)
+ amdgpu_fence_wait_empty(ring);
+ }
amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_DISPLAY_CONFIG_CHANGE, NULL, NULL);
} else {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 59f735a933a93..be6388f73ba27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -73,10 +73,6 @@ struct drm_gem_object *amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
if (ret)
return ERR_PTR(ret);
- mutex_lock(&adev->gem.mutex);
- list_add_tail(&bo->list, &adev->gem.objects);
- mutex_unlock(&adev->gem.mutex);
-
return &bo->gem_base;
}
@@ -121,7 +117,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
- if (amdgpu_ttm_tt_has_userptr(bo->tbo.ttm))
+ if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm))
return ERR_PTR(-EPERM);
return drm_gem_prime_export(dev, gobj, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d1f234dd21261..972eed2ef787e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -48,28 +48,6 @@
*/
static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring);
-/**
- * amdgpu_ring_free_size - update the free size
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- *
- * Update the free dw slots in the ring buffer (all asics).
- */
-void amdgpu_ring_free_size(struct amdgpu_ring *ring)
-{
- uint32_t rptr = amdgpu_ring_get_rptr(ring);
-
- /* This works because ring_size is a power of 2 */
- ring->ring_free_dw = rptr + (ring->ring_size / 4);
- ring->ring_free_dw -= ring->wptr;
- ring->ring_free_dw &= ring->ptr_mask;
- if (!ring->ring_free_dw) {
- /* this is an empty ring */
- ring->ring_free_dw = ring->ring_size / 4;
- }
-}
-
/**
* amdgpu_ring_alloc - allocate space on the ring buffer
*
@@ -82,50 +60,18 @@ void amdgpu_ring_free_size(struct amdgpu_ring *ring)
*/
int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)
{
- int r;
-
- /* make sure we aren't trying to allocate more space than there is on the ring */
- if (ndw > (ring->ring_size / 4))
- return -ENOMEM;
/* Align requested size with padding so unlock_commit can
* pad safely */
- amdgpu_ring_free_size(ring);
ndw = (ndw + ring->align_mask) & ~ring->align_mask;
- while (ndw > (ring->ring_free_dw - 1)) {
- amdgpu_ring_free_size(ring);
- if (ndw < ring->ring_free_dw) {
- break;
- }
- r = amdgpu_fence_wait_next(ring);
- if (r)
- return r;
- }
- ring->count_dw = ndw;
- ring->wptr_old = ring->wptr;
- return 0;
-}
-/**
- * amdgpu_ring_lock - lock the ring and allocate space on it
- *
- * @adev: amdgpu_device pointer
- * @ring: amdgpu_ring structure holding ring information
- * @ndw: number of dwords to allocate in the ring buffer
- *
- * Lock the ring and allocate @ndw dwords in the ring buffer
- * (all asics).
- * Returns 0 on success, error on failure.
- */
-int amdgpu_ring_lock(struct amdgpu_ring *ring, unsigned ndw)
-{
- int r;
+ /* Make sure we aren't trying to allocate more space
+ * than the maximum for one submission
+ */
+ if (WARN_ON_ONCE(ndw > ring->max_dw))
+ return -ENOMEM;
- mutex_lock(ring->ring_lock);
- r = amdgpu_ring_alloc(ring, ndw);
- if (r) {
- mutex_unlock(ring->ring_lock);
- return r;
- }
+ ring->count_dw = ndw;
+ ring->wptr_old = ring->wptr;
return 0;
}
@@ -144,6 +90,19 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
amdgpu_ring_write(ring, ring->nop);
}
+/** amdgpu_ring_generic_pad_ib - pad IB with NOP packets
+ *
+ * @ring: amdgpu_ring structure holding ring information
+ * @ib: IB to add NOP packets to
+ *
+ * This is the generic pad_ib function for rings except SDMA
+ */
+void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
+{
+ while (ib->length_dw & ring->align_mask)
+ ib->ptr[ib->length_dw++] = ring->nop;
+}
+
/**
* amdgpu_ring_commit - tell the GPU to execute the new
* commands on the ring buffer
@@ -167,20 +126,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
amdgpu_ring_set_wptr(ring);
}
-/**
- * amdgpu_ring_unlock_commit - tell the GPU to execute the new
- * commands on the ring buffer and unlock it
- *
- * @ring: amdgpu_ring structure holding ring information
- *
- * Call amdgpu_ring_commit() then unlock the ring (all asics).
- */
-void amdgpu_ring_unlock_commit(struct amdgpu_ring *ring)
-{
- amdgpu_ring_commit(ring);
- mutex_unlock(ring->ring_lock);
-}
-
/**
* amdgpu_ring_undo - reset the wptr
*
@@ -193,19 +138,6 @@ void amdgpu_ring_undo(struct amdgpu_ring *ring)
ring->wptr = ring->wptr_old;
}
-/**
- * amdgpu_ring_unlock_undo - reset the wptr and unlock the ring
- *
- * @ring: amdgpu_ring structure holding ring information
- *
- * Call amdgpu_ring_undo() then unlock the ring (all asics).
- */
-void amdgpu_ring_unlock_undo(struct amdgpu_ring *ring)
-{
- amdgpu_ring_undo(ring);
- mutex_unlock(ring->ring_lock);
-}
-
/**
* amdgpu_ring_backup - Back up the content of a ring
*
@@ -218,43 +150,32 @@ unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
{
unsigned size, ptr, i;
- /* just in case lock the ring */
- mutex_lock(ring->ring_lock);
*data = NULL;
- if (ring->ring_obj == NULL) {
- mutex_unlock(ring->ring_lock);
+ if (ring->ring_obj == NULL)
return 0;
- }
/* it doesn't make sense to save anything if all fences are signaled */
- if (!amdgpu_fence_count_emitted(ring)) {
- mutex_unlock(ring->ring_lock);
+ if (!amdgpu_fence_count_emitted(ring))
return 0;
- }
ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
size = ring->wptr + (ring->ring_size / 4);
size -= ptr;
size &= ring->ptr_mask;
- if (size == 0) {
- mutex_unlock(ring->ring_lock);
+ if (size == 0)
return 0;
- }
/* and then save the content of the ring */
*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
- if (!*data) {
- mutex_unlock(ring->ring_lock);
+ if (!*data)
return 0;
- }
for (i = 0; i < size; ++i) {
(*data)[i] = ring->ring[ptr++];
ptr &= ring->ptr_mask;
}
- mutex_unlock(ring->ring_lock);
return size;
}
@@ -276,7 +197,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
return 0;
/* restore the saved ring content */
- r = amdgpu_ring_lock(ring, size);
+ r = amdgpu_ring_alloc(ring, size);
if (r)
return r;
@@ -284,7 +205,7 @@ int amdgpu_ring_restore(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, data[i]);
}
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
kfree(data);
return 0;
}
@@ -315,7 +236,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev;
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
- r = amdgpu_fence_driver_init_ring(ring);
+ r = amdgpu_fence_driver_init_ring(ring,
+ amdgpu_sched_hw_submission);
if (r)
return r;
}
@@ -352,7 +274,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
return r;
}
- ring->ring_lock = &adev->ring_lock;
/* Align ring size */
rb_bufsz = order_base_2(ring_size / 8);
ring_size = (1 << (rb_bufsz + 1)) * 4;
@@ -389,7 +310,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
}
}
ring->ptr_mask = (ring->ring_size / 4) - 1;
- ring->ring_free_dw = ring->ring_size / 4;
+ ring->max_dw = DIV_ROUND_UP(ring->ring_size / 4,
+ amdgpu_sched_hw_submission);
if (amdgpu_debugfs_ring_init(adev, ring)) {
DRM_ERROR("Failed to register debugfs file for rings !\n");
@@ -410,15 +332,10 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
int r;
struct amdgpu_bo *ring_obj;
- if (ring->ring_lock == NULL)
- return;
-
- mutex_lock(ring->ring_lock);
ring_obj = ring->ring_obj;
ring->ready = false;
ring->ring = NULL;
ring->ring_obj = NULL;
- mutex_unlock(ring->ring_lock);
amdgpu_wb_free(ring->adev, ring->fence_offs);
amdgpu_wb_free(ring->adev, ring->rptr_offs);
@@ -436,30 +353,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
}
}
-/**
- * amdgpu_ring_from_fence - get ring from fence
- *
- * @f: fence structure
- *
- * Extract the ring a fence belongs to. Handles both scheduler as
- * well as hardware fences.
- */
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
-{
- struct amdgpu_fence *a_fence;
- struct amd_sched_fence *s_fence;
-
- s_fence = to_amd_sched_fence(f);
- if (s_fence)
- return container_of(s_fence->sched, struct amdgpu_ring, sched);
-
- a_fence = to_amdgpu_fence(f);
- if (a_fence)
- return a_fence->ring;
-
- return NULL;
-}
-
/*
* Debugfs info
*/
@@ -474,29 +367,18 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
uint32_t rptr, wptr, rptr_next;
- unsigned count, i, j;
-
- amdgpu_ring_free_size(ring);
- count = (ring->ring_size / 4) - ring->ring_free_dw;
+ unsigned i;
wptr = amdgpu_ring_get_wptr(ring);
- seq_printf(m, "wptr: 0x%08x [%5d]\n",
- wptr, wptr);
+ seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
rptr = amdgpu_ring_get_rptr(ring);
- seq_printf(m, "rptr: 0x%08x [%5d]\n",
- rptr, rptr);
-
rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
+ seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
+
seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
ring->wptr, ring->wptr);
- seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
- ring->last_semaphore_signal_addr);
- seq_printf(m, "last semaphore wait addr : 0x%016llx\n",
- ring->last_semaphore_wait_addr);
- seq_printf(m, "%u free dwords in ring\n", ring->ring_free_dw);
- seq_printf(m, "%u dwords in ring\n", count);
if (!ring->ready)
return 0;
@@ -505,11 +387,20 @@ static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
* packet that is the root issue
*/
i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
- for (j = 0; j <= (count + 32); j++) {
+ while (i != rptr) {
+ seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
+ if (i == rptr)
+ seq_puts(m, " *");
+ if (i == rptr_next)
+ seq_puts(m, " #");
+ seq_puts(m, "\n");
+ i = (i + 1) & ring->ptr_mask;
+ }
+ while (i != wptr) {
seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
- if (rptr == i)
+ if (i == rptr)
seq_puts(m, " *");
- if (rptr_next == i)
+ if (i == rptr_next)
seq_puts(m, " #");
seq_puts(m, "\n");
i = (i + 1) & ring->ptr_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index ca72a2e487b9b..8bf84efafb049 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -60,9 +60,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
sa_manager->align = align;
sa_manager->hole = &sa_manager->olist;
INIT_LIST_HEAD(&sa_manager->olist);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
INIT_LIST_HEAD(&sa_manager->flist[i]);
- }
r = amdgpu_bo_create(adev, size, align, true, domain,
0, NULL, NULL, &sa_manager->bo);
@@ -228,11 +227,9 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
unsigned soffset, eoffset, wasted;
int i;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- if (!list_empty(&sa_manager->flist[i])) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
+ if (!list_empty(&sa_manager->flist[i]))
return true;
- }
- }
soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
@@ -265,12 +262,11 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
/* go over all fence list and try to find the closest sa_bo
* of the current last
*/
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
struct amdgpu_sa_bo *sa_bo;
- if (list_empty(&sa_manager->flist[i])) {
+ if (list_empty(&sa_manager->flist[i]))
continue;
- }
sa_bo = list_first_entry(&sa_manager->flist[i],
struct amdgpu_sa_bo, flist);
@@ -299,7 +295,9 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
}
if (best_bo) {
- uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
+ uint32_t idx = best_bo->fence->context;
+
+ idx %= AMDGPU_SA_NUM_FENCE_LISTS;
++tries[idx];
sa_manager->hole = best_bo->olist.prev;
@@ -315,14 +313,17 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align)
{
- struct fence *fences[AMDGPU_MAX_RINGS];
- unsigned tries[AMDGPU_MAX_RINGS];
+ struct fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
+ unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
unsigned count;
int i, r;
signed long t;
- BUG_ON(align > sa_manager->align);
- BUG_ON(size > sa_manager->size);
+ if (WARN_ON_ONCE(align > sa_manager->align))
+ return -EINVAL;
+
+ if (WARN_ON_ONCE(size > sa_manager->size))
+ return -EINVAL;
*sa_bo = kmalloc(sizeof(struct amdgpu_sa_bo), GFP_KERNEL);
if ((*sa_bo) == NULL) {
@@ -335,7 +336,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
spin_lock(&sa_manager->wq.lock);
do {
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
fences[i] = NULL;
tries[i] = 0;
}
@@ -352,7 +353,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
/* see if we can skip over some allocations */
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
- for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
+ for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
if (fences[i])
fences[count++] = fence_get(fences[i]);
@@ -394,8 +395,9 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
spin_lock(&sa_manager->wq.lock);
if (fence && !fence_is_signaled(fence)) {
uint32_t idx;
+
(*sa_bo)->fence = fence_get(fence);
- idx = amdgpu_ring_from_fence(fence)->idx;
+ idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
} else {
amdgpu_sa_bo_remove_locked(*sa_bo);
@@ -407,25 +409,6 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
#if defined(CONFIG_DEBUG_FS)
-static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m)
-{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(fence);
- struct amd_sched_fence *s_fence = to_amd_sched_fence(fence);
-
- if (a_fence)
- seq_printf(m, " protected by 0x%016llx on ring %d",
- a_fence->seq, a_fence->ring->idx);
-
- if (s_fence) {
- struct amdgpu_ring *ring;
-
-
- ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
- seq_printf(m, " protected by 0x%016x on ring %d",
- s_fence->base.seqno, ring->idx);
- }
-}
-
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m)
{
@@ -442,8 +425,11 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
}
seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
soffset, eoffset, eoffset - soffset);
+
if (i->fence)
- amdgpu_sa_bo_dump_fence(i->fence, m);
+ seq_printf(m, " protected by 0x%08x on context %d",
+ i->fence->seqno, i->fence->context);
+
seq_printf(m, "\n");
}
spin_unlock(&sa_manager->wq.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
deleted file mode 100644
index 438c052546955..0000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * Copyright 2015 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- *
- */
-#include
-#include
-#include
-#include
-#include "amdgpu.h"
-#include "amdgpu_trace.h"
-
-static struct fence *amdgpu_sched_dependency(struct amd_sched_job *sched_job)
-{
- struct amdgpu_job *job = to_amdgpu_job(sched_job);
- return amdgpu_sync_get_fence(&job->ibs->sync);
-}
-
-static struct fence *amdgpu_sched_run_job(struct amd_sched_job *sched_job)
-{
- struct amdgpu_fence *fence = NULL;
- struct amdgpu_job *job;
- int r;
-
- if (!sched_job) {
- DRM_ERROR("job is null\n");
- return NULL;
- }
- job = to_amdgpu_job(sched_job);
- trace_amdgpu_sched_run_job(job);
- r = amdgpu_ib_schedule(job->adev, job->num_ibs, job->ibs, job->owner);
- if (r) {
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
- goto err;
- }
-
- fence = job->ibs[job->num_ibs - 1].fence;
- fence_get(&fence->base);
-
-err:
- if (job->free_job)
- job->free_job(job);
-
- kfree(job);
- return fence ? &fence->base : NULL;
-}
-
-struct amd_sched_backend_ops amdgpu_sched_ops = {
- .dependency = amdgpu_sched_dependency,
- .run_job = amdgpu_sched_run_job,
-};
-
-int amdgpu_sched_ib_submit_kernel_helper(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct amdgpu_ib *ibs,
- unsigned num_ibs,
- int (*free_job)(struct amdgpu_job *),
- void *owner,
- struct fence **f)
-{
- int r = 0;
- if (amdgpu_enable_scheduler) {
- struct amdgpu_job *job =
- kzalloc(sizeof(struct amdgpu_job), GFP_KERNEL);
- if (!job)
- return -ENOMEM;
- job->base.sched = &ring->sched;
- job->base.s_entity = &adev->kernel_ctx.rings[ring->idx].entity;
- job->base.s_fence = amd_sched_fence_create(job->base.s_entity, owner);
- if (!job->base.s_fence) {
- kfree(job);
- return -ENOMEM;
- }
- *f = fence_get(&job->base.s_fence->base);
-
- job->adev = adev;
- job->ibs = ibs;
- job->num_ibs = num_ibs;
- job->owner = owner;
- job->free_job = free_job;
- amd_sched_entity_push_job(&job->base);
- } else {
- r = amdgpu_ib_schedule(adev, num_ibs, ibs, owner);
- if (r)
- return r;
- *f = fence_get(&ibs[num_ibs - 1].fence->base);
- }
-
- return 0;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
deleted file mode 100644
index 1caaf201b708a..0000000000000
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_semaphore.c
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright 2011 Christian König.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- *
- */
-/*
- * Authors:
- * Christian König
- */
-#include
-#include "amdgpu.h"
-#include "amdgpu_trace.h"
-
-int amdgpu_semaphore_create(struct amdgpu_device *adev,
- struct amdgpu_semaphore **semaphore)
-{
- int r;
-
- *semaphore = kmalloc(sizeof(struct amdgpu_semaphore), GFP_KERNEL);
- if (*semaphore == NULL) {
- return -ENOMEM;
- }
- r = amdgpu_sa_bo_new(&adev->ring_tmp_bo,
- &(*semaphore)->sa_bo, 8, 8);
- if (r) {
- kfree(*semaphore);
- *semaphore = NULL;
- return r;
- }
- (*semaphore)->waiters = 0;
- (*semaphore)->gpu_addr = amdgpu_sa_bo_gpu_addr((*semaphore)->sa_bo);
-
- *((uint64_t *)amdgpu_sa_bo_cpu_addr((*semaphore)->sa_bo)) = 0;
-
- return 0;
-}
-
-bool amdgpu_semaphore_emit_signal(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore)
-{
- trace_amdgpu_semaphore_signale(ring->idx, semaphore);
-
- if (amdgpu_ring_emit_semaphore(ring, semaphore, false)) {
- --semaphore->waiters;
-
- /* for debugging lockup only, used by sysfs debug files */
- ring->last_semaphore_signal_addr = semaphore->gpu_addr;
- return true;
- }
- return false;
-}
-
-bool amdgpu_semaphore_emit_wait(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore)
-{
- trace_amdgpu_semaphore_wait(ring->idx, semaphore);
-
- if (amdgpu_ring_emit_semaphore(ring, semaphore, true)) {
- ++semaphore->waiters;
-
- /* for debugging lockup only, used by sysfs debug files */
- ring->last_semaphore_wait_addr = semaphore->gpu_addr;
- return true;
- }
- return false;
-}
-
-void amdgpu_semaphore_free(struct amdgpu_device *adev,
- struct amdgpu_semaphore **semaphore,
- struct fence *fence)
-{
- if (semaphore == NULL || *semaphore == NULL) {
- return;
- }
- if ((*semaphore)->waiters > 0) {
- dev_err(adev->dev, "semaphore %p has more waiters than signalers,"
- " hardware lockup imminent!\n", *semaphore);
- }
- amdgpu_sa_bo_free(adev, &(*semaphore)->sa_bo, fence);
- kfree(*semaphore);
- *semaphore = NULL;
-}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 181ce39ef5e59..c48b4fce5e57a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -37,6 +37,8 @@ struct amdgpu_sync_entry {
struct fence *fence;
};
+static struct kmem_cache *amdgpu_sync_slab;
+
/**
* amdgpu_sync_create - zero init sync object
*
@@ -46,26 +48,22 @@ struct amdgpu_sync_entry {
*/
void amdgpu_sync_create(struct amdgpu_sync *sync)
{
- unsigned i;
-
- for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
- sync->semaphores[i] = NULL;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- sync->sync_to[i] = NULL;
-
hash_init(sync->fences);
sync->last_vm_update = NULL;
}
+/**
+ * amdgpu_sync_same_dev - test if fence belong to us
+ *
+ * @adev: amdgpu device to use for the test
+ * @f: fence to test
+ *
+ * Test if the fence was issued by us.
+ */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
- if (a_fence)
- return a_fence->ring->adev == adev;
-
if (s_fence) {
struct amdgpu_ring *ring;
@@ -76,17 +74,31 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
return false;
}
-static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
+/**
+ * amdgpu_sync_get_owner - extract the owner of a fence
+ *
+ * @fence: fence get the owner from
+ *
+ * Extract who originally created the fence.
+ */
+static void *amdgpu_sync_get_owner(struct fence *f)
{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
if (s_fence)
- return s_fence->owner == owner;
- if (a_fence)
- return a_fence->owner == owner;
- return false;
+ return s_fence->owner;
+
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
}
+/**
+ * amdgpu_sync_keep_later - Keep the later fence
+ *
+ * @keep: existing fence to test
+ * @fence: new fence
+ *
+ * Either keep the existing fence or the new one, depending which one is later.
+ */
static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
{
if (*keep && fence_is_later(*keep, fence))
@@ -107,59 +119,39 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
struct fence *f)
{
struct amdgpu_sync_entry *e;
- struct amdgpu_fence *fence;
if (!f)
return 0;
if (amdgpu_sync_same_dev(adev, f) &&
- amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
+ amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
amdgpu_sync_keep_later(&sync->last_vm_update, f);
- fence = to_amdgpu_fence(f);
- if (!fence || fence->ring->adev != adev) {
- hash_for_each_possible(sync->fences, e, node, f->context) {
- if (unlikely(e->fence->context != f->context))
- continue;
-
- amdgpu_sync_keep_later(&e->fence, f);
- return 0;
- }
-
- e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
- if (!e)
- return -ENOMEM;
+ hash_for_each_possible(sync->fences, e, node, f->context) {
+ if (unlikely(e->fence->context != f->context))
+ continue;
- hash_add(sync->fences, &e->node, f->context);
- e->fence = fence_get(f);
+ amdgpu_sync_keep_later(&e->fence, f);
return 0;
}
- amdgpu_sync_keep_later(&sync->sync_to[fence->ring->idx], f);
+ e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
+ if (!e)
+ return -ENOMEM;
+ hash_add(sync->fences, &e->node, f->context);
+ e->fence = fence_get(f);
return 0;
}
-static void *amdgpu_sync_get_owner(struct fence *f)
-{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
- struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
-
- if (s_fence)
- return s_fence->owner;
- else if (a_fence)
- return a_fence->owner;
- return AMDGPU_FENCE_OWNER_UNDEFINED;
-}
-
/**
- * amdgpu_sync_resv - use the semaphores to sync to a reservation object
+ * amdgpu_sync_resv - sync to a reservation object
*
* @sync: sync object to add fences from reservation object to
* @resv: reservation object with embedded fence
* @shared: true if we should only sync to the exclusive fence
*
- * Sync to the fence using the semaphore objects
+ * Sync to the fence
*/
int amdgpu_sync_resv(struct amdgpu_device *adev,
struct amdgpu_sync *sync,
@@ -224,7 +216,7 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
f = e->fence;
hash_del(&e->node);
- kfree(e);
+ kmem_cache_free(amdgpu_sync_slab, e);
if (!fence_is_signaled(f))
return f;
@@ -247,109 +239,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
hash_del(&e->node);
fence_put(e->fence);
- kfree(e);
- }
-
- if (amdgpu_enable_semaphores)
- return 0;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct fence *fence = sync->sync_to[i];
- if (!fence)
- continue;
-
- r = fence_wait(fence, false);
- if (r)
- return r;
- }
-
- return 0;
-}
-
-/**
- * amdgpu_sync_rings - sync ring to all registered fences
- *
- * @sync: sync object to use
- * @ring: ring that needs sync
- *
- * Ensure that all registered fences are signaled before letting
- * the ring continue. The caller must hold the ring lock.
- */
-int amdgpu_sync_rings(struct amdgpu_sync *sync,
- struct amdgpu_ring *ring)
-{
- struct amdgpu_device *adev = ring->adev;
- unsigned count = 0;
- int i, r;
-
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- struct amdgpu_ring *other = adev->rings[i];
- struct amdgpu_semaphore *semaphore;
- struct amdgpu_fence *fence;
-
- if (!sync->sync_to[i])
- continue;
-
- fence = to_amdgpu_fence(sync->sync_to[i]);
-
- /* check if we really need to sync */
- if (!amdgpu_enable_scheduler &&
- !amdgpu_fence_need_sync(fence, ring))
- continue;
-
- /* prevent GPU deadlocks */
- if (!other->ready) {
- dev_err(adev->dev, "Syncing to a disabled ring!");
- return -EINVAL;
- }
-
- if (amdgpu_enable_scheduler || !amdgpu_enable_semaphores) {
- r = fence_wait(sync->sync_to[i], true);
- if (r)
- return r;
- continue;
- }
-
- if (count >= AMDGPU_NUM_SYNCS) {
- /* not enough room, wait manually */
- r = fence_wait(&fence->base, false);
- if (r)
- return r;
- continue;
- }
- r = amdgpu_semaphore_create(adev, &semaphore);
- if (r)
- return r;
-
- sync->semaphores[count++] = semaphore;
-
- /* allocate enough space for sync command */
- r = amdgpu_ring_alloc(other, 16);
- if (r)
- return r;
-
- /* emit the signal semaphore */
- if (!amdgpu_semaphore_emit_signal(other, semaphore)) {
- /* signaling wasn't successful wait manually */
- amdgpu_ring_undo(other);
- r = fence_wait(&fence->base, false);
- if (r)
- return r;
- continue;
- }
-
- /* we assume caller has already allocated space on waiters ring */
- if (!amdgpu_semaphore_emit_wait(ring, semaphore)) {
- /* waiting wasn't successful wait manually */
- amdgpu_ring_undo(other);
- r = fence_wait(&fence->base, false);
- if (r)
- return r;
- continue;
- }
-
- amdgpu_ring_commit(other);
- amdgpu_fence_note_sync(fence, ring);
+ kmem_cache_free(amdgpu_sync_slab, e);
}
return 0;
@@ -358,15 +248,11 @@ int amdgpu_sync_rings(struct amdgpu_sync *sync,
/**
* amdgpu_sync_free - free the sync object
*
- * @adev: amdgpu_device pointer
* @sync: sync object to use
- * @fence: fence to use for the free
*
- * Free the sync object by freeing all semaphores in it.
+ * Free the sync object.
*/
-void amdgpu_sync_free(struct amdgpu_device *adev,
- struct amdgpu_sync *sync,
- struct fence *fence)
+void amdgpu_sync_free(struct amdgpu_sync *sync)
{
struct amdgpu_sync_entry *e;
struct hlist_node *tmp;
@@ -375,14 +261,34 @@ void amdgpu_sync_free(struct amdgpu_device *adev,
hash_for_each_safe(sync->fences, i, tmp, e, node) {
hash_del(&e->node);
fence_put(e->fence);
- kfree(e);
+ kmem_cache_free(amdgpu_sync_slab, e);
}
- for (i = 0; i < AMDGPU_NUM_SYNCS; ++i)
- amdgpu_semaphore_free(adev, &sync->semaphores[i], fence);
+ fence_put(sync->last_vm_update);
+}
+
+/**
+ * amdgpu_sync_init - init sync object subsystem
+ *
+ * Allocate the slab allocator.
+ */
+int amdgpu_sync_init(void)
+{
+ amdgpu_sync_slab = kmem_cache_create(
+ "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_sync_slab)
+ return -ENOMEM;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
- fence_put(sync->sync_to[i]);
+ return 0;
+}
- fence_put(sync->last_vm_update);
+/**
+ * amdgpu_sync_fini - fini sync object subsystem
+ *
+ * Free the slab allocator.
+ */
+void amdgpu_sync_fini(void)
+{
+ kmem_cache_destroy(amdgpu_sync_slab);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
index 4865615e9c066..05a53f4fc3349 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_test.c
@@ -238,144 +238,10 @@ void amdgpu_test_moves(struct amdgpu_device *adev)
amdgpu_do_test_moves(adev);
}
-static int amdgpu_test_create_and_emit_fence(struct amdgpu_device *adev,
- struct amdgpu_ring *ring,
- struct fence **fence)
-{
- uint32_t handle = ring->idx ^ 0xdeafbeef;
- int r;
-
- if (ring == &adev->uvd.ring) {
- r = amdgpu_uvd_get_create_msg(ring, handle, NULL);
- if (r) {
- DRM_ERROR("Failed to get dummy create msg\n");
- return r;
- }
-
- r = amdgpu_uvd_get_destroy_msg(ring, handle, fence);
- if (r) {
- DRM_ERROR("Failed to get dummy destroy msg\n");
- return r;
- }
-
- } else if (ring == &adev->vce.ring[0] ||
- ring == &adev->vce.ring[1]) {
- r = amdgpu_vce_get_create_msg(ring, handle, NULL);
- if (r) {
- DRM_ERROR("Failed to get dummy create msg\n");
- return r;
- }
-
- r = amdgpu_vce_get_destroy_msg(ring, handle, fence);
- if (r) {
- DRM_ERROR("Failed to get dummy destroy msg\n");
- return r;
- }
- } else {
- struct amdgpu_fence *a_fence = NULL;
- r = amdgpu_ring_lock(ring, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring A %d\n", ring->idx);
- return r;
- }
- amdgpu_fence_emit(ring, AMDGPU_FENCE_OWNER_UNDEFINED, &a_fence);
- amdgpu_ring_unlock_commit(ring);
- *fence = &a_fence->base;
- }
- return 0;
-}
-
void amdgpu_test_ring_sync(struct amdgpu_device *adev,
struct amdgpu_ring *ringA,
struct amdgpu_ring *ringB)
{
- struct fence *fence1 = NULL, *fence2 = NULL;
- struct amdgpu_semaphore *semaphore = NULL;
- int r;
-
- r = amdgpu_semaphore_create(adev, &semaphore);
- if (r) {
- DRM_ERROR("Failed to create semaphore\n");
- goto out_cleanup;
- }
-
- r = amdgpu_ring_lock(ringA, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_wait(ringA, semaphore);
- amdgpu_ring_unlock_commit(ringA);
-
- r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence1);
- if (r)
- goto out_cleanup;
-
- r = amdgpu_ring_lock(ringA, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_wait(ringA, semaphore);
- amdgpu_ring_unlock_commit(ringA);
-
- r = amdgpu_test_create_and_emit_fence(adev, ringA, &fence2);
- if (r)
- goto out_cleanup;
-
- mdelay(1000);
-
- if (fence_is_signaled(fence1)) {
- DRM_ERROR("Fence 1 signaled without waiting for semaphore.\n");
- goto out_cleanup;
- }
-
- r = amdgpu_ring_lock(ringB, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring B %p\n", ringB);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_signal(ringB, semaphore);
- amdgpu_ring_unlock_commit(ringB);
-
- r = fence_wait(fence1, false);
- if (r) {
- DRM_ERROR("Failed to wait for sync fence 1\n");
- goto out_cleanup;
- }
-
- mdelay(1000);
-
- if (fence_is_signaled(fence2)) {
- DRM_ERROR("Fence 2 signaled without waiting for semaphore.\n");
- goto out_cleanup;
- }
-
- r = amdgpu_ring_lock(ringB, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring B %p\n", ringB);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_signal(ringB, semaphore);
- amdgpu_ring_unlock_commit(ringB);
-
- r = fence_wait(fence2, false);
- if (r) {
- DRM_ERROR("Failed to wait for sync fence 1\n");
- goto out_cleanup;
- }
-
-out_cleanup:
- amdgpu_semaphore_free(adev, &semaphore, NULL);
-
- if (fence1)
- fence_put(fence1);
-
- if (fence2)
- fence_put(fence2);
-
- if (r)
- printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}
static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
@@ -383,109 +249,6 @@ static void amdgpu_test_ring_sync2(struct amdgpu_device *adev,
struct amdgpu_ring *ringB,
struct amdgpu_ring *ringC)
{
- struct fence *fenceA = NULL, *fenceB = NULL;
- struct amdgpu_semaphore *semaphore = NULL;
- bool sigA, sigB;
- int i, r;
-
- r = amdgpu_semaphore_create(adev, &semaphore);
- if (r) {
- DRM_ERROR("Failed to create semaphore\n");
- goto out_cleanup;
- }
-
- r = amdgpu_ring_lock(ringA, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring A %d\n", ringA->idx);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_wait(ringA, semaphore);
- amdgpu_ring_unlock_commit(ringA);
-
- r = amdgpu_test_create_and_emit_fence(adev, ringA, &fenceA);
- if (r)
- goto out_cleanup;
-
- r = amdgpu_ring_lock(ringB, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring B %d\n", ringB->idx);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_wait(ringB, semaphore);
- amdgpu_ring_unlock_commit(ringB);
- r = amdgpu_test_create_and_emit_fence(adev, ringB, &fenceB);
- if (r)
- goto out_cleanup;
-
- mdelay(1000);
-
- if (fence_is_signaled(fenceA)) {
- DRM_ERROR("Fence A signaled without waiting for semaphore.\n");
- goto out_cleanup;
- }
- if (fence_is_signaled(fenceB)) {
- DRM_ERROR("Fence B signaled without waiting for semaphore.\n");
- goto out_cleanup;
- }
-
- r = amdgpu_ring_lock(ringC, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring B %p\n", ringC);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_signal(ringC, semaphore);
- amdgpu_ring_unlock_commit(ringC);
-
- for (i = 0; i < 30; ++i) {
- mdelay(100);
- sigA = fence_is_signaled(fenceA);
- sigB = fence_is_signaled(fenceB);
- if (sigA || sigB)
- break;
- }
-
- if (!sigA && !sigB) {
- DRM_ERROR("Neither fence A nor B has been signaled\n");
- goto out_cleanup;
- } else if (sigA && sigB) {
- DRM_ERROR("Both fence A and B has been signaled\n");
- goto out_cleanup;
- }
-
- DRM_INFO("Fence %c was first signaled\n", sigA ? 'A' : 'B');
-
- r = amdgpu_ring_lock(ringC, 64);
- if (r) {
- DRM_ERROR("Failed to lock ring B %p\n", ringC);
- goto out_cleanup;
- }
- amdgpu_semaphore_emit_signal(ringC, semaphore);
- amdgpu_ring_unlock_commit(ringC);
-
- mdelay(1000);
-
- r = fence_wait(fenceA, false);
- if (r) {
- DRM_ERROR("Failed to wait for sync fence A\n");
- goto out_cleanup;
- }
- r = fence_wait(fenceB, false);
- if (r) {
- DRM_ERROR("Failed to wait for sync fence B\n");
- goto out_cleanup;
- }
-
-out_cleanup:
- amdgpu_semaphore_free(adev, &semaphore, NULL);
-
- if (fenceA)
- fence_put(fenceA);
-
- if (fenceB)
- fence_put(fenceB);
-
- if (r)
- printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
}
static bool amdgpu_test_sync_possible(struct amdgpu_ring *ringA,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 8f9834ab1bd5a..26a5f4acf5840 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -38,10 +38,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
- __entry->ring = p->ibs[i].ring->idx;
- __entry->dw = p->ibs[i].length_dw;
+ __entry->ring = p->job->ring->idx;
+ __entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted(
- p->ibs[i].ring);
+ p->job->ring);
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
@@ -65,7 +65,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
- __entry->ring_name = job->ibs[0].ring->name;
+ __entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -90,7 +90,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__entry->sched_job = &job->base;
__entry->ib = job->ibs;
__entry->fence = &job->base.s_fence->base;
- __entry->ring_name = job->ibs[0].ring->name;
+ __entry->ring_name = job->ring->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("adev=%p, sched_job=%p, first ib=%p, sched fence=%p, ring name:%s, num_ibs:%u",
@@ -100,18 +100,24 @@ TRACE_EVENT(amdgpu_sched_run_job,
TRACE_EVENT(amdgpu_vm_grab_id,
- TP_PROTO(unsigned vmid, int ring),
- TP_ARGS(vmid, ring),
+ TP_PROTO(struct amdgpu_vm *vm, int ring, unsigned vmid,
+ uint64_t pd_addr),
+ TP_ARGS(vm, ring, vmid, pd_addr),
TP_STRUCT__entry(
- __field(u32, vmid)
+ __field(struct amdgpu_vm *, vm)
__field(u32, ring)
+ __field(u32, vmid)
+ __field(u64, pd_addr)
),
TP_fast_assign(
- __entry->vmid = vmid;
+ __entry->vm = vm;
__entry->ring = ring;
+ __entry->vmid = vmid;
+ __entry->pd_addr = pd_addr;
),
- TP_printk("vmid=%u, ring=%u", __entry->vmid, __entry->ring)
+ TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx", __entry->vm,
+ __entry->ring, __entry->vmid, __entry->pd_addr)
);
TRACE_EVENT(amdgpu_vm_bo_map,
@@ -228,8 +234,8 @@ TRACE_EVENT(amdgpu_vm_flush,
__entry->ring = ring;
__entry->id = id;
),
- TP_printk("pd_addr=%010Lx, ring=%u, id=%u",
- __entry->pd_addr, __entry->ring, __entry->id)
+ TP_printk("ring=%u, id=%u, pd_addr=%010Lx",
+ __entry->ring, __entry->id, __entry->pd_addr)
);
TRACE_EVENT(amdgpu_bo_list_set,
@@ -247,42 +253,6 @@ TRACE_EVENT(amdgpu_bo_list_set,
TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
);
-DECLARE_EVENT_CLASS(amdgpu_semaphore_request,
-
- TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
- TP_ARGS(ring, sem),
-
- TP_STRUCT__entry(
- __field(int, ring)
- __field(signed, waiters)
- __field(uint64_t, gpu_addr)
- ),
-
- TP_fast_assign(
- __entry->ring = ring;
- __entry->waiters = sem->waiters;
- __entry->gpu_addr = sem->gpu_addr;
- ),
-
- TP_printk("ring=%u, waiters=%d, addr=%010Lx", __entry->ring,
- __entry->waiters, __entry->gpu_addr)
-);
-
-DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_signale,
-
- TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
- TP_ARGS(ring, sem)
-);
-
-DEFINE_EVENT(amdgpu_semaphore_request, amdgpu_semaphore_wait,
-
- TP_PROTO(int ring, struct amdgpu_semaphore *sem),
-
- TP_ARGS(ring, sem)
-);
-
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7b82e57aa09cb..ab34190859a8d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -77,6 +77,8 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
+ struct amdgpu_ring *ring;
+ struct amd_sched_rq *rq;
int r;
adev->mman.mem_global_referenced = false;
@@ -106,13 +108,27 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
return r;
}
+ ring = adev->mman.buffer_funcs_ring;
+ rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
+ r = amd_sched_entity_init(&ring->sched, &adev->mman.entity,
+ rq, amdgpu_sched_jobs);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up TTM BO move run queue.\n");
+ drm_global_item_unref(&adev->mman.mem_global_ref);
+ drm_global_item_unref(&adev->mman.bo_global_ref.ref);
+ return r;
+ }
+
adev->mman.mem_global_referenced = true;
+
return 0;
}
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
if (adev->mman.mem_global_referenced) {
+ amd_sched_entity_fini(adev->mman.entity.sched,
+ &adev->mman.entity);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
drm_global_item_unref(&adev->mman.mem_global_ref);
adev->mman.mem_global_referenced = false;
@@ -478,32 +494,32 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
/*
* TTM backend functions.
*/
+struct amdgpu_ttm_gup_task_list {
+ struct list_head list;
+ struct task_struct *task;
+};
+
struct amdgpu_ttm_tt {
- struct ttm_dma_tt ttm;
- struct amdgpu_device *adev;
- u64 offset;
- uint64_t userptr;
- struct mm_struct *usermm;
- uint32_t userflags;
+ struct ttm_dma_tt ttm;
+ struct amdgpu_device *adev;
+ u64 offset;
+ uint64_t userptr;
+ struct mm_struct *usermm;
+ uint32_t userflags;
+ spinlock_t guptasklock;
+ struct list_head guptasks;
+ atomic_t mmu_invalidations;
};
-/* prepare the sg table with the user pages */
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
- struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned pinned = 0, nents;
- int r;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
- enum dma_data_direction direction = write ?
- DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
-
- if (current->mm != gtt->usermm)
- return -EPERM;
+ unsigned pinned = 0;
+ int r;
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /* check that we only pin down anonymous memory
+ /* check that we only use anonymous memory
to prevent problems with writeback */
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
@@ -516,9 +532,20 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
do {
unsigned num_pages = ttm->num_pages - pinned;
uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
- struct page **pages = ttm->pages + pinned;
+ struct page **p = pages + pinned;
+ struct amdgpu_ttm_gup_task_list guptask;
+
+ guptask.task = current;
+ spin_lock(>t->guptasklock);
+ list_add(&guptask.list, >t->guptasks);
+ spin_unlock(>t->guptasklock);
+
+ r = get_user_pages(userptr, num_pages, write, 0, p, NULL);
+
+ spin_lock(>t->guptasklock);
+ list_del(&guptask.list);
+ spin_unlock(>t->guptasklock);
- r = get_user_pages(userptr, num_pages, write, 0, pages, NULL);
if (r < 0)
goto release_pages;
@@ -526,6 +553,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
} while (pinned < ttm->num_pages);
+ return 0;
+
+release_pages:
+ release_pages(pages, pinned, 0);
+ return r;
+}
+
+/* prepare the sg table with the user pages */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ unsigned nents;
+ int r;
+
+ int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+ enum dma_data_direction direction = write ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
ttm->num_pages << PAGE_SHIFT,
GFP_KERNEL);
@@ -544,9 +590,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
release_sg:
kfree(ttm->sg);
-
-release_pages:
- release_pages(ttm->pages, pinned, 0);
return r;
}
@@ -769,38 +812,61 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
gtt->userptr = addr;
gtt->usermm = current->mm;
gtt->userflags = flags;
+ spin_lock_init(>t->guptasklock);
+ INIT_LIST_HEAD(>t->guptasks);
+ atomic_set(>t->mmu_invalidations, 0);
+
return 0;
}
-bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm)
+struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
if (gtt == NULL)
- return false;
+ return NULL;
- return !!gtt->userptr;
+ return gtt->usermm;
}
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_gup_task_list *entry;
unsigned long size;
- if (gtt == NULL)
- return false;
-
- if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
+ if (gtt == NULL || !gtt->userptr)
return false;
size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
+ spin_lock(>t->guptasklock);
+ list_for_each_entry(entry, >t->guptasks, list) {
+ if (entry->task == current) {
+ spin_unlock(>t->guptasklock);
+ return false;
+ }
+ }
+ spin_unlock(>t->guptasklock);
+
+ atomic_inc(>t->mmu_invalidations);
+
return true;
}
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+ int *last_invalidated)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ int prev_invalidated = *last_invalidated;
+
+ *last_invalidated = atomic_read(>t->mmu_invalidations);
+ return prev_invalidated != *last_invalidated;
+}
+
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1014,9 +1080,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
struct fence **fence)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_job *job;
+
uint32_t max_bytes;
unsigned num_loops, num_dw;
- struct amdgpu_ib *ib;
unsigned i;
int r;
@@ -1028,20 +1095,12 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
while (num_dw & 0x7)
num_dw++;
- ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!ib)
- return -ENOMEM;
-
- r = amdgpu_ib_get(ring, NULL, num_dw * 4, ib);
- if (r) {
- kfree(ib);
+ r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, &job);
+ if (r)
return r;
- }
-
- ib->length_dw = 0;
if (resv) {
- r = amdgpu_sync_resv(adev, &ib->sync, resv,
+ r = amdgpu_sync_resv(adev, &job->sync, resv,
AMDGPU_FENCE_OWNER_UNDEFINED);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
@@ -1052,31 +1111,25 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring,
for (i = 0; i < num_loops; i++) {
uint32_t cur_size_in_bytes = min(byte_count, max_bytes);
- amdgpu_emit_copy_buffer(adev, ib, src_offset, dst_offset,
- cur_size_in_bytes);
+ amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_offset,
+ dst_offset, cur_size_in_bytes);
src_offset += cur_size_in_bytes;
dst_offset += cur_size_in_bytes;
byte_count -= cur_size_in_bytes;
}
- amdgpu_vm_pad_ib(adev, ib);
- WARN_ON(ib->length_dw > num_dw);
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
- &amdgpu_vm_free_job,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- fence);
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+ WARN_ON(job->ibs[0].length_dw > num_dw);
+ r = amdgpu_job_submit(job, ring, &adev->mman.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
goto error_free;
- if (!amdgpu_enable_scheduler) {
- amdgpu_ib_free(adev, ib);
- kfree(ib);
- }
return 0;
+
error_free:
- amdgpu_ib_free(adev, ib);
- kfree(ib);
+ amdgpu_job_free(job);
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 53f987aeeacff..c1a5810444174 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -91,6 +91,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
{
+ struct amdgpu_ring *ring;
+ struct amd_sched_rq *rq;
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
@@ -191,6 +193,15 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
amdgpu_bo_unreserve(adev->uvd.vcpu_bo);
+ ring = &adev->uvd.ring;
+ rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+ r = amd_sched_entity_init(&ring->sched, &adev->uvd.entity,
+ rq, amdgpu_sched_jobs);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up UVD run queue.\n");
+ return r;
+ }
+
for (i = 0; i < AMDGPU_MAX_UVD_HANDLES; ++i) {
atomic_set(&adev->uvd.handles[i], 0);
adev->uvd.filp[i] = NULL;
@@ -210,6 +221,8 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
if (adev->uvd.vcpu_bo == NULL)
return 0;
+ amd_sched_entity_fini(&adev->uvd.ring.sched, &adev->uvd.entity);
+
r = amdgpu_bo_reserve(adev->uvd.vcpu_bo, false);
if (!r) {
amdgpu_bo_kunmap(adev->uvd.vcpu_bo);
@@ -241,7 +254,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
amdgpu_uvd_note_usage(adev);
- r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
+ r = amdgpu_uvd_get_destroy_msg(ring, handle, false, &fence);
if (r) {
DRM_ERROR("Error destroying UVD (%d)!\n", r);
continue;
@@ -295,7 +308,8 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
amdgpu_uvd_note_usage(adev);
- r = amdgpu_uvd_get_destroy_msg(ring, handle, &fence);
+ r = amdgpu_uvd_get_destroy_msg(ring, handle,
+ false, &fence);
if (r) {
DRM_ERROR("Error destroying UVD (%d)!\n", r);
continue;
@@ -525,13 +539,6 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
return -EINVAL;
}
- r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
- if (r < 0) {
- DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
- return r;
- }
-
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
@@ -616,7 +623,6 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
{
struct amdgpu_bo_va_mapping *mapping;
struct amdgpu_bo *bo;
- struct amdgpu_ib *ib;
uint32_t cmd, lo, hi;
uint64_t start, end;
uint64_t addr;
@@ -638,9 +644,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
start += addr;
- ib = &ctx->parser->ibs[ctx->ib_idx];
- ib->ptr[ctx->data0] = start & 0xFFFFFFFF;
- ib->ptr[ctx->data1] = start >> 32;
+ amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
+ lower_32_bits(start));
+ amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data1,
+ upper_32_bits(start));
cmd = amdgpu_get_ib_value(ctx->parser, ctx->ib_idx, ctx->idx) >> 1;
if (cmd < 0x4) {
@@ -702,7 +709,7 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
- struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
+ struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int i, r;
ctx->idx++;
@@ -748,7 +755,7 @@ static int amdgpu_uvd_cs_reg(struct amdgpu_uvd_cs_ctx *ctx,
static int amdgpu_uvd_cs_packets(struct amdgpu_uvd_cs_ctx *ctx,
int (*cb)(struct amdgpu_uvd_cs_ctx *ctx))
{
- struct amdgpu_ib *ib = &ctx->parser->ibs[ctx->ib_idx];
+ struct amdgpu_ib *ib = &ctx->parser->job->ibs[ctx->ib_idx];
int r;
for (ctx->idx = 0 ; ctx->idx < ib->length_dw; ) {
@@ -790,7 +797,7 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
[0x00000003] = 2048,
[0x00000004] = 0xFFFFFFFF,
};
- struct amdgpu_ib *ib = &parser->ibs[ib_idx];
+ struct amdgpu_ib *ib = &parser->job->ibs[ib_idx];
int r;
if (ib->length_dw % 16) {
@@ -823,22 +830,14 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
return 0;
}
-static int amdgpu_uvd_free_job(
- struct amdgpu_job *job)
-{
- amdgpu_ib_free(job->adev, job->ibs);
- kfree(job->ibs);
- return 0;
-}
-
-static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
- struct amdgpu_bo *bo,
- struct fence **fence)
+static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
+ bool direct, struct fence **fence)
{
struct ttm_validate_buffer tv;
struct ww_acquire_ctx ticket;
struct list_head head;
- struct amdgpu_ib *ib = NULL;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
struct fence *f = NULL;
struct amdgpu_device *adev = ring->adev;
uint64_t addr;
@@ -862,15 +861,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
if (r)
goto err;
- ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!ib) {
- r = -ENOMEM;
- goto err;
- }
- r = amdgpu_ib_get(ring, NULL, 64, ib);
+
+ r = amdgpu_job_alloc_with_ib(adev, 64, &job);
if (r)
- goto err1;
+ goto err;
+ ib = &job->ibs[0];
addr = amdgpu_bo_gpu_offset(bo);
ib->ptr[0] = PACKET0(mmUVD_GPCOM_VCPU_DATA0, 0);
ib->ptr[1] = addr;
@@ -882,12 +878,19 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
ib->ptr[i] = PACKET2(0);
ib->length_dw = 16;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
- &amdgpu_uvd_free_job,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
- if (r)
- goto err2;
+ if (direct) {
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+ job->fence = f;
+ if (r)
+ goto err_free;
+
+ amdgpu_job_free(job);
+ } else {
+ r = amdgpu_job_submit(job, ring, &adev->uvd.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+ if (r)
+ goto err_free;
+ }
ttm_eu_fence_buffer_objects(&ticket, &head, f);
@@ -895,16 +898,12 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring,
*fence = fence_get(f);
amdgpu_bo_unref(&bo);
fence_put(f);
- if (amdgpu_enable_scheduler)
- return 0;
- amdgpu_ib_free(ring->adev, ib);
- kfree(ib);
return 0;
-err2:
- amdgpu_ib_free(ring->adev, ib);
-err1:
- kfree(ib);
+
+err_free:
+ amdgpu_job_free(job);
+
err:
ttm_eu_backoff_reservation(&ticket, &head);
return r;
@@ -959,11 +958,11 @@ int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
amdgpu_bo_kunmap(bo);
amdgpu_bo_unreserve(bo);
- return amdgpu_uvd_send_msg(ring, bo, fence);
+ return amdgpu_uvd_send_msg(ring, bo, true, fence);
}
int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct fence **fence)
+ bool direct, struct fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_bo *bo;
@@ -1001,7 +1000,7 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
amdgpu_bo_kunmap(bo);
amdgpu_bo_unreserve(bo);
- return amdgpu_uvd_send_msg(ring, bo, fence);
+ return amdgpu_uvd_send_msg(ring, bo, direct, fence);
}
static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 1724c2c861516..9a3b449081a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -31,7 +31,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev);
int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct fence **fence);
+ bool direct, struct fence **fence);
void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
struct drm_file *filp);
int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index a745eeeb5d820..4bec0c108cea9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -74,6 +74,8 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work);
*/
int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
{
+ struct amdgpu_ring *ring;
+ struct amd_sched_rq *rq;
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned ucode_version, version_major, version_minor, binary_id;
@@ -170,6 +172,16 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
return r;
}
+
+ ring = &adev->vce.ring[0];
+ rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_NORMAL];
+ r = amd_sched_entity_init(&ring->sched, &adev->vce.entity,
+ rq, amdgpu_sched_jobs);
+ if (r != 0) {
+ DRM_ERROR("Failed setting up VCE run queue.\n");
+ return r;
+ }
+
for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
atomic_set(&adev->vce.handles[i], 0);
adev->vce.filp[i] = NULL;
@@ -190,6 +202,8 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
if (adev->vce.vcpu_bo == NULL)
return 0;
+ amd_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
+
amdgpu_bo_unref(&adev->vce.vcpu_bo);
amdgpu_ring_fini(&adev->vce.ring[0]);
@@ -337,7 +351,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
amdgpu_vce_note_usage(adev);
- r = amdgpu_vce_get_destroy_msg(ring, handle, NULL);
+ r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
if (r)
DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
@@ -346,14 +360,6 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
}
}
-static int amdgpu_vce_free_job(
- struct amdgpu_job *job)
-{
- amdgpu_ib_free(job->adev, job->ibs);
- kfree(job->ibs);
- return 0;
-}
-
/**
* amdgpu_vce_get_create_msg - generate a VCE create msg
*
@@ -368,21 +374,17 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence)
{
const unsigned ib_size_dw = 1024;
- struct amdgpu_ib *ib = NULL;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
struct fence *f = NULL;
- struct amdgpu_device *adev = ring->adev;
uint64_t dummy;
int i, r;
- ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!ib)
- return -ENOMEM;
- r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
- if (r) {
- DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
- kfree(ib);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ if (r)
return r;
- }
+
+ ib = &job->ibs[0];
dummy = ib->gpu_addr + 1024;
@@ -423,20 +425,19 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
- &amdgpu_vce_free_job,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+ job->fence = f;
if (r)
goto err;
+
+ amdgpu_job_free(job);
if (fence)
*fence = fence_get(f);
fence_put(f);
- if (amdgpu_enable_scheduler)
- return 0;
+ return 0;
+
err:
- amdgpu_ib_free(adev, ib);
- kfree(ib);
+ amdgpu_job_free(job);
return r;
}
@@ -451,26 +452,20 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
* Close up a stream for HW test or if userspace failed to do so
*/
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct fence **fence)
+ bool direct, struct fence **fence)
{
const unsigned ib_size_dw = 1024;
- struct amdgpu_ib *ib = NULL;
+ struct amdgpu_job *job;
+ struct amdgpu_ib *ib;
struct fence *f = NULL;
- struct amdgpu_device *adev = ring->adev;
uint64_t dummy;
int i, r;
- ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!ib)
- return -ENOMEM;
-
- r = amdgpu_ib_get(ring, NULL, ib_size_dw * 4, ib);
- if (r) {
- kfree(ib);
- DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+ r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
+ if (r)
return r;
- }
+ ib = &job->ibs[0];
dummy = ib->gpu_addr + 1024;
/* stitch together an VCE destroy msg */
@@ -490,20 +485,28 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
- &amdgpu_vce_free_job,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
- if (r)
- goto err;
+
+ if (direct) {
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+ job->fence = f;
+ if (r)
+ goto err;
+
+ amdgpu_job_free(job);
+ } else {
+ r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
+ AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+ if (r)
+ goto err;
+ }
+
if (fence)
*fence = fence_get(f);
fence_put(f);
- if (amdgpu_enable_scheduler)
- return 0;
+ return 0;
+
err:
- amdgpu_ib_free(adev, ib);
- kfree(ib);
+ amdgpu_job_free(job);
return r;
}
@@ -521,7 +524,6 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
int lo, int hi, unsigned size, uint32_t index)
{
struct amdgpu_bo_va_mapping *mapping;
- struct amdgpu_ib *ib = &p->ibs[ib_idx];
struct amdgpu_bo *bo;
uint64_t addr;
@@ -550,8 +552,8 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
addr += amdgpu_bo_gpu_offset(bo);
addr -= ((uint64_t)size) * ((uint64_t)index);
- ib->ptr[lo] = addr & 0xFFFFFFFF;
- ib->ptr[hi] = addr >> 32;
+ amdgpu_set_ib_value(p, ib_idx, lo, lower_32_bits(addr));
+ amdgpu_set_ib_value(p, ib_idx, hi, upper_32_bits(addr));
return 0;
}
@@ -606,7 +608,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
*/
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
{
- struct amdgpu_ib *ib = &p->ibs[ib_idx];
+ struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
unsigned fb_idx = 0, bs_idx = 0;
int session_idx = -1;
bool destroyed = false;
@@ -742,30 +744,6 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
return r;
}
-/**
- * amdgpu_vce_ring_emit_semaphore - emit a semaphore command
- *
- * @ring: engine to use
- * @semaphore: address of semaphore
- * @emit_wait: true=emit wait, false=emit signal
- *
- */
-bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- uint64_t addr = semaphore->gpu_addr;
-
- amdgpu_ring_write(ring, VCE_CMD_SEMAPHORE);
- amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
- amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
- amdgpu_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
- if (!emit_wait)
- amdgpu_ring_write(ring, VCE_CMD_END);
-
- return true;
-}
-
/**
* amdgpu_vce_ring_emit_ib - execute indirect buffer
*
@@ -814,14 +792,14 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
unsigned i;
int r;
- r = amdgpu_ring_lock(ring, 16);
+ r = amdgpu_ring_alloc(ring, 16);
if (r) {
DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n",
ring->idx, r);
return r;
}
amdgpu_ring_write(ring, VCE_CMD_END);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
if (amdgpu_ring_get_rptr(ring) != rptr)
@@ -862,7 +840,7 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
goto error;
}
- r = amdgpu_vce_get_destroy_msg(ring, 1, &fence);
+ r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
goto error;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index ba2da8ee59067..ef99d23701825 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -31,12 +31,9 @@ int amdgpu_vce_resume(struct amdgpu_device *adev);
int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
struct fence **fence);
int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
- struct fence **fence);
+ bool direct, struct fence **fence);
void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
-bool amdgpu_vce_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 9599f7559b3dc..b6c011b836412 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -50,12 +50,15 @@
* SI supports 16.
*/
+/* Special value that no flush is necessary */
+#define AMDGPU_VM_NO_FLUSH (~0ll)
+
/**
* amdgpu_vm_num_pde - return the number of page directory entries
*
* @adev: amdgpu_device pointer
*
- * Calculate the number of page directory entries (cayman+).
+ * Calculate the number of page directory entries.
*/
static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
{
@@ -67,7 +70,7 @@ static unsigned amdgpu_vm_num_pdes(struct amdgpu_device *adev)
*
* @adev: amdgpu_device pointer
*
- * Calculate the size of the page directory in bytes (cayman+).
+ * Calculate the size of the page directory in bytes.
*/
static unsigned amdgpu_vm_directory_size(struct amdgpu_device *adev)
{
@@ -89,11 +92,10 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
struct amdgpu_bo_list_entry *entry)
{
entry->robj = vm->page_directory;
- entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
- entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
entry->priority = 0;
entry->tv.bo = &vm->page_directory->tbo;
entry->tv.shared = true;
+ entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
@@ -154,133 +156,154 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
* @vm: vm to allocate id for
* @ring: ring we want to submit job to
* @sync: sync object where we add dependencies
+ * @fence: fence protecting ID from reuse
*
* Allocate an id for the vm, adding fences to the sync obj as necessary.
- *
- * Global mutex must be locked!
*/
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync)
+ struct amdgpu_sync *sync, struct fence *fence,
+ unsigned *vm_id, uint64_t *vm_pd_addr)
{
- struct fence *best[AMDGPU_MAX_RINGS] = {};
- struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+ uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vm_id *id = &vm->ids[ring->idx];
+ struct fence *updates = sync->last_vm_update;
+ int r;
- unsigned choices[2] = {};
- unsigned i;
+ mutex_lock(&adev->vm_manager.lock);
/* check if the id is still valid */
- if (vm_id->id) {
- unsigned id = vm_id->id;
+ if (id->mgr_id) {
+ struct fence *flushed = id->flushed_updates;
+ bool is_later;
long owner;
- owner = atomic_long_read(&adev->vm_manager.ids[id].owner);
- if (owner == (long)vm) {
- trace_amdgpu_vm_grab_id(vm_id->id, ring->idx);
+ if (!flushed)
+ is_later = true;
+ else if (!updates)
+ is_later = false;
+ else
+ is_later = fence_is_later(updates, flushed);
+
+ owner = atomic_long_read(&id->mgr_id->owner);
+ if (!is_later && owner == (long)id &&
+ pd_addr == id->pd_gpu_addr) {
+
+ r = amdgpu_sync_fence(ring->adev, sync,
+ id->mgr_id->active);
+ if (r) {
+ mutex_unlock(&adev->vm_manager.lock);
+ return r;
+ }
+
+ fence_put(id->mgr_id->active);
+ id->mgr_id->active = fence_get(fence);
+
+ list_move_tail(&id->mgr_id->list,
+ &adev->vm_manager.ids_lru);
+
+ *vm_id = id->mgr_id - adev->vm_manager.ids;
+ *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
+ *vm_pd_addr);
+
+ mutex_unlock(&adev->vm_manager.lock);
return 0;
}
}
- /* we definately need to flush */
- vm_id->pd_gpu_addr = ~0ll;
+ id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
+ struct amdgpu_vm_manager_id,
+ list);
- /* skip over VMID 0, since it is the system VM */
- for (i = 1; i < adev->vm_manager.nvm; ++i) {
- struct fence *fence = adev->vm_manager.ids[i].active;
- struct amdgpu_ring *fring;
-
- if (fence == NULL) {
- /* found a free one */
- vm_id->id = i;
- trace_amdgpu_vm_grab_id(i, ring->idx);
- return 0;
- }
+ r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
+ if (!r) {
+ fence_put(id->mgr_id->active);
+ id->mgr_id->active = fence_get(fence);
- fring = amdgpu_ring_from_fence(fence);
- if (best[fring->idx] == NULL ||
- fence_is_later(best[fring->idx], fence)) {
- best[fring->idx] = fence;
- choices[fring == ring ? 0 : 1] = i;
- }
- }
+ fence_put(id->flushed_updates);
+ id->flushed_updates = fence_get(updates);
- for (i = 0; i < 2; ++i) {
- if (choices[i]) {
- struct fence *fence;
+ id->pd_gpu_addr = pd_addr;
- fence = adev->vm_manager.ids[choices[i]].active;
- vm_id->id = choices[i];
+ list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
+ atomic_long_set(&id->mgr_id->owner, (long)id);
- trace_amdgpu_vm_grab_id(choices[i], ring->idx);
- return amdgpu_sync_fence(ring->adev, sync, fence);
- }
+ *vm_id = id->mgr_id - adev->vm_manager.ids;
+ *vm_pd_addr = pd_addr;
+ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
}
- /* should never happen */
- BUG();
- return -EINVAL;
+ mutex_unlock(&adev->vm_manager.lock);
+ return r;
}
/**
* amdgpu_vm_flush - hardware flush the vm
*
* @ring: ring to use for flush
- * @vm: vm we want to flush
- * @updates: last vm update that we waited for
+ * @vm_id: vmid number to use
+ * @pd_addr: address of the page directory
*
- * Flush the vm (cayman+).
- *
- * Global and local mutex must be locked!
+ * Emit a VM flush when it is necessary.
*/
void amdgpu_vm_flush(struct amdgpu_ring *ring,
- struct amdgpu_vm *vm,
- struct fence *updates)
+ unsigned vm_id, uint64_t pd_addr,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
{
- uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
- struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
- struct fence *flushed_updates = vm_id->flushed_updates;
- bool is_later;
-
- if (!flushed_updates)
- is_later = true;
- else if (!updates)
- is_later = false;
- else
- is_later = fence_is_later(updates, flushed_updates);
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+ bool gds_switch_needed = ring->funcs->emit_gds_switch && (
+ mgr_id->gds_base != gds_base ||
+ mgr_id->gds_size != gds_size ||
+ mgr_id->gws_base != gws_base ||
+ mgr_id->gws_size != gws_size ||
+ mgr_id->oa_base != oa_base ||
+ mgr_id->oa_size != oa_size);
+
+ if (ring->funcs->emit_pipeline_sync && (
+ pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
+ amdgpu_ring_emit_pipeline_sync(ring);
+
+ if (pd_addr != AMDGPU_VM_NO_FLUSH) {
+ trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
+ amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+ }
- if (pd_addr != vm_id->pd_gpu_addr || is_later) {
- trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
- if (is_later) {
- vm_id->flushed_updates = fence_get(updates);
- fence_put(flushed_updates);
- }
- vm_id->pd_gpu_addr = pd_addr;
- amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
+ if (gds_switch_needed) {
+ mgr_id->gds_base = gds_base;
+ mgr_id->gds_size = gds_size;
+ mgr_id->gws_base = gws_base;
+ mgr_id->gws_size = gws_size;
+ mgr_id->oa_base = oa_base;
+ mgr_id->oa_size = oa_size;
+ amdgpu_ring_emit_gds_switch(ring, vm_id,
+ gds_base, gds_size,
+ gws_base, gws_size,
+ oa_base, oa_size);
}
}
/**
- * amdgpu_vm_fence - remember fence for vm
+ * amdgpu_vm_reset_id - reset VMID to zero
*
- * @adev: amdgpu_device pointer
- * @vm: vm we want to fence
- * @fence: fence to remember
- *
- * Fence the vm (cayman+).
- * Set the fence used to protect page table and id.
+ * @adev: amdgpu device structure
+ * @vm_id: vmid number to use
*
- * Global and local mutex must be locked!
+ * Reset saved GDW, GWS and OA to force switch on next flush.
*/
-void amdgpu_vm_fence(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct fence *fence)
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
{
- struct amdgpu_ring *ring = amdgpu_ring_from_fence(fence);
- unsigned vm_id = vm->ids[ring->idx].id;
-
- fence_put(adev->vm_manager.ids[vm_id].active);
- adev->vm_manager.ids[vm_id].active = fence_get(fence);
- atomic_long_set(&adev->vm_manager.ids[vm_id].owner, (long)vm);
+ struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+
+ mgr_id->gds_base = 0;
+ mgr_id->gds_size = 0;
+ mgr_id->gws_base = 0;
+ mgr_id->gws_size = 0;
+ mgr_id->oa_base = 0;
+ mgr_id->oa_size = 0;
}
/**
@@ -289,7 +312,7 @@ void amdgpu_vm_fence(struct amdgpu_device *adev,
* @vm: requested vm
* @bo: requested buffer object
*
- * Find @bo inside the requested vm (cayman+).
+ * Find @bo inside the requested vm.
* Search inside the @bos vm list for the requested vm
* Returns the found bo_va or NULL if none is found
*
@@ -312,32 +335,40 @@ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
* amdgpu_vm_update_pages - helper to call the right asic function
*
* @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw access flags
* @ib: indirect buffer to fill with commands
* @pe: addr of the page entry
* @addr: dst addr to write into pe
* @count: number of page entries to update
* @incr: increase next addr by incr bytes
* @flags: hw access flags
- * @gtt_flags: GTT hw access flags
*
* Traces the parameters and calls the right asic functions
* to setup the page table using the DMA.
*/
static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
+ struct amdgpu_gart *gtt,
+ uint32_t gtt_flags,
struct amdgpu_ib *ib,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
- uint32_t flags, uint32_t gtt_flags)
+ uint32_t flags)
{
trace_amdgpu_vm_set_page(pe, addr, count, incr, flags);
- if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
- uint64_t src = adev->gart.table_addr + (addr >> 12) * 8;
+ if ((gtt == &adev->gart) && (flags == gtt_flags)) {
+ uint64_t src = gtt->table_addr + (addr >> 12) * 8;
amdgpu_vm_copy_pte(adev, ib, pe, src, count);
- } else if ((flags & AMDGPU_PTE_SYSTEM) || (count < 3)) {
- amdgpu_vm_write_pte(adev, ib, pe, addr,
- count, incr, flags);
+ } else if (gtt) {
+ dma_addr_t *pages_addr = gtt->pages_addr;
+ amdgpu_vm_write_pte(adev, ib, pages_addr, pe, addr,
+ count, incr, flags);
+
+ } else if (count < 3) {
+ amdgpu_vm_write_pte(adev, ib, NULL, pe, addr,
+ count, incr, flags);
} else {
amdgpu_vm_set_pte_pde(adev, ib, pe, addr,
@@ -345,15 +376,6 @@ static void amdgpu_vm_update_pages(struct amdgpu_device *adev,
}
}
-int amdgpu_vm_free_job(struct amdgpu_job *job)
-{
- int i;
- for (i = 0; i < job->num_ibs; i++)
- amdgpu_ib_free(job->adev, &job->ibs[i]);
- kfree(job->ibs);
- return 0;
-}
-
/**
* amdgpu_vm_clear_bo - initially clear the page dir/table
*
@@ -363,15 +385,18 @@ int amdgpu_vm_free_job(struct amdgpu_job *job)
* need to reserve bo first before calling it.
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
+ struct amdgpu_vm *vm,
struct amdgpu_bo *bo)
{
- struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+ struct amdgpu_ring *ring;
struct fence *fence = NULL;
- struct amdgpu_ib *ib;
+ struct amdgpu_job *job;
unsigned entries;
uint64_t addr;
int r;
+ ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
return r;
@@ -383,56 +408,57 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
addr = amdgpu_bo_gpu_offset(bo);
entries = amdgpu_bo_size(bo) / 8;
- ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!ib)
+ r = amdgpu_job_alloc_with_ib(adev, 64, &job);
+ if (r)
goto error;
- r = amdgpu_ib_get(ring, NULL, entries * 2 + 64, ib);
+ amdgpu_vm_update_pages(adev, NULL, 0, &job->ibs[0], addr, 0, entries,
+ 0, 0);
+ amdgpu_ring_pad_ib(ring, &job->ibs[0]);
+
+ WARN_ON(job->ibs[0].length_dw > 64);
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &fence);
if (r)
goto error_free;
- ib->length_dw = 0;
-
- amdgpu_vm_update_pages(adev, ib, addr, 0, entries, 0, 0, 0);
- amdgpu_vm_pad_ib(adev, ib);
- WARN_ON(ib->length_dw > 64);
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
- &amdgpu_vm_free_job,
- AMDGPU_FENCE_OWNER_VM,
- &fence);
- if (!r)
- amdgpu_bo_fence(bo, fence, true);
+ amdgpu_bo_fence(bo, fence, true);
fence_put(fence);
- if (amdgpu_enable_scheduler)
- return 0;
+ return 0;
error_free:
- amdgpu_ib_free(adev, ib);
- kfree(ib);
+ amdgpu_job_free(job);
error:
return r;
}
/**
- * amdgpu_vm_map_gart - get the physical address of a gart page
+ * amdgpu_vm_map_gart - Resolve gart mapping of addr
*
- * @adev: amdgpu_device pointer
+ * @pages_addr: optional DMA address to use for lookup
* @addr: the unmapped addr
*
* Look up the physical address of the page that the pte resolves
- * to (cayman+).
- * Returns the physical address of the page.
+ * to and return the pointer for the page table entry.
*/
-uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr)
+uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
uint64_t result;
- /* page table offset */
- result = adev->gart.pages_addr[addr >> PAGE_SHIFT];
+ if (pages_addr) {
+ /* page table offset */
+ result = pages_addr[addr >> PAGE_SHIFT];
+
+ /* in case cpu page size != gpu page size*/
+ result |= addr & (~PAGE_MASK);
- /* in case cpu page size != gpu page size*/
- result |= addr & (~PAGE_MASK);
+ } else {
+ /* No mapping required */
+ result = addr;
+ }
+
+ result &= 0xFFFFFFFFFFFFF000ULL;
return result;
}
@@ -446,45 +472,37 @@ uint64_t amdgpu_vm_map_gart(struct amdgpu_device *adev, uint64_t addr)
* @end: end of GPU address range
*
* Allocates new page tables if necessary
- * and updates the page directory (cayman+).
+ * and updates the page directory.
* Returns 0 for success, error for failure.
- *
- * Global and local mutex must be locked!
*/
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
{
- struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+ struct amdgpu_ring *ring;
struct amdgpu_bo *pd = vm->page_directory;
uint64_t pd_addr = amdgpu_bo_gpu_offset(pd);
uint32_t incr = AMDGPU_VM_PTE_COUNT * 8;
uint64_t last_pde = ~0, last_pt = ~0;
unsigned count = 0, pt_idx, ndw;
+ struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct fence *fence = NULL;
int r;
+ ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
+
/* padding, etc. */
ndw = 64;
/* assume the worst case */
ndw += vm->max_pde_used * 6;
- /* update too big for an IB */
- if (ndw > 0xfffff)
- return -ENOMEM;
-
- ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!ib)
- return -ENOMEM;
-
- r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
- if (r) {
- kfree(ib);
+ r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+ if (r)
return r;
- }
- ib->length_dw = 0;
+
+ ib = &job->ibs[0];
/* walk over the address space and update the page directory */
for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
@@ -504,9 +522,10 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
((last_pt + incr * count) != pt)) {
if (count) {
- amdgpu_vm_update_pages(adev, ib, last_pde,
- last_pt, count, incr,
- AMDGPU_PTE_VALID, 0);
+ amdgpu_vm_update_pages(adev, NULL, 0, ib,
+ last_pde, last_pt,
+ count, incr,
+ AMDGPU_PTE_VALID);
}
count = 1;
@@ -518,17 +537,16 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
}
if (count)
- amdgpu_vm_update_pages(adev, ib, last_pde, last_pt, count,
- incr, AMDGPU_PTE_VALID, 0);
+ amdgpu_vm_update_pages(adev, NULL, 0, ib, last_pde, last_pt,
+ count, incr, AMDGPU_PTE_VALID);
if (ib->length_dw != 0) {
- amdgpu_vm_pad_ib(adev, ib);
- amdgpu_sync_resv(adev, &ib->sync, pd->tbo.resv, AMDGPU_FENCE_OWNER_VM);
+ amdgpu_ring_pad_ib(ring, ib);
+ amdgpu_sync_resv(adev, &job->sync, pd->tbo.resv,
+ AMDGPU_FENCE_OWNER_VM);
WARN_ON(ib->length_dw > ndw);
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
- &amdgpu_vm_free_job,
- AMDGPU_FENCE_OWNER_VM,
- &fence);
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &fence);
if (r)
goto error_free;
@@ -536,18 +554,15 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
fence_put(vm->page_directory_fence);
vm->page_directory_fence = fence_get(fence);
fence_put(fence);
- }
- if (!amdgpu_enable_scheduler || ib->length_dw == 0) {
- amdgpu_ib_free(adev, ib);
- kfree(ib);
+ } else {
+ amdgpu_job_free(job);
}
return 0;
error_free:
- amdgpu_ib_free(adev, ib);
- kfree(ib);
+ amdgpu_job_free(job);
return r;
}
@@ -555,20 +570,20 @@ int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
* amdgpu_vm_frag_ptes - add fragment information to PTEs
*
* @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw mapping flags
* @ib: IB for the update
* @pe_start: first PTE to handle
* @pe_end: last PTE to handle
* @addr: addr those PTEs should point to
* @flags: hw mapping flags
- * @gtt_flags: GTT hw mapping flags
- *
- * Global and local mutex must be locked!
*/
static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
+ struct amdgpu_gart *gtt,
+ uint32_t gtt_flags,
struct amdgpu_ib *ib,
uint64_t pe_start, uint64_t pe_end,
- uint64_t addr, uint32_t flags,
- uint32_t gtt_flags)
+ uint64_t addr, uint32_t flags)
{
/**
* The MC L1 TLB supports variable sized pages, based on a fragment
@@ -598,36 +613,39 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
unsigned count;
+ /* Abort early if there isn't anything to do */
+ if (pe_start == pe_end)
+ return;
+
/* system pages are non continuously */
- if ((flags & AMDGPU_PTE_SYSTEM) || !(flags & AMDGPU_PTE_VALID) ||
- (frag_start >= frag_end)) {
+ if (gtt || !(flags & AMDGPU_PTE_VALID) || (frag_start >= frag_end)) {
count = (pe_end - pe_start) / 8;
- amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
- AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+ amdgpu_vm_update_pages(adev, gtt, gtt_flags, ib, pe_start,
+ addr, count, AMDGPU_GPU_PAGE_SIZE,
+ flags);
return;
}
/* handle the 4K area at the beginning */
if (pe_start != frag_start) {
count = (frag_start - pe_start) / 8;
- amdgpu_vm_update_pages(adev, ib, pe_start, addr, count,
- AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+ amdgpu_vm_update_pages(adev, NULL, 0, ib, pe_start, addr,
+ count, AMDGPU_GPU_PAGE_SIZE, flags);
addr += AMDGPU_GPU_PAGE_SIZE * count;
}
/* handle the area in the middle */
count = (frag_end - frag_start) / 8;
- amdgpu_vm_update_pages(adev, ib, frag_start, addr, count,
- AMDGPU_GPU_PAGE_SIZE, flags | frag_flags,
- gtt_flags);
+ amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_start, addr, count,
+ AMDGPU_GPU_PAGE_SIZE, flags | frag_flags);
/* handle the 4K area at the end */
if (frag_end != pe_end) {
addr += AMDGPU_GPU_PAGE_SIZE * count;
count = (pe_end - frag_end) / 8;
- amdgpu_vm_update_pages(adev, ib, frag_end, addr, count,
- AMDGPU_GPU_PAGE_SIZE, flags, gtt_flags);
+ amdgpu_vm_update_pages(adev, NULL, 0, ib, frag_end, addr,
+ count, AMDGPU_GPU_PAGE_SIZE, flags);
}
}
@@ -635,122 +653,105 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
* amdgpu_vm_update_ptes - make sure that page tables are valid
*
* @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: GTT hw mapping flags
* @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
* @dst: destination address to map to
* @flags: mapping flags
*
- * Update the page tables in the range @start - @end (cayman+).
- *
- * Global and local mutex must be locked!
+ * Update the page tables in the range @start - @end.
*/
-static int amdgpu_vm_update_ptes(struct amdgpu_device *adev,
- struct amdgpu_vm *vm,
- struct amdgpu_ib *ib,
- uint64_t start, uint64_t end,
- uint64_t dst, uint32_t flags,
- uint32_t gtt_flags)
+static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
+ struct amdgpu_gart *gtt,
+ uint32_t gtt_flags,
+ struct amdgpu_vm *vm,
+ struct amdgpu_ib *ib,
+ uint64_t start, uint64_t end,
+ uint64_t dst, uint32_t flags)
{
- uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
- uint64_t last_pte = ~0, last_dst = ~0;
- void *owner = AMDGPU_FENCE_OWNER_VM;
- unsigned count = 0;
- uint64_t addr;
+ const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
- /* sync to everything on unmapping */
- if (!(flags & AMDGPU_PTE_VALID))
- owner = AMDGPU_FENCE_OWNER_UNDEFINED;
+ uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
+ uint64_t addr;
/* walk over the address space and update the page tables */
for (addr = start; addr < end; ) {
uint64_t pt_idx = addr >> amdgpu_vm_block_size;
struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
unsigned nptes;
- uint64_t pte;
- int r;
-
- amdgpu_sync_resv(adev, &ib->sync, pt->tbo.resv, owner);
- r = reservation_object_reserve_shared(pt->tbo.resv);
- if (r)
- return r;
+ uint64_t pe_start;
if ((addr & ~mask) == (end & ~mask))
nptes = end - addr;
else
nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
- pte = amdgpu_bo_gpu_offset(pt);
- pte += (addr & mask) * 8;
+ pe_start = amdgpu_bo_gpu_offset(pt);
+ pe_start += (addr & mask) * 8;
- if ((last_pte + 8 * count) != pte) {
+ if (last_pe_end != pe_start) {
- if (count) {
- amdgpu_vm_frag_ptes(adev, ib, last_pte,
- last_pte + 8 * count,
- last_dst, flags,
- gtt_flags);
- }
+ amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+ last_pe_start, last_pe_end,
+ last_dst, flags);
- count = nptes;
- last_pte = pte;
+ last_pe_start = pe_start;
+ last_pe_end = pe_start + 8 * nptes;
last_dst = dst;
} else {
- count += nptes;
+ last_pe_end += 8 * nptes;
}
addr += nptes;
dst += nptes * AMDGPU_GPU_PAGE_SIZE;
}
- if (count) {
- amdgpu_vm_frag_ptes(adev, ib, last_pte,
- last_pte + 8 * count,
- last_dst, flags, gtt_flags);
- }
-
- return 0;
+ amdgpu_vm_frag_ptes(adev, gtt, gtt_flags, ib,
+ last_pe_start, last_pe_end,
+ last_dst, flags);
}
/**
* amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
*
* @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @gtt_flags: flags as they are used for GTT
* @vm: requested vm
- * @mapping: mapped range and flags to use for the update
+ * @start: start of mapped range
+ * @last: last mapped entry
+ * @flags: flags for the entries
* @addr: addr to set the area to
- * @gtt_flags: flags as they are used for GTT
* @fence: optional resulting fence
*
- * Fill in the page table entries for @mapping.
+ * Fill in the page table entries between @start and @last.
* Returns 0 for success, -EINVAL for failure.
- *
- * Object have to be reserved and mutex must be locked!
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+ struct amdgpu_gart *gtt,
+ uint32_t gtt_flags,
struct amdgpu_vm *vm,
- struct amdgpu_bo_va_mapping *mapping,
- uint64_t addr, uint32_t gtt_flags,
+ uint64_t start, uint64_t last,
+ uint32_t flags, uint64_t addr,
struct fence **fence)
{
- struct amdgpu_ring *ring = adev->vm_manager.vm_pte_funcs_ring;
+ struct amdgpu_ring *ring;
+ void *owner = AMDGPU_FENCE_OWNER_VM;
unsigned nptes, ncmds, ndw;
- uint32_t flags = gtt_flags;
+ struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct fence *f = NULL;
int r;
- /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
- * but in case of something, we filter the flags in first place
- */
- if (!(mapping->flags & AMDGPU_PTE_READABLE))
- flags &= ~AMDGPU_PTE_READABLE;
- if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
- flags &= ~AMDGPU_PTE_WRITEABLE;
+ ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
- trace_amdgpu_vm_bo_update(mapping);
+ /* sync to everything on unmapping */
+ if (!(flags & AMDGPU_PTE_VALID))
+ owner = AMDGPU_FENCE_OWNER_UNDEFINED;
- nptes = mapping->it.last - mapping->it.start + 1;
+ nptes = last - start + 1;
/*
* reserve space for one command every (1 << BLOCK_SIZE)
@@ -761,11 +762,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
/* padding, etc. */
ndw = 64;
- if ((flags & AMDGPU_PTE_SYSTEM) && (flags == gtt_flags)) {
+ if ((gtt == &adev->gart) && (flags == gtt_flags)) {
/* only copy commands needed */
ndw += ncmds * 7;
- } else if (flags & AMDGPU_PTE_SYSTEM) {
+ } else if (gtt) {
/* header for write data commands */
ndw += ncmds * 4;
@@ -780,38 +781,28 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
ndw += 2 * 10;
}
- /* update too big for an IB */
- if (ndw > 0xfffff)
- return -ENOMEM;
-
- ib = kzalloc(sizeof(struct amdgpu_ib), GFP_KERNEL);
- if (!ib)
- return -ENOMEM;
-
- r = amdgpu_ib_get(ring, NULL, ndw * 4, ib);
- if (r) {
- kfree(ib);
+ r = amdgpu_job_alloc_with_ib(adev, ndw * 4, &job);
+ if (r)
return r;
- }
- ib->length_dw = 0;
+ ib = &job->ibs[0];
- r = amdgpu_vm_update_ptes(adev, vm, ib, mapping->it.start,
- mapping->it.last + 1, addr + mapping->offset,
- flags, gtt_flags);
+ r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
+ owner);
+ if (r)
+ goto error_free;
- if (r) {
- amdgpu_ib_free(adev, ib);
- kfree(ib);
- return r;
- }
+ r = reservation_object_reserve_shared(vm->page_directory->tbo.resv);
+ if (r)
+ goto error_free;
+
+ amdgpu_vm_update_ptes(adev, gtt, gtt_flags, vm, ib, start, last + 1,
+ addr, flags);
- amdgpu_vm_pad_ib(adev, ib);
+ amdgpu_ring_pad_ib(ring, ib);
WARN_ON(ib->length_dw > ndw);
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, ib, 1,
- &amdgpu_vm_free_job,
- AMDGPU_FENCE_OWNER_VM,
- &f);
+ r = amdgpu_job_submit(job, ring, &vm->entity,
+ AMDGPU_FENCE_OWNER_VM, &f);
if (r)
goto error_free;
@@ -821,18 +812,75 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
*fence = fence_get(f);
}
fence_put(f);
- if (!amdgpu_enable_scheduler) {
- amdgpu_ib_free(adev, ib);
- kfree(ib);
- }
return 0;
error_free:
- amdgpu_ib_free(adev, ib);
- kfree(ib);
+ amdgpu_job_free(job);
return r;
}
+/**
+ * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
+ *
+ * @adev: amdgpu_device pointer
+ * @gtt: GART instance to use for mapping
+ * @vm: requested vm
+ * @mapping: mapped range and flags to use for the update
+ * @addr: addr to set the area to
+ * @gtt_flags: flags as they are used for GTT
+ * @fence: optional resulting fence
+ *
+ * Split the mapping into smaller chunks so that each update fits
+ * into a SDMA IB.
+ * Returns 0 for success, -EINVAL for failure.
+ */
+static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+ struct amdgpu_gart *gtt,
+ uint32_t gtt_flags,
+ struct amdgpu_vm *vm,
+ struct amdgpu_bo_va_mapping *mapping,
+ uint64_t addr, struct fence **fence)
+{
+ const uint64_t max_size = 64ULL * 1024ULL * 1024ULL / AMDGPU_GPU_PAGE_SIZE;
+
+ uint64_t start = mapping->it.start;
+ uint32_t flags = gtt_flags;
+ int r;
+
+ /* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
+ * but in case of something, we filter the flags in first place
+ */
+ if (!(mapping->flags & AMDGPU_PTE_READABLE))
+ flags &= ~AMDGPU_PTE_READABLE;
+ if (!(mapping->flags & AMDGPU_PTE_WRITEABLE))
+ flags &= ~AMDGPU_PTE_WRITEABLE;
+
+ trace_amdgpu_vm_bo_update(mapping);
+
+ addr += mapping->offset;
+
+ if (!gtt || ((gtt == &adev->gart) && (flags == gtt_flags)))
+ return amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+ start, mapping->it.last,
+ flags, addr, fence);
+
+ while (start != mapping->it.last + 1) {
+ uint64_t last;
+
+ last = min((uint64_t)mapping->it.last, start + max_size - 1);
+ r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
+ start, last, flags, addr,
+ fence);
+ if (r)
+ return r;
+
+ start = last + 1;
+ addr += max_size * AMDGPU_GPU_PAGE_SIZE;
+ }
+
+ return 0;
+}
+
/**
* amdgpu_vm_bo_update - update all BO mappings in the vm page table
*
@@ -851,14 +899,25 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
{
struct amdgpu_vm *vm = bo_va->vm;
struct amdgpu_bo_va_mapping *mapping;
+ struct amdgpu_gart *gtt = NULL;
uint32_t flags;
uint64_t addr;
int r;
if (mem) {
addr = (u64)mem->start << PAGE_SHIFT;
- if (mem->mem_type != TTM_PL_TT)
+ switch (mem->mem_type) {
+ case TTM_PL_TT:
+ gtt = &bo_va->bo->adev->gart;
+ break;
+
+ case TTM_PL_VRAM:
addr += adev->vm_manager.vram_base_offset;
+ break;
+
+ default:
+ break;
+ }
} else {
addr = 0;
}
@@ -871,8 +930,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
spin_unlock(&vm->status_lock);
list_for_each_entry(mapping, &bo_va->invalids, list) {
- r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, addr,
- flags, &bo_va->last_pt_update);
+ r = amdgpu_vm_bo_split_mapping(adev, gtt, flags, vm, mapping, addr,
+ &bo_va->last_pt_update);
if (r)
return r;
}
@@ -912,21 +971,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
int r;
- spin_lock(&vm->freed_lock);
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
- spin_unlock(&vm->freed_lock);
- r = amdgpu_vm_bo_update_mapping(adev, vm, mapping, 0, 0, NULL);
+
+ r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
+ 0, NULL);
kfree(mapping);
if (r)
return r;
- spin_lock(&vm->freed_lock);
}
- spin_unlock(&vm->freed_lock);
-
return 0;
}
@@ -953,9 +1009,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
bo_va = list_first_entry(&vm->invalidated,
struct amdgpu_bo_va, vm_status);
spin_unlock(&vm->status_lock);
- mutex_lock(&bo_va->mutex);
+
r = amdgpu_vm_bo_update(adev, bo_va, NULL);
- mutex_unlock(&bo_va->mutex);
if (r)
return r;
@@ -976,7 +1031,7 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
* @vm: requested vm
* @bo: amdgpu buffer object
*
- * Add @bo into the requested vm (cayman+).
+ * Add @bo into the requested vm.
* Add @bo to the list of bos associated with the vm
* Returns newly added bo_va or NULL for failure
*
@@ -999,7 +1054,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
INIT_LIST_HEAD(&bo_va->vm_status);
- mutex_init(&bo_va->mutex);
+
list_add_tail(&bo_va->bo_list, &bo->va);
return bo_va;
@@ -1051,9 +1106,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
saddr /= AMDGPU_GPU_PAGE_SIZE;
eaddr /= AMDGPU_GPU_PAGE_SIZE;
- spin_lock(&vm->it_lock);
it = interval_tree_iter_first(&vm->va, saddr, eaddr);
- spin_unlock(&vm->it_lock);
if (it) {
struct amdgpu_bo_va_mapping *tmp;
tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1077,13 +1130,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
mapping->offset = offset;
mapping->flags = flags;
- mutex_lock(&bo_va->mutex);
list_add(&mapping->list, &bo_va->invalids);
- mutex_unlock(&bo_va->mutex);
- spin_lock(&vm->it_lock);
interval_tree_insert(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
- trace_amdgpu_vm_bo_map(bo_va, mapping);
/* Make sure the page tables are allocated */
saddr >>= amdgpu_vm_block_size;
@@ -1117,18 +1165,17 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
*/
pt->parent = amdgpu_bo_ref(vm->page_directory);
- r = amdgpu_vm_clear_bo(adev, pt);
+ r = amdgpu_vm_clear_bo(adev, vm, pt);
if (r) {
amdgpu_bo_unref(&pt);
goto error_free;
}
entry->robj = pt;
- entry->prefered_domains = AMDGPU_GEM_DOMAIN_VRAM;
- entry->allowed_domains = AMDGPU_GEM_DOMAIN_VRAM;
entry->priority = 0;
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
+ entry->user_pages = NULL;
vm->page_tables[pt_idx].addr = 0;
}
@@ -1136,9 +1183,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
error_free:
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
kfree(mapping);
@@ -1167,7 +1212,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
bool valid = true;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- mutex_lock(&bo_va->mutex);
+
list_for_each_entry(mapping, &bo_va->valids, list) {
if (mapping->it.start == saddr)
break;
@@ -1181,25 +1226,18 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
break;
}
- if (&mapping->list == &bo_va->invalids) {
- mutex_unlock(&bo_va->mutex);
+ if (&mapping->list == &bo_va->invalids)
return -ENOENT;
- }
}
- mutex_unlock(&bo_va->mutex);
+
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- if (valid) {
- spin_lock(&vm->freed_lock);
+ if (valid)
list_add(&mapping->list, &vm->freed);
- spin_unlock(&vm->freed_lock);
- } else {
+ else
kfree(mapping);
- }
return 0;
}
@@ -1210,7 +1248,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
* @bo_va: requested bo_va
*
- * Remove @bo_va->bo from the requested vm (cayman+).
+ * Remove @bo_va->bo from the requested vm.
*
* Object have to be reserved!
*/
@@ -1228,23 +1266,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- spin_lock(&vm->freed_lock);
list_add(&mapping->list, &vm->freed);
- spin_unlock(&vm->freed_lock);
}
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
kfree(mapping);
}
+
fence_put(bo_va->last_pt_update);
- mutex_destroy(&bo_va->mutex);
kfree(bo_va);
}
@@ -1255,7 +1287,7 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
* @vm: requested vm
* @bo: amdgpu buffer object
*
- * Mark @bo as invalid (cayman+).
+ * Mark @bo as invalid.
*/
void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
struct amdgpu_bo *bo)
@@ -1276,17 +1308,20 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
* @vm: requested vm
*
- * Init @vm fields (cayman+).
+ * Init @vm fields.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
AMDGPU_VM_PTE_COUNT * 8);
unsigned pd_size, pd_entries;
+ unsigned ring_instance;
+ struct amdgpu_ring *ring;
+ struct amd_sched_rq *rq;
int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- vm->ids[i].id = 0;
+ vm->ids[i].mgr_id = NULL;
vm->ids[i].flushed_updates = NULL;
}
vm->va = RB_ROOT;
@@ -1294,8 +1329,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
- spin_lock_init(&vm->it_lock);
- spin_lock_init(&vm->freed_lock);
+
pd_size = amdgpu_vm_directory_size(adev);
pd_entries = amdgpu_vm_num_pdes(adev);
@@ -1306,6 +1340,17 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
return -ENOMEM;
}
+ /* create scheduler entity for page table updates */
+
+ ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring);
+ ring_instance %= adev->vm_manager.vm_pte_num_rings;
+ ring = adev->vm_manager.vm_pte_rings[ring_instance];
+ rq = &ring->sched.sched_rq[AMD_SCHED_PRIORITY_KERNEL];
+ r = amd_sched_entity_init(&ring->sched, &vm->entity,
+ rq, amdgpu_sched_jobs);
+ if (r)
+ return r;
+
vm->page_directory_fence = NULL;
r = amdgpu_bo_create(adev, pd_size, align, true,
@@ -1313,22 +1358,27 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
AMDGPU_GEM_CREATE_NO_CPU_ACCESS,
NULL, NULL, &vm->page_directory);
if (r)
- return r;
+ goto error_free_sched_entity;
+
r = amdgpu_bo_reserve(vm->page_directory, false);
- if (r) {
- amdgpu_bo_unref(&vm->page_directory);
- vm->page_directory = NULL;
- return r;
- }
- r = amdgpu_vm_clear_bo(adev, vm->page_directory);
+ if (r)
+ goto error_free_page_directory;
+
+ r = amdgpu_vm_clear_bo(adev, vm, vm->page_directory);
amdgpu_bo_unreserve(vm->page_directory);
- if (r) {
- amdgpu_bo_unref(&vm->page_directory);
- vm->page_directory = NULL;
- return r;
- }
+ if (r)
+ goto error_free_page_directory;
return 0;
+
+error_free_page_directory:
+ amdgpu_bo_unref(&vm->page_directory);
+ vm->page_directory = NULL;
+
+error_free_sched_entity:
+ amd_sched_entity_fini(&ring->sched, &vm->entity);
+
+ return r;
}
/**
@@ -1337,7 +1387,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @vm: requested vm
*
- * Tear down @vm (cayman+).
+ * Tear down @vm.
* Unbind the VM and remove all bos from the vm bo list
*/
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
@@ -1345,6 +1395,8 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
struct amdgpu_bo_va_mapping *mapping, *tmp;
int i;
+ amd_sched_entity_fini(vm->entity.sched, &vm->entity);
+
if (!RB_EMPTY_ROOT(&vm->va)) {
dev_err(adev->dev, "still active bo inside vm\n");
}
@@ -1364,14 +1416,38 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- unsigned id = vm->ids[i].id;
+ struct amdgpu_vm_id *id = &vm->ids[i];
- atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
- (long)vm, 0);
- fence_put(vm->ids[i].flushed_updates);
+ if (id->mgr_id)
+ atomic_long_cmpxchg(&id->mgr_id->owner,
+ (long)id, 0);
+ fence_put(id->flushed_updates);
+ }
+}
+
+/**
+ * amdgpu_vm_manager_init - init the VM manager
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Initialize the VM manager structures
+ */
+void amdgpu_vm_manager_init(struct amdgpu_device *adev)
+{
+ unsigned i;
+
+ INIT_LIST_HEAD(&adev->vm_manager.ids_lru);
+
+ /* skip over VMID 0, since it is the system VM */
+ for (i = 1; i < adev->vm_manager.num_ids; ++i) {
+ amdgpu_vm_reset_id(adev, i);
+ list_add_tail(&adev->vm_manager.ids[i].list,
+ &adev->vm_manager.ids_lru);
}
+ atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
}
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 474ca02b09493..1f9109d3348bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -3017,7 +3017,6 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
&memory_level->MinVddcPhases);
memory_level->EnabledForThrottle = 1;
- memory_level->EnabledForActivity = 1;
memory_level->UpH = 0;
memory_level->DownH = 100;
memory_level->VoltageDownH = 0;
@@ -3376,7 +3375,6 @@ static int ci_populate_single_graphic_level(struct amdgpu_device *adev,
graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2);
graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm);
graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1);
- graphic_level->EnabledForActivity = 1;
return 0;
}
@@ -3407,6 +3405,7 @@ static int ci_populate_all_graphic_levels(struct amdgpu_device *adev)
pi->smc_state_table.GraphicsLevel[i].DisplayWatermark =
PPSMC_DISPLAY_WATERMARK_HIGH;
}
+ pi->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1;
pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count;
pi->dpm_level_enable_mask.sclk_dpm_enable_mask =
@@ -3450,6 +3449,8 @@ static int ci_populate_all_memory_levels(struct amdgpu_device *adev)
return ret;
}
+ pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
+
if ((dpm_table->mclk_table.count >= 2) &&
((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) {
pi->smc_state_table.MemoryLevel[1].MinVddc =
@@ -4381,26 +4382,6 @@ static int ci_dpm_force_performance_level(struct amdgpu_device *adev,
}
}
}
- if ((!pi->pcie_dpm_key_disabled) &&
- pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
- levels = 0;
- tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
- while (tmp >>= 1)
- levels++;
- if (levels) {
- ret = ci_dpm_force_state_pcie(adev, level);
- if (ret)
- return ret;
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX_1) &
- TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK) >>
- TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT;
- if (tmp == levels)
- break;
- udelay(1);
- }
- }
- }
} else if (level == AMDGPU_DPM_FORCED_LEVEL_LOW) {
if ((!pi->sclk_dpm_key_disabled) &&
pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
@@ -5395,30 +5376,6 @@ static int ci_dpm_enable(struct amdgpu_device *adev)
ci_update_current_ps(adev, boot_ps);
- if (adev->irq.installed &&
- amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) {
-#if 0
- PPSMC_Result result;
-#endif
- ret = ci_thermal_set_temperature_range(adev, CISLANDS_TEMP_RANGE_MIN,
- CISLANDS_TEMP_RANGE_MAX);
- if (ret) {
- DRM_ERROR("ci_thermal_set_temperature_range failed\n");
- return ret;
- }
- amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
- AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
- amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
- AMDGPU_THERMAL_IRQ_HIGH_TO_LOW);
-
-#if 0
- result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableThermalInterrupt);
-
- if (result != PPSMC_Result_OK)
- DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
-#endif
- }
-
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 155965ed14a3b..bddc9ba114955 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1059,257 +1059,6 @@ static int cik_read_register(struct amdgpu_device *adev, u32 se_num,
return -EINVAL;
}
-static void cik_print_gpu_status_regs(struct amdgpu_device *adev)
-{
- dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
- RREG32(mmGRBM_STATUS));
- dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
- RREG32(mmGRBM_STATUS2));
- dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE0));
- dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE1));
- dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE2));
- dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE3));
- dev_info(adev->dev, " SRBM_STATUS=0x%08X\n",
- RREG32(mmSRBM_STATUS));
- dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
- RREG32(mmSRBM_STATUS2));
- dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
- RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
- dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
- RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
- dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
- dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
- RREG32(mmCP_STALLED_STAT1));
- dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
- RREG32(mmCP_STALLED_STAT2));
- dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
- RREG32(mmCP_STALLED_STAT3));
- dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
- RREG32(mmCP_CPF_BUSY_STAT));
- dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
- RREG32(mmCP_CPF_STALLED_STAT1));
- dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
- dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
- dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
- RREG32(mmCP_CPC_STALLED_STAT1));
- dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
-}
-
-/**
- * cik_gpu_check_soft_reset - check which blocks are busy
- *
- * @adev: amdgpu_device pointer
- *
- * Check which blocks are busy and return the relevant reset
- * mask to be used by cik_gpu_soft_reset().
- * Returns a mask of the blocks to be reset.
- */
-u32 amdgpu_cik_gpu_check_soft_reset(struct amdgpu_device *adev)
-{
- u32 reset_mask = 0;
- u32 tmp;
-
- /* GRBM_STATUS */
- tmp = RREG32(mmGRBM_STATUS);
- if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
- GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
- GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
- GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
- GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
- GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
- reset_mask |= AMDGPU_RESET_GFX;
-
- if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK))
- reset_mask |= AMDGPU_RESET_CP;
-
- /* GRBM_STATUS2 */
- tmp = RREG32(mmGRBM_STATUS2);
- if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_RLC;
-
- /* SDMA0_STATUS_REG */
- tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
- if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
- reset_mask |= AMDGPU_RESET_DMA;
-
- /* SDMA1_STATUS_REG */
- tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
- if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
- reset_mask |= AMDGPU_RESET_DMA1;
-
- /* SRBM_STATUS2 */
- tmp = RREG32(mmSRBM_STATUS2);
- if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_DMA;
-
- if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_DMA1;
-
- /* SRBM_STATUS */
- tmp = RREG32(mmSRBM_STATUS);
-
- if (tmp & SRBM_STATUS__IH_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_IH;
-
- if (tmp & SRBM_STATUS__SEM_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_SEM;
-
- if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
- reset_mask |= AMDGPU_RESET_GRBM;
-
- if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_VMC;
-
- if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
- SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK))
- reset_mask |= AMDGPU_RESET_MC;
-
- if (amdgpu_display_is_display_hung(adev))
- reset_mask |= AMDGPU_RESET_DISPLAY;
-
- /* Skip MC reset as it's mostly likely not hung, just busy */
- if (reset_mask & AMDGPU_RESET_MC) {
- DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
- reset_mask &= ~AMDGPU_RESET_MC;
- }
-
- return reset_mask;
-}
-
-/**
- * cik_gpu_soft_reset - soft reset GPU
- *
- * @adev: amdgpu_device pointer
- * @reset_mask: mask of which blocks to reset
- *
- * Soft reset the blocks specified in @reset_mask.
- */
-static void cik_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask)
-{
- struct amdgpu_mode_mc_save save;
- u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
- u32 tmp;
-
- if (reset_mask == 0)
- return;
-
- dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask);
-
- cik_print_gpu_status_regs(adev);
- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR));
- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS));
-
- /* disable CG/PG */
-
- /* stop the rlc */
- gfx_v7_0_rlc_stop(adev);
-
- /* Disable GFX parsing/prefetching */
- WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
-
- /* Disable MEC parsing/prefetching */
- WREG32(mmCP_MEC_CNTL, CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
-
- if (reset_mask & AMDGPU_RESET_DMA) {
- /* sdma0 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
- }
- if (reset_mask & AMDGPU_RESET_DMA1) {
- /* sdma1 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
- }
-
- gmc_v7_0_mc_stop(adev, &save);
- if (amdgpu_asic_wait_for_mc_idle(adev)) {
- dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
-
- if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP))
- grbm_soft_reset = GRBM_SOFT_RESET__SOFT_RESET_CP_MASK |
- GRBM_SOFT_RESET__SOFT_RESET_GFX_MASK;
-
- if (reset_mask & AMDGPU_RESET_CP) {
- grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_CP_MASK;
-
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
- }
-
- if (reset_mask & AMDGPU_RESET_DMA)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA_MASK;
-
- if (reset_mask & AMDGPU_RESET_DMA1)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SDMA1_MASK;
-
- if (reset_mask & AMDGPU_RESET_DISPLAY)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_DC_MASK;
-
- if (reset_mask & AMDGPU_RESET_RLC)
- grbm_soft_reset |= GRBM_SOFT_RESET__SOFT_RESET_RLC_MASK;
-
- if (reset_mask & AMDGPU_RESET_SEM)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_SEM_MASK;
-
- if (reset_mask & AMDGPU_RESET_IH)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_IH_MASK;
-
- if (reset_mask & AMDGPU_RESET_GRBM)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_GRBM_MASK;
-
- if (reset_mask & AMDGPU_RESET_VMC)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_VMC_MASK;
-
- if (!(adev->flags & AMD_IS_APU)) {
- if (reset_mask & AMDGPU_RESET_MC)
- srbm_soft_reset |= SRBM_SOFT_RESET__SOFT_RESET_MC_MASK;
- }
-
- if (grbm_soft_reset) {
- tmp = RREG32(mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmGRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~grbm_soft_reset;
- WREG32(mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmGRBM_SOFT_RESET);
- }
-
- if (srbm_soft_reset) {
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
- }
-
- /* Wait a little for things to settle down */
- udelay(50);
-
- gmc_v7_0_mc_resume(adev, &save);
- udelay(50);
-
- cik_print_gpu_status_regs(adev);
-}
-
struct kv_reset_save_regs {
u32 gmcon_reng_execute;
u32 gmcon_misc;
@@ -1405,45 +1154,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
{
- struct amdgpu_mode_mc_save save;
struct kv_reset_save_regs kv_save = { 0 };
- u32 tmp, i;
+ u32 i;
dev_info(adev->dev, "GPU pci config reset\n");
- /* disable dpm? */
-
- /* disable cg/pg */
-
- /* Disable GFX parsing/prefetching */
- WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK |
- CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
-
- /* Disable MEC parsing/prefetching */
- WREG32(mmCP_MEC_CNTL,
- CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
-
- /* sdma0 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
- /* sdma1 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
- tmp |= SDMA0_F32_CNTL__HALT_MASK;
- WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
- /* XXX other engines? */
-
- /* halt the rlc, disable cp internal ints */
- gfx_v7_0_rlc_stop(adev);
-
- udelay(50);
-
- /* disable mem access */
- gmc_v7_0_mc_stop(adev, &save);
- if (amdgpu_asic_wait_for_mc_idle(adev)) {
- dev_warn(adev->dev, "Wait for MC idle timed out !\n");
- }
-
if (adev->flags & AMD_IS_APU)
kv_save_regs_for_reset(adev, &kv_save);
@@ -1489,26 +1204,11 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
*/
static int cik_asic_reset(struct amdgpu_device *adev)
{
- u32 reset_mask;
-
- reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
+ cik_set_bios_scratch_engine_hung(adev, true);
- if (reset_mask)
- cik_set_bios_scratch_engine_hung(adev, true);
+ cik_gpu_pci_config_reset(adev);
- /* try soft reset */
- cik_gpu_soft_reset(adev, reset_mask);
-
- reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
-
- /* try pci config reset */
- if (reset_mask && amdgpu_hard_reset)
- cik_gpu_pci_config_reset(adev);
-
- reset_mask = amdgpu_cik_gpu_check_soft_reset(adev);
-
- if (!reset_mask)
- cik_set_bios_scratch_engine_hung(adev, false);
+ cik_set_bios_scratch_engine_hung(adev, false);
return 0;
}
@@ -2328,8 +2028,6 @@ static int cik_common_early_init(void *handle)
adev->asic_funcs = &cik_asic_funcs;
- adev->has_uvd = true;
-
adev->rev_id = cik_get_rev_id(adev);
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c55ecf0ea8454..d3ac3298fba8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -212,7 +212,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
- u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+ u32 extra_bits = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 4)
@@ -261,6 +261,13 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}
+static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 1);
+}
+
/**
* cik_sdma_ring_emit_fence - emit a fence on the DMA ring
*
@@ -294,30 +301,6 @@ static void cik_sdma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_TRAP, 0, 0));
}
-/**
- * cik_sdma_ring_emit_semaphore - emit a semaphore on the dma ring
- *
- * @ring: amdgpu_ring structure holding ring information
- * @semaphore: amdgpu semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (CIK).
- */
-static bool cik_sdma_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- u64 addr = semaphore->gpu_addr;
- u32 extra_bits = emit_wait ? 0 : SDMA_SEMAPHORE_EXTRA_S;
-
- amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SEMAPHORE, 0, extra_bits));
- amdgpu_ring_write(ring, addr & 0xfffffff8);
- amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
-
- return true;
-}
-
/**
* cik_sdma_gfx_stop - stop the gfx async dma engines
*
@@ -417,6 +400,9 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev)
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+ WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+ adev->gfx.config.gb_addr_config & 0x70);
+
WREG32(mmSDMA0_SEM_INCOMPLETE_TIMER_CNTL + sdma_offsets[i], 0);
WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
@@ -584,7 +570,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ring_lock(ring, 5);
+ r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_wb_free(adev, index);
@@ -595,7 +581,7 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
amdgpu_ring_write(ring, 1); /* number of DWs to follow */
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -645,7 +631,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(ring, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
goto err0;
@@ -657,9 +643,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[3] = 1;
ib.ptr[4] = 0xDEADBEEF;
ib.length_dw = 5;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;
@@ -685,7 +669,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
err1:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err0:
amdgpu_wb_free(adev, index);
return r;
@@ -738,7 +723,7 @@ static void cik_sdma_vm_copy_pte(struct amdgpu_ib *ib,
* Update PTEs by writing them manually using sDMA (CIK).
*/
static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
- uint64_t pe,
+ const dma_addr_t *pages_addr, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
@@ -757,14 +742,7 @@ static void cik_sdma_vm_write_pte(struct amdgpu_ib *ib,
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = ndw;
for (; ndw > 0; ndw -= 2, --count, pe += 8) {
- if (flags & AMDGPU_PTE_SYSTEM) {
- value = amdgpu_vm_map_gart(ib->ring->adev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
- } else if (flags & AMDGPU_PTE_VALID) {
- value = addr;
- } else {
- value = 0;
- }
+ value = amdgpu_vm_map_gart(pages_addr, addr);
addr += incr;
value |= flags;
ib->ptr[ib->length_dw++] = value;
@@ -827,9 +805,9 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib,
* @ib: indirect buffer to fill with padding
*
*/
-static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
+static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
+ struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
u32 pad_count;
int i;
@@ -844,6 +822,30 @@ static void cik_sdma_vm_pad_ib(struct amdgpu_ib *ib)
SDMA_PACKET(SDMA_OPCODE_NOP, 0, 0);
}
+/**
+ * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0,
+ SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(3) | /* equal */
+ SDMA_POLL_REG_MEM_EXTRA_M));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
+}
+
/**
* cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
*
@@ -1097,6 +1099,8 @@ static void cik_sdma_print_status(void *handle)
i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
+ dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n",
+ i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
cik_srbm_select(adev, 0, 0, 0, j);
@@ -1297,12 +1301,14 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.parse_cs = NULL,
.emit_ib = cik_sdma_ring_emit_ib,
.emit_fence = cik_sdma_ring_emit_fence,
- .emit_semaphore = cik_sdma_ring_emit_semaphore,
+ .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
.emit_vm_flush = cik_sdma_ring_emit_vm_flush,
.emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
.test_ring = cik_sdma_ring_test_ring,
.test_ib = cik_sdma_ring_test_ib,
.insert_nop = cik_sdma_ring_insert_nop,
+ .pad_ib = cik_sdma_ring_pad_ib,
};
static void cik_sdma_set_ring_funcs(struct amdgpu_device *adev)
@@ -1399,14 +1405,18 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = {
.copy_pte = cik_sdma_vm_copy_pte,
.write_pte = cik_sdma_vm_write_pte,
.set_pte_pde = cik_sdma_vm_set_pte_pde,
- .pad_ib = cik_sdma_vm_pad_ib,
};
static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ unsigned i;
+
if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs;
- adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
- adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->vm_manager.vm_pte_rings[i] =
+ &adev->sdma.instance[i].ring;
+
+ adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 7f6d457f250a4..60d4493206dd1 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -46,9 +46,6 @@
#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define CIK_RB_BITMAP_WIDTH_PER_SH 2
-#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4
-
#define AMDGPU_NUM_OF_VMIDS 8
#define PIPEID(x) ((x) << 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 093599aba64b9..6de2ce535e370 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1668,6 +1668,9 @@ static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
{
int i;
+ if (!amdgpu_audio)
+ return;
+
if (!adev->mode_info.audio.enabled)
return;
@@ -1973,7 +1976,7 @@ static void dce_v10_0_afmt_enable(struct drm_encoder *encoder, bool enable)
enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
}
-static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v10_0_afmt_init(struct amdgpu_device *adev)
{
int i;
@@ -1986,8 +1989,16 @@ static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
if (adev->mode_info.afmt[i]) {
adev->mode_info.afmt[i]->offset = dig_offsets[i];
adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
}
}
+ return 0;
}
static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
@@ -2064,8 +2075,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
target_fb = fb;
- }
- else {
+ } else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
target_fb = crtc->primary->fb;
}
@@ -2079,9 +2089,9 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic)
+ if (atomic) {
fb_location = amdgpu_bo_gpu_offset(rbo);
- else {
+ } else {
r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(rbo);
@@ -2670,7 +2680,6 @@ static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
- destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
@@ -2701,13 +2710,13 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_on(dev, amdgpu_crtc->crtc_id);
dce_v10_0_crtc_load_lut(crtc);
break;
case DRM_MODE_DPMS_STANDBY:
case DRM_MODE_DPMS_SUSPEND:
case DRM_MODE_DPMS_OFF:
- drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_off(dev, amdgpu_crtc->crtc_id);
if (amdgpu_crtc->enabled) {
dce_v10_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2890,7 +2899,6 @@ static int dce_v10_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
- amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = 128;
@@ -2982,8 +2990,6 @@ static int dce_v10_0_sw_init(void *handle)
if (r)
return r;
- adev->mode_info.mode_config_initialized = true;
-
adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
adev->ddev->mode_config.max_width = 16384;
@@ -3014,7 +3020,9 @@ static int dce_v10_0_sw_init(void *handle)
return -EINVAL;
/* setup afmt */
- dce_v10_0_afmt_init(adev);
+ r = dce_v10_0_afmt_init(adev);
+ if (r)
+ return r;
r = dce_v10_0_audio_init(adev);
if (r)
@@ -3022,7 +3030,8 @@ static int dce_v10_0_sw_init(void *handle)
drm_kms_helper_poll_init(adev->ddev);
- return r;
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
}
static int dce_v10_0_sw_fini(void *handle)
@@ -3366,7 +3375,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
- queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+ schedule_work(&works->unpin_work);
return 0;
}
@@ -3624,16 +3633,8 @@ dce_v10_0_ext_dpms(struct drm_encoder *encoder, int mode)
}
-static bool dce_v10_0_ext_mode_fixup(struct drm_encoder *encoder,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return true;
-}
-
static const struct drm_encoder_helper_funcs dce_v10_0_ext_helper_funcs = {
.dpms = dce_v10_0_ext_dpms,
- .mode_fixup = dce_v10_0_ext_mode_fixup,
.prepare = dce_v10_0_ext_prepare,
.mode_set = dce_v10_0_ext_mode_set,
.commit = dce_v10_0_ext_commit,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 8e67249d4367d..e9ccc6b787f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1658,6 +1658,9 @@ static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
{
int i;
+ if (!amdgpu_audio)
+ return;
+
if (!adev->mode_info.audio.enabled)
return;
@@ -1963,7 +1966,7 @@ static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
}
-static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
{
int i;
@@ -1976,8 +1979,16 @@ static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
if (adev->mode_info.afmt[i]) {
adev->mode_info.afmt[i]->offset = dig_offsets[i];
adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
}
}
+ return 0;
}
static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
@@ -2054,8 +2065,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
target_fb = fb;
- }
- else {
+ } else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
target_fb = crtc->primary->fb;
}
@@ -2069,9 +2079,9 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic)
+ if (atomic) {
fb_location = amdgpu_bo_gpu_offset(rbo);
- else {
+ } else {
r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(rbo);
@@ -2661,7 +2671,6 @@ static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
- destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
@@ -2692,13 +2701,13 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_on(dev, amdgpu_crtc->crtc_id);
dce_v11_0_crtc_load_lut(crtc);
break;
case DRM_MODE_DPMS_STANDBY:
case DRM_MODE_DPMS_SUSPEND:
case DRM_MODE_DPMS_OFF:
- drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_off(dev, amdgpu_crtc->crtc_id);
if (amdgpu_crtc->enabled) {
dce_v11_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2881,7 +2890,6 @@ static int dce_v11_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
- amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = 128;
@@ -2963,7 +2971,7 @@ static int dce_v11_0_sw_init(void *handle)
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
if (r)
- return r;
+ return r;
}
for (i = 8; i < 20; i += 2) {
@@ -2975,9 +2983,7 @@ static int dce_v11_0_sw_init(void *handle)
/* HPD hotplug */
r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
if (r)
- return r;
-
- adev->mode_info.mode_config_initialized = true;
+ return r;
adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
@@ -2996,6 +3002,7 @@ static int dce_v11_0_sw_init(void *handle)
adev->ddev->mode_config.max_width = 16384;
adev->ddev->mode_config.max_height = 16384;
+
/* allocate crtcs */
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = dce_v11_0_crtc_init(adev, i);
@@ -3009,7 +3016,9 @@ static int dce_v11_0_sw_init(void *handle)
return -EINVAL;
/* setup afmt */
- dce_v11_0_afmt_init(adev);
+ r = dce_v11_0_afmt_init(adev);
+ if (r)
+ return r;
r = dce_v11_0_audio_init(adev);
if (r)
@@ -3017,7 +3026,8 @@ static int dce_v11_0_sw_init(void *handle)
drm_kms_helper_poll_init(adev->ddev);
- return r;
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
}
static int dce_v11_0_sw_fini(void *handle)
@@ -3361,7 +3371,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
- queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+ schedule_work(&works->unpin_work);
return 0;
}
@@ -3619,16 +3629,8 @@ dce_v11_0_ext_dpms(struct drm_encoder *encoder, int mode)
}
-static bool dce_v11_0_ext_mode_fixup(struct drm_encoder *encoder,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return true;
-}
-
static const struct drm_encoder_helper_funcs dce_v11_0_ext_helper_funcs = {
.dpms = dce_v11_0_ext_dpms,
- .mode_fixup = dce_v11_0_ext_mode_fixup,
.prepare = dce_v11_0_ext_prepare,
.mode_set = dce_v11_0_ext_mode_set,
.commit = dce_v11_0_ext_commit,
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index d0e128c248134..e56b55d8c2802 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1639,6 +1639,9 @@ static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
{
int i;
+ if (!amdgpu_audio)
+ return;
+
if (!adev->mode_info.audio.enabled)
return;
@@ -1910,7 +1913,7 @@ static void dce_v8_0_afmt_enable(struct drm_encoder *encoder, bool enable)
enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
}
-static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v8_0_afmt_init(struct amdgpu_device *adev)
{
int i;
@@ -1923,8 +1926,16 @@ static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
if (adev->mode_info.afmt[i]) {
adev->mode_info.afmt[i]->offset = dig_offsets[i];
adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
}
}
+ return 0;
}
static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
@@ -2001,8 +2012,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
target_fb = fb;
- }
- else {
+ } else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
target_fb = crtc->primary->fb;
}
@@ -2016,9 +2026,9 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic)
+ if (atomic) {
fb_location = amdgpu_bo_gpu_offset(rbo);
- else {
+ } else {
r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(rbo);
@@ -2582,7 +2592,6 @@ static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
drm_crtc_cleanup(crtc);
- destroy_workqueue(amdgpu_crtc->pflip_queue);
kfree(amdgpu_crtc);
}
@@ -2613,13 +2622,13 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_on(dev, amdgpu_crtc->crtc_id);
dce_v8_0_crtc_load_lut(crtc);
break;
case DRM_MODE_DPMS_STANDBY:
case DRM_MODE_DPMS_SUSPEND:
case DRM_MODE_DPMS_OFF:
- drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_off(dev, amdgpu_crtc->crtc_id);
if (amdgpu_crtc->enabled) {
dce_v8_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2809,7 +2818,6 @@ static int dce_v8_0_crtc_init(struct amdgpu_device *adev, int index)
drm_mode_crtc_set_gamma_size(&amdgpu_crtc->base, 256);
amdgpu_crtc->crtc_id = index;
- amdgpu_crtc->pflip_queue = create_singlethread_workqueue("amdgpu-pageflip-queue");
adev->mode_info.crtcs[index] = amdgpu_crtc;
amdgpu_crtc->max_cursor_width = CIK_CURSOR_WIDTH;
@@ -2892,8 +2900,6 @@ static int dce_v8_0_sw_init(void *handle)
if (r)
return r;
- adev->mode_info.mode_config_initialized = true;
-
adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
adev->ddev->mode_config.max_width = 16384;
@@ -2924,7 +2930,9 @@ static int dce_v8_0_sw_init(void *handle)
return -EINVAL;
/* setup afmt */
- dce_v8_0_afmt_init(adev);
+ r = dce_v8_0_afmt_init(adev);
+ if (r)
+ return r;
r = dce_v8_0_audio_init(adev);
if (r)
@@ -2932,7 +2940,8 @@ static int dce_v8_0_sw_init(void *handle)
drm_kms_helper_poll_init(adev->ddev);
- return r;
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
}
static int dce_v8_0_sw_fini(void *handle)
@@ -3375,7 +3384,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,
spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
- queue_work(amdgpu_crtc->pflip_queue, &works->unpin_work);
+ schedule_work(&works->unpin_work);
return 0;
}
@@ -3554,16 +3563,8 @@ dce_v8_0_ext_dpms(struct drm_encoder *encoder, int mode)
}
-static bool dce_v8_0_ext_mode_fixup(struct drm_encoder *encoder,
- const struct drm_display_mode *mode,
- struct drm_display_mode *adjusted_mode)
-{
- return true;
-}
-
static const struct drm_encoder_helper_funcs dce_v8_0_ext_helper_funcs = {
.dpms = dce_v8_0_ext_dpms,
- .mode_fixup = dce_v8_0_ext_mode_fixup,
.prepare = dce_v8_0_ext_prepare,
.mode_set = dce_v8_0_ext_mode_set,
.commit = dce_v8_0_ext_commit,
diff --git a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
index e35340afd3db1..b336c918d6a79 100644
--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
@@ -272,6 +272,12 @@ static int fiji_smu_upload_firmware_image(struct amdgpu_device *adev)
if (!adev->pm.fw)
return -EINVAL;
+ /* Skip SMC ucode loading on SR-IOV capable boards.
+ * vbios does this for us in asic_init in that case.
+ */
+ if (adev->virtualization.supports_sr_iov)
+ return 0;
+
hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
amdgpu_ucode_print_smc_hdr(&hdr->header);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 06602df707f86..bb8709066fd87 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -31,8 +31,6 @@
#include "amdgpu_ucode.h"
#include "clearstate_ci.h"
-#include "uvd/uvd_4_2_d.h"
-
#include "dce/dce_8_0_d.h"
#include "dce/dce_8_0_sh_mask.h"
@@ -1006,9 +1004,15 @@ static int gfx_v7_0_init_microcode(struct amdgpu_device *adev)
*/
static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
{
- const u32 num_tile_mode_states = 32;
- const u32 num_secondary_tile_mode_states = 16;
- u32 reg_offset, gb_tile_moden, split_equal_to_row_size;
+ const u32 num_tile_mode_states =
+ ARRAY_SIZE(adev->gfx.config.tile_mode_array);
+ const u32 num_secondary_tile_mode_states =
+ ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
+ u32 reg_offset, split_equal_to_row_size;
+ uint32_t *tile, *macrotile;
+
+ tile = adev->gfx.config.tile_mode_array;
+ macrotile = adev->gfx.config.macrotile_mode_array;
switch (adev->gfx.config.mem_row_size_in_kb) {
case 1:
@@ -1023,832 +1027,531 @@ static void gfx_v7_0_tiling_mode_table_init(struct amdgpu_device *adev)
break;
}
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ tile[reg_offset] = 0;
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ macrotile[reg_offset] = 0;
+
switch (adev->asic_type) {
case CHIP_BONAIRE:
- for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
- switch (reg_offset) {
- case 0:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 1:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 2:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 3:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 4:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
- case 5:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 6:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
- case 7:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
-
- case 8:
- gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16));
- break;
- case 9:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
- break;
- case 10:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 11:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 12:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- case 13:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
- break;
- case 14:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 15:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 16:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 17:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- case 18:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 19:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
- break;
- case 20:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 21:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 22:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 23:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- case 24:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 25:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 26:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 27:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
- break;
- case 28:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 29:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 30:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- default:
- gb_tile_moden = 0;
- break;
- }
- adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
- WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
- }
- for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
- switch (reg_offset) {
- case 0:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 1:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 2:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 3:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 4:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 5:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 6:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_4_BANK));
- break;
- case 8:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 9:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 10:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 11:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 12:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 13:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 14:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_4_BANK));
- break;
- default:
- gb_tile_moden = 0;
- break;
- }
- adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
- WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
- }
+ tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[7] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16));
+ tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+ tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[12] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[17] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[23] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+ tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[30] = (TILE_SPLIT(split_equal_to_row_size));
+
+ macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
break;
case CHIP_HAWAII:
- for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
- switch (reg_offset) {
- case 0:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 1:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 2:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 3:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 4:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
- case 5:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
- case 6:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
- case 7:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
-
- case 8:
- gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
- break;
- case 9:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
- break;
- case 10:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 11:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 12:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 13:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
- break;
- case 14:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 15:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 16:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 17:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 18:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 19:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
- break;
- case 20:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 21:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 22:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 23:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 24:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 25:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 26:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 27:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
- break;
- case 28:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 29:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 30:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P4_16x16) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- default:
- gb_tile_moden = 0;
- break;
- }
- adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
- WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
- }
- for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
- switch (reg_offset) {
- case 0:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 1:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 2:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 3:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 4:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 5:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_4_BANK));
- break;
- case 6:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_4_BANK));
- break;
- case 8:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 9:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 10:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 11:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 12:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 13:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 14:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
- NUM_BANKS(ADDR_SURF_4_BANK));
- break;
- default:
- gb_tile_moden = 0;
- break;
- }
- adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
- WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
- }
+ tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[7] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16));
+ tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+ tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[12] = (ARRAY_MODE(ARRAY_PRT_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[17] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
+ tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[23] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+ tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P16_32x32_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[30] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P4_16x16) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+
+ macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+ macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_1) |
+ NUM_BANKS(ADDR_SURF_4_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
break;
case CHIP_KABINI:
case CHIP_KAVERI:
case CHIP_MULLINS:
default:
- for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++) {
- switch (reg_offset) {
- case 0:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 1:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 2:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 3:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 4:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
- case 5:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
- break;
- case 6:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
- TILE_SPLIT(split_equal_to_row_size));
- break;
- case 7:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
-
- case 8:
- gb_tile_moden = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
- PIPE_CONFIG(ADDR_SURF_P2));
- break;
- case 9:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
- break;
- case 10:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 11:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 12:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- case 13:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
- break;
- case 14:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 15:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 16:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 17:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- case 18:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 19:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
- break;
- case 20:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 21:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 22:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 23:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- case 24:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 25:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 26:
- gb_tile_moden = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
- break;
- case 27:
- gb_tile_moden = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
- break;
- case 28:
- gb_tile_moden = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
- break;
- case 29:
- gb_tile_moden = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
- PIPE_CONFIG(ADDR_SURF_P2) |
- MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
- SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
- break;
- case 30:
- gb_tile_moden = (TILE_SPLIT(split_equal_to_row_size));
- break;
- default:
- gb_tile_moden = 0;
- break;
- }
- adev->gfx.config.tile_mode_array[reg_offset] = gb_tile_moden;
- WREG32(mmGB_TILE_MODE0 + reg_offset, gb_tile_moden);
- }
- for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++) {
- switch (reg_offset) {
- case 0:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 1:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 2:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 3:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 4:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 5:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 6:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- case 8:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 9:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 10:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 11:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 12:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 13:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
- NUM_BANKS(ADDR_SURF_16_BANK));
- break;
- case 14:
- gb_tile_moden = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
- BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
- MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
- NUM_BANKS(ADDR_SURF_8_BANK));
- break;
- default:
- gb_tile_moden = 0;
- break;
- }
- adev->gfx.config.macrotile_mode_array[reg_offset] = gb_tile_moden;
- WREG32(mmGB_MACROTILE_MODE0 + reg_offset, gb_tile_moden);
- }
+ tile[0] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_64B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[1] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_128B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[2] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_256B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[3] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ TILE_SPLIT(ADDR_SURF_TILE_SPLIT_512B) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[4] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[5] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING));
+ tile[6] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DEPTH_MICRO_TILING) |
+ TILE_SPLIT(split_equal_to_row_size));
+ tile[7] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[8] = (ARRAY_MODE(ARRAY_LINEAR_ALIGNED) |
+ PIPE_CONFIG(ADDR_SURF_P2));
+ tile[9] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING));
+ tile[10] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[11] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_DISPLAY_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[12] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[13] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING));
+ tile[14] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[15] = (ARRAY_MODE(ARRAY_3D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[16] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[17] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[18] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[19] = (ARRAY_MODE(ARRAY_1D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING));
+ tile[20] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[21] = (ARRAY_MODE(ARRAY_3D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[22] = (ARRAY_MODE(ARRAY_PRT_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[23] = (TILE_SPLIT(split_equal_to_row_size));
+ tile[24] = (ARRAY_MODE(ARRAY_2D_TILED_THICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THIN_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[25] = (ARRAY_MODE(ARRAY_2D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[26] = (ARRAY_MODE(ARRAY_3D_TILED_XTHICK) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_THICK_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_1));
+ tile[27] = (ARRAY_MODE(ARRAY_1D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING));
+ tile[28] = (ARRAY_MODE(ARRAY_2D_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_2));
+ tile[29] = (ARRAY_MODE(ARRAY_PRT_TILED_THIN1) |
+ PIPE_CONFIG(ADDR_SURF_P2) |
+ MICRO_TILE_MODE_NEW(ADDR_SURF_ROTATED_MICRO_TILING) |
+ SAMPLE_SPLIT(ADDR_SURF_SAMPLE_SPLIT_8));
+ tile[30] = (TILE_SPLIT(split_equal_to_row_size));
+
+ macrotile[0] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[1] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[2] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[3] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[4] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[5] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[6] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+ macrotile[8] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_8) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[9] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_4) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[10] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_4) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[11] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_2) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[12] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_2) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[13] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_4) |
+ NUM_BANKS(ADDR_SURF_16_BANK));
+ macrotile[14] = (BANK_WIDTH(ADDR_SURF_BANK_WIDTH_1) |
+ BANK_HEIGHT(ADDR_SURF_BANK_HEIGHT_1) |
+ MACRO_TILE_ASPECT(ADDR_SURF_MACRO_ASPECT_2) |
+ NUM_BANKS(ADDR_SURF_8_BANK));
+
+ for (reg_offset = 0; reg_offset < num_tile_mode_states; reg_offset++)
+ WREG32(mmGB_TILE_MODE0 + reg_offset, tile[reg_offset]);
+ for (reg_offset = 0; reg_offset < num_secondary_tile_mode_states; reg_offset++)
+ if (reg_offset != 7)
+ WREG32(mmGB_MACROTILE_MODE0 + reg_offset, macrotile[reg_offset]);
break;
}
}
@@ -1893,45 +1596,31 @@ void gfx_v7_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
*/
static u32 gfx_v7_0_create_bitmask(u32 bit_width)
{
- u32 i, mask = 0;
-
- for (i = 0; i < bit_width; i++) {
- mask <<= 1;
- mask |= 1;
- }
- return mask;
+ return (u32)((1ULL << bit_width) - 1);
}
/**
- * gfx_v7_0_get_rb_disabled - computes the mask of disabled RBs
+ * gfx_v7_0_get_rb_active_bitmap - computes the mask of enabled RBs
*
* @adev: amdgpu_device pointer
- * @max_rb_num: max RBs (render backends) for the asic
- * @se_num: number of SEs (shader engines) for the asic
- * @sh_per_se: number of SH blocks per SE for the asic
*
- * Calculates the bitmask of disabled RBs (CIK).
- * Returns the disabled RB bitmask.
+ * Calculates the bitmask of enabled RBs (CIK).
+ * Returns the enabled RB bitmask.
*/
-static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
- u32 max_rb_num_per_se,
- u32 sh_per_se)
+static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
u32 data, mask;
data = RREG32(mmCC_RB_BACKEND_DISABLE);
- if (data & 1)
- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
- else
- data = 0;
-
data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
- mask = gfx_v7_0_create_bitmask(max_rb_num_per_se / sh_per_se);
+ mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
- return data & mask;
+ return (~data) & mask;
}
/**
@@ -1940,73 +1629,31 @@ static u32 gfx_v7_0_get_rb_disabled(struct amdgpu_device *adev,
* @adev: amdgpu_device pointer
* @se_num: number of SEs (shader engines) for the asic
* @sh_per_se: number of SH blocks per SE for the asic
- * @max_rb_num: max RBs (render backends) for the asic
*
* Configures per-SE/SH RB registers (CIK).
*/
-static void gfx_v7_0_setup_rb(struct amdgpu_device *adev,
- u32 se_num, u32 sh_per_se,
- u32 max_rb_num_per_se)
+static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
{
int i, j;
- u32 data, mask;
- u32 disabled_rbs = 0;
- u32 enabled_rbs = 0;
+ u32 data;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < se_num; i++) {
- for (j = 0; j < sh_per_se; j++) {
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v7_0_select_se_sh(adev, i, j);
- data = gfx_v7_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se);
- if (adev->asic_type == CHIP_HAWAII)
- disabled_rbs |= data << ((i * sh_per_se + j) * HAWAII_RB_BITMAP_WIDTH_PER_SH);
- else
- disabled_rbs |= data << ((i * sh_per_se + j) * CIK_RB_BITMAP_WIDTH_PER_SH);
+ data = gfx_v7_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
}
}
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
- mask = 1;
- for (i = 0; i < max_rb_num_per_se * se_num; i++) {
- if (!(disabled_rbs & mask))
- enabled_rbs |= mask;
- mask <<= 1;
- }
-
- adev->gfx.config.backend_enable_mask = enabled_rbs;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < se_num; i++) {
- gfx_v7_0_select_se_sh(adev, i, 0xffffffff);
- data = 0;
- for (j = 0; j < sh_per_se; j++) {
- switch (enabled_rbs & 3) {
- case 0:
- if (j == 0)
- data |= (RASTER_CONFIG_RB_MAP_3 <<
- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
- else
- data |= (RASTER_CONFIG_RB_MAP_0 <<
- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
- break;
- case 1:
- data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
- break;
- case 2:
- data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
- break;
- case 3:
- default:
- data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
- break;
- }
- enabled_rbs >>= 2;
- }
- WREG32(mmPA_SC_RASTER_CONFIG, data);
- }
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
- mutex_unlock(&adev->grbm_idx_mutex);
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
}
/**
@@ -2059,192 +1706,23 @@ static void gmc_v7_0_init_compute_vmid(struct amdgpu_device *adev)
*/
static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
{
- u32 gb_addr_config;
- u32 mc_shared_chmap, mc_arb_ramcfg;
- u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
- u32 sh_mem_cfg;
- u32 tmp;
- int i;
-
- switch (adev->asic_type) {
- case CHIP_BONAIRE:
- adev->gfx.config.max_shader_engines = 2;
- adev->gfx.config.max_tile_pipes = 4;
- adev->gfx.config.max_cu_per_sh = 7;
- adev->gfx.config.max_sh_per_se = 1;
- adev->gfx.config.max_backends_per_se = 2;
- adev->gfx.config.max_texture_channel_caches = 4;
- adev->gfx.config.max_gprs = 256;
- adev->gfx.config.max_gs_threads = 32;
- adev->gfx.config.max_hw_contexts = 8;
-
- adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
- adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
- adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
- adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
- break;
- case CHIP_HAWAII:
- adev->gfx.config.max_shader_engines = 4;
- adev->gfx.config.max_tile_pipes = 16;
- adev->gfx.config.max_cu_per_sh = 11;
- adev->gfx.config.max_sh_per_se = 1;
- adev->gfx.config.max_backends_per_se = 4;
- adev->gfx.config.max_texture_channel_caches = 16;
- adev->gfx.config.max_gprs = 256;
- adev->gfx.config.max_gs_threads = 32;
- adev->gfx.config.max_hw_contexts = 8;
-
- adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
- adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
- adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
- adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
- break;
- case CHIP_KAVERI:
- adev->gfx.config.max_shader_engines = 1;
- adev->gfx.config.max_tile_pipes = 4;
- if ((adev->pdev->device == 0x1304) ||
- (adev->pdev->device == 0x1305) ||
- (adev->pdev->device == 0x130C) ||
- (adev->pdev->device == 0x130F) ||
- (adev->pdev->device == 0x1310) ||
- (adev->pdev->device == 0x1311) ||
- (adev->pdev->device == 0x131C)) {
- adev->gfx.config.max_cu_per_sh = 8;
- adev->gfx.config.max_backends_per_se = 2;
- } else if ((adev->pdev->device == 0x1309) ||
- (adev->pdev->device == 0x130A) ||
- (adev->pdev->device == 0x130D) ||
- (adev->pdev->device == 0x1313) ||
- (adev->pdev->device == 0x131D)) {
- adev->gfx.config.max_cu_per_sh = 6;
- adev->gfx.config.max_backends_per_se = 2;
- } else if ((adev->pdev->device == 0x1306) ||
- (adev->pdev->device == 0x1307) ||
- (adev->pdev->device == 0x130B) ||
- (adev->pdev->device == 0x130E) ||
- (adev->pdev->device == 0x1315) ||
- (adev->pdev->device == 0x131B)) {
- adev->gfx.config.max_cu_per_sh = 4;
- adev->gfx.config.max_backends_per_se = 1;
- } else {
- adev->gfx.config.max_cu_per_sh = 3;
- adev->gfx.config.max_backends_per_se = 1;
- }
- adev->gfx.config.max_sh_per_se = 1;
- adev->gfx.config.max_texture_channel_caches = 4;
- adev->gfx.config.max_gprs = 256;
- adev->gfx.config.max_gs_threads = 16;
- adev->gfx.config.max_hw_contexts = 8;
-
- adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
- adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
- adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
- adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
- break;
- case CHIP_KABINI:
- case CHIP_MULLINS:
- default:
- adev->gfx.config.max_shader_engines = 1;
- adev->gfx.config.max_tile_pipes = 2;
- adev->gfx.config.max_cu_per_sh = 2;
- adev->gfx.config.max_sh_per_se = 1;
- adev->gfx.config.max_backends_per_se = 1;
- adev->gfx.config.max_texture_channel_caches = 2;
- adev->gfx.config.max_gprs = 256;
- adev->gfx.config.max_gs_threads = 16;
- adev->gfx.config.max_hw_contexts = 8;
-
- adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
- adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
- adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
- adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
- gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
- break;
- }
-
- WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
-
- mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
- adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
- mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
-
- adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
- adev->gfx.config.mem_max_burst_length_bytes = 256;
- if (adev->flags & AMD_IS_APU) {
- /* Get memory bank mapping mode. */
- tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
- dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
- dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
-
- tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
- dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
- dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
-
- /* Validate settings in case only one DIMM installed. */
- if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
- dimm00_addr_map = 0;
- if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
- dimm01_addr_map = 0;
- if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
- dimm10_addr_map = 0;
- if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
- dimm11_addr_map = 0;
-
- /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
- /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
- if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
- adev->gfx.config.mem_row_size_in_kb = 2;
- else
- adev->gfx.config.mem_row_size_in_kb = 1;
- } else {
- tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
- adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
- if (adev->gfx.config.mem_row_size_in_kb > 4)
- adev->gfx.config.mem_row_size_in_kb = 4;
- }
- /* XXX use MC settings? */
- adev->gfx.config.shader_engine_tile_size = 32;
- adev->gfx.config.num_gpus = 1;
- adev->gfx.config.multi_gpu_tile_size = 64;
-
- /* fix up row size */
- gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
- switch (adev->gfx.config.mem_row_size_in_kb) {
- case 1:
- default:
- gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
- break;
- case 2:
- gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
- break;
- case 4:
- gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
- break;
- }
- adev->gfx.config.gb_addr_config = gb_addr_config;
+ u32 tmp, sh_mem_cfg;
+ int i;
+
+ WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
- WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
- WREG32(mmHDP_ADDR_CONFIG, gb_addr_config);
- WREG32(mmDMIF_ADDR_CALC, gb_addr_config);
- WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET, gb_addr_config & 0x70);
- WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET, gb_addr_config & 0x70);
- WREG32(mmUVD_UDEC_ADDR_CONFIG, gb_addr_config);
- WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, gb_addr_config);
- WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, gb_addr_config);
+ WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
gfx_v7_0_tiling_mode_table_init(adev);
- gfx_v7_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
- adev->gfx.config.max_sh_per_se,
- adev->gfx.config.max_backends_per_se);
+ gfx_v7_0_setup_rb(adev);
/* set HW defaults for 3D engine */
WREG32(mmCP_MEQ_THRESHOLDS,
- (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
- (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
+ (0x30 << CP_MEQ_THRESHOLDS__MEQ1_START__SHIFT) |
+ (0x60 << CP_MEQ_THRESHOLDS__MEQ2_START__SHIFT));
mutex_lock(&adev->grbm_idx_mutex);
/*
@@ -2255,7 +1733,7 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
- sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
+ sh_mem_cfg = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE,
SH_MEM_ALIGNMENT_MODE_UNALIGNED);
mutex_lock(&adev->srbm_mutex);
@@ -2379,7 +1857,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
- r = amdgpu_ring_lock(ring, 3);
+ r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_gfx_scratch_free(adev, scratch);
@@ -2388,7 +1866,7 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
@@ -2446,6 +1924,25 @@ static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x20); /* poll interval */
}
+/**
+ * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
+ *
+ * @adev: amdgpu_device pointer
+ * @ridx: amdgpu ring index
+ *
+ * Emits an hdp invalidate on the cp.
+ */
+static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) |
+ WR_CONFIRM));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 1);
+}
+
/**
* gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
*
@@ -2516,36 +2013,6 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, upper_32_bits(seq));
}
-/**
- * gfx_v7_0_ring_emit_semaphore - emit a semaphore on the CP ring
- *
- * @ring: amdgpu ring buffer object
- * @semaphore: amdgpu semaphore object
- * @emit_wait: Is this a sempahore wait?
- *
- * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
- * from running ahead of semaphore waits.
- */
-static bool gfx_v7_0_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- uint64_t addr = semaphore->gpu_addr;
- unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
-
- amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 1));
- amdgpu_ring_write(ring, addr & 0xffffffff);
- amdgpu_ring_write(ring, (upper_32_bits(addr) & 0xffff) | sel);
-
- if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
- /* Prevent the PFP from running ahead of the semaphore wait */
- amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
- amdgpu_ring_write(ring, 0x0);
- }
-
- return true;
-}
-
/*
* IB stuff
*/
@@ -2593,8 +2060,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
else
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -2622,8 +2088,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -2661,7 +2126,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
}
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(ring, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
goto err1;
@@ -2671,9 +2136,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err2;
@@ -2700,7 +2163,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
err2:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err1:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -2842,7 +2306,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v7_0_cp_gfx_enable(adev, true);
- r = amdgpu_ring_lock(ring, gfx_v7_0_get_csb_size(adev) + 8);
+ r = amdgpu_ring_alloc(ring, gfx_v7_0_get_csb_size(adev) + 8);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
return r;
@@ -2911,7 +2375,7 @@ static int gfx_v7_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_write(ring, 0x0000000e); /* VGT_VERTEX_REUSE_BLOCK_CNTL */
amdgpu_ring_write(ring, 0x00000010); /* VGT_OUT_DEALLOC_CNTL */
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
return 0;
}
@@ -2989,21 +2453,14 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev)
static u32 gfx_v7_0_ring_get_rptr_gfx(struct amdgpu_ring *ring)
{
- u32 rptr;
-
- rptr = ring->adev->wb.wb[ring->rptr_offs];
-
- return rptr;
+ return ring->adev->wb.wb[ring->rptr_offs];
}
static u32 gfx_v7_0_ring_get_wptr_gfx(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
- u32 wptr;
-
- wptr = RREG32(mmCP_RB0_WPTR);
- return wptr;
+ return RREG32(mmCP_RB0_WPTR);
}
static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
@@ -3016,21 +2473,13 @@ static void gfx_v7_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
static u32 gfx_v7_0_ring_get_rptr_compute(struct amdgpu_ring *ring)
{
- u32 rptr;
-
- rptr = ring->adev->wb.wb[ring->rptr_offs];
-
- return rptr;
+ return ring->adev->wb.wb[ring->rptr_offs];
}
static u32 gfx_v7_0_ring_get_wptr_compute(struct amdgpu_ring *ring)
{
- u32 wptr;
-
/* XXX check if swapping is necessary on BE */
- wptr = ring->adev->wb.wb[ring->wptr_offs];
-
- return wptr;
+ return ring->adev->wb.wb[ring->wptr_offs];
}
static void gfx_v7_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
@@ -3125,21 +2574,6 @@ static int gfx_v7_0_cp_compute_load_microcode(struct amdgpu_device *adev)
return 0;
}
-/**
- * gfx_v7_0_cp_compute_start - start the compute queues
- *
- * @adev: amdgpu_device pointer
- *
- * Enable the compute queues.
- * Returns 0 for success, error for failure.
- */
-static int gfx_v7_0_cp_compute_start(struct amdgpu_device *adev)
-{
- gfx_v7_0_cp_compute_enable(adev, true);
-
- return 0;
-}
-
/**
* gfx_v7_0_cp_compute_fini - stop the compute queues
*
@@ -3330,9 +2764,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev)
u32 *buf;
struct bonaire_mqd *mqd;
- r = gfx_v7_0_cp_compute_start(adev);
- if (r)
- return r;
+ gfx_v7_0_cp_compute_enable(adev, true);
/* fix up chicken bits */
tmp = RREG32(mmCP_CPF_DEBUG);
@@ -3610,6 +3042,26 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
+ *
+ * @ring: the ring to emmit the commands to
+ *
+ * Sync the command pipeline with the PFP. E.g. wait for everything
+ * to be completed.
+ */
+static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ if (usepfp) {
+ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
/*
* vm
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3641,14 +3093,6 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, 0xffffffff);
amdgpu_ring_write(ring, 4); /* poll interval */
- if (usepfp) {
- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- }
-
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
WRITE_DATA_DST_SEL(0)));
@@ -4408,28 +3852,19 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
}
}
-static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev,
- u32 se, u32 sh)
+static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
- u32 mask = 0, tmp, tmp1;
- int i;
-
- gfx_v7_0_select_se_sh(adev, se, sh);
- tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
- tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
- gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ u32 data, mask;
- tmp &= 0xffff0000;
+ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
- tmp |= tmp1;
- tmp >>= 16;
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
- for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
- mask <<= 1;
- mask |= 1;
- }
+ mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
- return (~tmp) & mask;
+ return (~data) & mask;
}
static void gfx_v7_0_init_ao_cu_mask(struct amdgpu_device *adev)
@@ -4767,6 +4202,172 @@ static int gfx_v7_0_late_init(void *handle)
return 0;
}
+static void gfx_v7_0_gpu_early_init(struct amdgpu_device *adev)
+{
+ u32 gb_addr_config;
+ u32 mc_shared_chmap, mc_arb_ramcfg;
+ u32 dimm00_addr_map, dimm01_addr_map, dimm10_addr_map, dimm11_addr_map;
+ u32 tmp;
+
+ switch (adev->asic_type) {
+ case CHIP_BONAIRE:
+ adev->gfx.config.max_shader_engines = 2;
+ adev->gfx.config.max_tile_pipes = 4;
+ adev->gfx.config.max_cu_per_sh = 7;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 2;
+ adev->gfx.config.max_texture_channel_caches = 4;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_HAWAII:
+ adev->gfx.config.max_shader_engines = 4;
+ adev->gfx.config.max_tile_pipes = 16;
+ adev->gfx.config.max_cu_per_sh = 11;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 4;
+ adev->gfx.config.max_texture_channel_caches = 16;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 32;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = HAWAII_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_KAVERI:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 4;
+ if ((adev->pdev->device == 0x1304) ||
+ (adev->pdev->device == 0x1305) ||
+ (adev->pdev->device == 0x130C) ||
+ (adev->pdev->device == 0x130F) ||
+ (adev->pdev->device == 0x1310) ||
+ (adev->pdev->device == 0x1311) ||
+ (adev->pdev->device == 0x131C)) {
+ adev->gfx.config.max_cu_per_sh = 8;
+ adev->gfx.config.max_backends_per_se = 2;
+ } else if ((adev->pdev->device == 0x1309) ||
+ (adev->pdev->device == 0x130A) ||
+ (adev->pdev->device == 0x130D) ||
+ (adev->pdev->device == 0x1313) ||
+ (adev->pdev->device == 0x131D)) {
+ adev->gfx.config.max_cu_per_sh = 6;
+ adev->gfx.config.max_backends_per_se = 2;
+ } else if ((adev->pdev->device == 0x1306) ||
+ (adev->pdev->device == 0x1307) ||
+ (adev->pdev->device == 0x130B) ||
+ (adev->pdev->device == 0x130E) ||
+ (adev->pdev->device == 0x1315) ||
+ (adev->pdev->device == 0x131B)) {
+ adev->gfx.config.max_cu_per_sh = 4;
+ adev->gfx.config.max_backends_per_se = 1;
+ } else {
+ adev->gfx.config.max_cu_per_sh = 3;
+ adev->gfx.config.max_backends_per_se = 1;
+ }
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_texture_channel_caches = 4;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 16;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ case CHIP_KABINI:
+ case CHIP_MULLINS:
+ default:
+ adev->gfx.config.max_shader_engines = 1;
+ adev->gfx.config.max_tile_pipes = 2;
+ adev->gfx.config.max_cu_per_sh = 2;
+ adev->gfx.config.max_sh_per_se = 1;
+ adev->gfx.config.max_backends_per_se = 1;
+ adev->gfx.config.max_texture_channel_caches = 2;
+ adev->gfx.config.max_gprs = 256;
+ adev->gfx.config.max_gs_threads = 16;
+ adev->gfx.config.max_hw_contexts = 8;
+
+ adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
+ adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
+ adev->gfx.config.sc_hiz_tile_fifo_size = 0x30;
+ adev->gfx.config.sc_earlyz_tile_fifo_size = 0x130;
+ gb_addr_config = BONAIRE_GB_ADDR_CONFIG_GOLDEN;
+ break;
+ }
+
+ mc_shared_chmap = RREG32(mmMC_SHARED_CHMAP);
+ adev->gfx.config.mc_arb_ramcfg = RREG32(mmMC_ARB_RAMCFG);
+ mc_arb_ramcfg = adev->gfx.config.mc_arb_ramcfg;
+
+ adev->gfx.config.num_tile_pipes = adev->gfx.config.max_tile_pipes;
+ adev->gfx.config.mem_max_burst_length_bytes = 256;
+ if (adev->flags & AMD_IS_APU) {
+ /* Get memory bank mapping mode. */
+ tmp = RREG32(mmMC_FUS_DRAM0_BANK_ADDR_MAPPING);
+ dimm00_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+ dimm01_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM0_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+ tmp = RREG32(mmMC_FUS_DRAM1_BANK_ADDR_MAPPING);
+ dimm10_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM0ADDRMAP);
+ dimm11_addr_map = REG_GET_FIELD(tmp, MC_FUS_DRAM1_BANK_ADDR_MAPPING, DIMM1ADDRMAP);
+
+ /* Validate settings in case only one DIMM installed. */
+ if ((dimm00_addr_map == 0) || (dimm00_addr_map == 3) || (dimm00_addr_map == 4) || (dimm00_addr_map > 12))
+ dimm00_addr_map = 0;
+ if ((dimm01_addr_map == 0) || (dimm01_addr_map == 3) || (dimm01_addr_map == 4) || (dimm01_addr_map > 12))
+ dimm01_addr_map = 0;
+ if ((dimm10_addr_map == 0) || (dimm10_addr_map == 3) || (dimm10_addr_map == 4) || (dimm10_addr_map > 12))
+ dimm10_addr_map = 0;
+ if ((dimm11_addr_map == 0) || (dimm11_addr_map == 3) || (dimm11_addr_map == 4) || (dimm11_addr_map > 12))
+ dimm11_addr_map = 0;
+
+ /* If DIMM Addr map is 8GB, ROW size should be 2KB. Otherwise 1KB. */
+ /* If ROW size(DIMM1) != ROW size(DMIMM0), ROW size should be larger one. */
+ if ((dimm00_addr_map == 11) || (dimm01_addr_map == 11) || (dimm10_addr_map == 11) || (dimm11_addr_map == 11))
+ adev->gfx.config.mem_row_size_in_kb = 2;
+ else
+ adev->gfx.config.mem_row_size_in_kb = 1;
+ } else {
+ tmp = (mc_arb_ramcfg & MC_ARB_RAMCFG__NOOFCOLS_MASK) >> MC_ARB_RAMCFG__NOOFCOLS__SHIFT;
+ adev->gfx.config.mem_row_size_in_kb = (4 * (1 << (8 + tmp))) / 1024;
+ if (adev->gfx.config.mem_row_size_in_kb > 4)
+ adev->gfx.config.mem_row_size_in_kb = 4;
+ }
+ /* XXX use MC settings? */
+ adev->gfx.config.shader_engine_tile_size = 32;
+ adev->gfx.config.num_gpus = 1;
+ adev->gfx.config.multi_gpu_tile_size = 64;
+
+ /* fix up row size */
+ gb_addr_config &= ~GB_ADDR_CONFIG__ROW_SIZE_MASK;
+ switch (adev->gfx.config.mem_row_size_in_kb) {
+ case 1:
+ default:
+ gb_addr_config |= (0 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+ break;
+ case 2:
+ gb_addr_config |= (1 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+ break;
+ case 4:
+ gb_addr_config |= (2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT);
+ break;
+ }
+ adev->gfx.config.gb_addr_config = gb_addr_config;
+}
+
static int gfx_v7_0_sw_init(void *handle)
{
struct amdgpu_ring *ring;
@@ -4870,6 +4471,10 @@ static int gfx_v7_0_sw_init(void *handle)
if (r)
return r;
+ adev->gfx.ce_ram_size = 0x8000;
+
+ gfx_v7_0_gpu_early_init(adev);
+
return r;
}
@@ -4910,8 +4515,6 @@ static int gfx_v7_0_hw_init(void *handle)
if (r)
return r;
- adev->gfx.ce_ram_size = 0x8000;
-
return r;
}
@@ -5028,16 +4631,6 @@ static void gfx_v7_0_print_status(void *handle)
RREG32(mmHDP_ADDR_CONFIG));
dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
RREG32(mmDMIF_ADDR_CALC));
- dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
- RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
- dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
- RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
- dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
- RREG32(mmUVD_UDEC_ADDR_CONFIG));
- dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
- RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
- dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
- RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
RREG32(mmCP_MEQ_THRESHOLDS));
@@ -5580,13 +5173,15 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.parse_cs = NULL,
.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
- .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
+ .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
@@ -5596,13 +5191,15 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.parse_cs = NULL,
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
- .emit_semaphore = gfx_v7_0_ring_emit_semaphore,
+ .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static void gfx_v7_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5672,7 +5269,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
- struct amdgpu_cu_info *cu_info)
+ struct amdgpu_cu_info *cu_info)
{
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
@@ -5680,16 +5277,19 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
if (!adev || !cu_info)
return -EINVAL;
+ memset(cu_info, 0, sizeof(*cu_info));
+
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
- bitmap = gfx_v7_0_get_cu_active_bitmap(adev, i, j);
+ gfx_v7_0_select_se_sh(adev, i, j);
+ bitmap = gfx_v7_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ for (k = 0; k < 16; k ++) {
if (bitmap & mask) {
if (counter < 2)
ao_bitmap |= mask;
@@ -5701,9 +5301,11 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
}
}
+ gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
- mutex_unlock(&adev->grbm_idx_mutex);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 7086ac17abee1..f0c7b35964809 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -43,9 +43,6 @@
#include "gca/gfx_8_0_sh_mask.h"
#include "gca/gfx_8_0_enum.h"
-#include "uvd/uvd_5_0_d.h"
-#include "uvd/uvd_5_0_sh_mask.h"
-
#include "dce/dce_10_0_d.h"
#include "dce/dce_10_0_sh_mask.h"
@@ -652,7 +649,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
WREG32(scratch, 0xCAFEDEAD);
- r = amdgpu_ring_lock(ring, 3);
+ r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
@@ -662,7 +659,7 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START));
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(scratch);
@@ -699,7 +696,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
}
WREG32(scratch, 0xCAFEDEAD);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(ring, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
goto err1;
@@ -709,9 +706,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err2;
@@ -737,7 +732,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
}
err2:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err1:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -1171,7 +1167,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
/* allocate an indirect buffer to put the commands in */
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(ring, NULL, total_size, &ib);
+ r = amdgpu_ib_get(adev, NULL, total_size, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
return r;
@@ -1266,9 +1262,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* shedule the ib on the ring */
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r) {
DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
goto fail;
@@ -1296,7 +1290,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
fail:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
return r;
}
@@ -2574,11 +2569,6 @@ static void gfx_v8_0_tiling_mode_table_init(struct amdgpu_device *adev)
}
}
-static u32 gfx_v8_0_create_bitmask(u32 bit_width)
-{
- return (u32)((1ULL << bit_width) - 1);
-}
-
void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
{
u32 data = REG_SET_FIELD(0, GRBM_GFX_INDEX, INSTANCE_BROADCAST_WRITES, 1);
@@ -2599,89 +2589,49 @@ void gfx_v8_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num)
WREG32(mmGRBM_GFX_INDEX, data);
}
-static u32 gfx_v8_0_get_rb_disabled(struct amdgpu_device *adev,
- u32 max_rb_num_per_se,
- u32 sh_per_se)
+static u32 gfx_v8_0_create_bitmask(u32 bit_width)
+{
+ return (u32)((1ULL << bit_width) - 1);
+}
+
+static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
{
u32 data, mask;
data = RREG32(mmCC_RB_BACKEND_DISABLE);
- data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-
data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+ data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
data >>= GC_USER_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
- mask = gfx_v8_0_create_bitmask(max_rb_num_per_se / sh_per_se);
+ mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se);
- return data & mask;
+ return (~data) & mask;
}
-static void gfx_v8_0_setup_rb(struct amdgpu_device *adev,
- u32 se_num, u32 sh_per_se,
- u32 max_rb_num_per_se)
+static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
int i, j;
- u32 data, mask;
- u32 disabled_rbs = 0;
- u32 enabled_rbs = 0;
+ u32 data;
+ u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < se_num; i++) {
- for (j = 0; j < sh_per_se; j++) {
+ for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+ for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v8_0_select_se_sh(adev, i, j);
- data = gfx_v8_0_get_rb_disabled(adev,
- max_rb_num_per_se, sh_per_se);
- disabled_rbs |= data << ((i * sh_per_se + j) *
- RB_BITMAP_WIDTH_PER_SH);
+ data = gfx_v8_0_get_rb_active_bitmap(adev);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
}
}
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
- mask = 1;
- for (i = 0; i < max_rb_num_per_se * se_num; i++) {
- if (!(disabled_rbs & mask))
- enabled_rbs |= mask;
- mask <<= 1;
- }
-
- adev->gfx.config.backend_enable_mask = enabled_rbs;
-
- mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < se_num; i++) {
- gfx_v8_0_select_se_sh(adev, i, 0xffffffff);
- data = RREG32(mmPA_SC_RASTER_CONFIG);
- for (j = 0; j < sh_per_se; j++) {
- switch (enabled_rbs & 3) {
- case 0:
- if (j == 0)
- data |= (RASTER_CONFIG_RB_MAP_3 <<
- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
- else
- data |= (RASTER_CONFIG_RB_MAP_0 <<
- PA_SC_RASTER_CONFIG__PKR_MAP__SHIFT);
- break;
- case 1:
- data |= (RASTER_CONFIG_RB_MAP_0 <<
- (i * sh_per_se + j) * 2);
- break;
- case 2:
- data |= (RASTER_CONFIG_RB_MAP_3 <<
- (i * sh_per_se + j) * 2);
- break;
- case 3:
- default:
- data |= (RASTER_CONFIG_RB_MAP_2 <<
- (i * sh_per_se + j) * 2);
- break;
- }
- enabled_rbs >>= 2;
- }
- WREG32(mmPA_SC_RASTER_CONFIG, data);
- }
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
- mutex_unlock(&adev->grbm_idx_mutex);
+ adev->gfx.config.backend_enable_mask = active_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
}
/**
@@ -2741,19 +2691,10 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
WREG32(mmGB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmHDP_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
WREG32(mmDMIF_ADDR_CALC, adev->gfx.config.gb_addr_config);
- WREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET,
- adev->gfx.config.gb_addr_config & 0x70);
- WREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET,
- adev->gfx.config.gb_addr_config & 0x70);
- WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
- WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
- WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
gfx_v8_0_tiling_mode_table_init(adev);
- gfx_v8_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
- adev->gfx.config.max_sh_per_se,
- adev->gfx.config.max_backends_per_se);
+ gfx_v8_0_setup_rb(adev);
/* XXX SH_MEM regs */
/* where to put LDS, scratch, GPUVM in FSA64 space */
@@ -3062,7 +3003,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
gfx_v8_0_cp_gfx_enable(adev, true);
- r = amdgpu_ring_lock(ring, gfx_v8_0_get_csb_size(adev) + 4);
+ r = amdgpu_ring_alloc(ring, gfx_v8_0_get_csb_size(adev) + 4);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring (%d).\n", r);
return r;
@@ -3126,7 +3067,7 @@ static int gfx_v8_0_cp_gfx_start(struct amdgpu_device *adev)
amdgpu_ring_write(ring, 0x8000);
amdgpu_ring_write(ring, 0x8000);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
return 0;
}
@@ -3226,13 +3167,6 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
udelay(50);
}
-static int gfx_v8_0_cp_compute_start(struct amdgpu_device *adev)
-{
- gfx_v8_0_cp_compute_enable(adev, true);
-
- return 0;
-}
-
static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev)
{
const struct gfx_firmware_header_v1_0 *mec_hdr;
@@ -3802,9 +3736,7 @@ static int gfx_v8_0_cp_compute_resume(struct amdgpu_device *adev)
WREG32(mmCP_PQ_STATUS, tmp);
}
- r = gfx_v8_0_cp_compute_start(adev);
- if (r)
- return r;
+ gfx_v8_0_cp_compute_enable(adev, true);
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
struct amdgpu_ring *ring = &adev->gfx.compute_ring[i];
@@ -4016,16 +3948,6 @@ static void gfx_v8_0_print_status(void *handle)
RREG32(mmHDP_ADDR_CONFIG));
dev_info(adev->dev, " DMIF_ADDR_CALC=0x%08X\n",
RREG32(mmDMIF_ADDR_CALC));
- dev_info(adev->dev, " SDMA0_TILING_CONFIG=0x%08X\n",
- RREG32(mmSDMA0_TILING_CONFIG + SDMA0_REGISTER_OFFSET));
- dev_info(adev->dev, " SDMA1_TILING_CONFIG=0x%08X\n",
- RREG32(mmSDMA0_TILING_CONFIG + SDMA1_REGISTER_OFFSET));
- dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
- RREG32(mmUVD_UDEC_ADDR_CONFIG));
- dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
- RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
- dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
- RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
dev_info(adev->dev, " CP_MEQ_THRESHOLDS=0x%08X\n",
RREG32(mmCP_MEQ_THRESHOLDS));
@@ -4667,6 +4589,18 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x20); /* poll interval */
}
+static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) |
+ WR_CONFIRM));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 1);
+
+}
+
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
@@ -4699,8 +4633,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
else
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -4729,8 +4662,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -4762,49 +4694,10 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
}
-/**
- * gfx_v8_0_ring_emit_semaphore - emit a semaphore on the CP ring
- *
- * @ring: amdgpu ring buffer object
- * @semaphore: amdgpu semaphore object
- * @emit_wait: Is this a sempahore wait?
- *
- * Emits a semaphore signal/wait packet to the CP ring and prevents the PFP
- * from running ahead of semaphore waits.
- */
-static bool gfx_v8_0_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- uint64_t addr = semaphore->gpu_addr;
- unsigned sel = emit_wait ? PACKET3_SEM_SEL_WAIT : PACKET3_SEM_SEL_SIGNAL;
-
- if (ring->adev->asic_type == CHIP_TOPAZ ||
- ring->adev->asic_type == CHIP_TONGA ||
- ring->adev->asic_type == CHIP_FIJI)
- /* we got a hw semaphore bug in VI TONGA, return false to switch back to sw fence wait */
- return false;
- else {
- amdgpu_ring_write(ring, PACKET3(PACKET3_MEM_SEMAPHORE, 2));
- amdgpu_ring_write(ring, lower_32_bits(addr));
- amdgpu_ring_write(ring, upper_32_bits(addr));
- amdgpu_ring_write(ring, sel);
- }
-
- if (emit_wait && (ring->type == AMDGPU_RING_TYPE_GFX)) {
- /* Prevent the PFP from running ahead of the semaphore wait */
- amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
- amdgpu_ring_write(ring, 0x0);
- }
-
- return true;
-}
-
-static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vm_id, uint64_t pd_addr)
+static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
- uint32_t seq = ring->fence_drv.sync_seq[ring->idx];
+ uint32_t seq = ring->fence_drv.sync_seq;
uint64_t addr = ring->fence_drv.gpu_addr;
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
@@ -4824,6 +4717,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
}
+}
+
+static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
+{
+ int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -5146,13 +5045,15 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.parse_cs = NULL,
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
- .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
+ .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
@@ -5162,13 +5063,15 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.parse_cs = NULL,
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
- .emit_semaphore = gfx_v8_0_ring_emit_semaphore,
+ .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static void gfx_v8_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -5237,32 +5140,23 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
}
}
-static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev,
- u32 se, u32 sh)
+static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
{
- u32 mask = 0, tmp, tmp1;
- int i;
-
- gfx_v8_0_select_se_sh(adev, se, sh);
- tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
- tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
- gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ u32 data, mask;
- tmp &= 0xffff0000;
+ data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
+ data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
- tmp |= tmp1;
- tmp >>= 16;
+ data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
+ data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
- for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
- mask <<= 1;
- mask |= 1;
- }
+ mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
- return (~tmp) & mask;
+ return (~data) & mask;
}
int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
- struct amdgpu_cu_info *cu_info)
+ struct amdgpu_cu_info *cu_info)
{
int i, j, k, counter, active_cu_number = 0;
u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
@@ -5270,16 +5164,19 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
if (!adev || !cu_info)
return -EINVAL;
+ memset(cu_info, 0, sizeof(*cu_info));
+
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
mask = 1;
ao_bitmap = 0;
counter = 0;
- bitmap = gfx_v8_0_get_cu_active_bitmap(adev, i, j);
+ gfx_v8_0_select_se_sh(adev, i, j);
+ bitmap = gfx_v8_0_get_cu_active_bitmap(adev);
cu_info->bitmap[i][j] = bitmap;
- for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+ for (k = 0; k < 16; k ++) {
if (bitmap & mask) {
if (counter < 2)
ao_bitmap |= mask;
@@ -5291,9 +5188,11 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
ao_cu_mask |= (ao_bitmap << (i * 16 + j * 8));
}
}
+ gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+ mutex_unlock(&adev->grbm_idx_mutex);
cu_info->number = active_cu_number;
cu_info->ao_cu_mask = ao_cu_mask;
- mutex_unlock(&adev->grbm_idx_mutex);
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index b8060795b27b9..82ce7d9438843 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -339,7 +339,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
tmp = RREG32(mmHDP_MISC_CNTL);
- tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
WREG32(mmHDP_MISC_CNTL, tmp);
tmp = RREG32(mmHDP_HOST_PATH_CNTL);
@@ -694,7 +694,8 @@ static int gmc_v7_0_vm_init(struct amdgpu_device *adev)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+ amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
@@ -926,10 +927,6 @@ static int gmc_v7_0_sw_init(void *handle)
int dma_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_gem_init(adev);
- if (r)
- return r;
-
r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
if (r)
return r;
@@ -1010,7 +1007,7 @@ static int gmc_v7_0_sw_fini(void *handle)
adev->vm_manager.enabled = false;
}
gmc_v7_0_gart_fini(adev);
- amdgpu_gem_fini(adev);
+ amdgpu_gem_force_release(adev);
amdgpu_bo_fini(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 3efd45546241a..29bd7b57dc912 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -252,6 +252,12 @@ static int gmc_v8_0_mc_load_microcode(struct amdgpu_device *adev)
if (!adev->mc.fw)
return -EINVAL;
+ /* Skip MC ucode loading on SR-IOV capable boards.
+ * vbios does this for us in asic_init in that case.
+ */
+ if (adev->virtualization.supports_sr_iov)
+ return 0;
+
hdr = (const struct mc_firmware_header_v1_0 *)adev->mc.fw->data;
amdgpu_ucode_print_mc_hdr(&hdr->header);
@@ -380,7 +386,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
tmp = RREG32(mmHDP_MISC_CNTL);
- tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
WREG32(mmHDP_MISC_CNTL, tmp);
tmp = RREG32(mmHDP_HOST_PATH_CNTL);
@@ -774,7 +780,8 @@ static int gmc_v8_0_vm_init(struct amdgpu_device *adev)
* amdgpu graphics/compute will use VMIDs 1-7
* amdkfd will use VMIDs 8-15
*/
- adev->vm_manager.nvm = AMDGPU_NUM_OF_VMIDS;
+ adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
+ amdgpu_vm_manager_init(adev);
/* base offset of vram pages */
if (adev->flags & AMD_IS_APU) {
@@ -880,10 +887,6 @@ static int gmc_v8_0_sw_init(void *handle)
int dma_bits;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
- r = amdgpu_gem_init(adev);
- if (r)
- return r;
-
r = amdgpu_irq_add_id(adev, 146, &adev->mc.vm_fault);
if (r)
return r;
@@ -964,7 +967,7 @@ static int gmc_v8_0_sw_fini(void *handle)
adev->vm_manager.enabled = false;
}
gmc_v8_0_gart_fini(adev);
- amdgpu_gem_fini(adev);
+ amdgpu_gem_force_release(adev);
amdgpu_bo_fini(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
index 090486c182497..52ee08193295f 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_smc.c
@@ -279,6 +279,12 @@ static int iceland_smu_upload_firmware_image(struct amdgpu_device *adev)
if (!adev->pm.fw)
return -EINVAL;
+ /* Skip SMC ucode loading on SR-IOV capable boards.
+ * vbios does this for us in asic_init in that case.
+ */
+ if (adev->virtualization.supports_sr_iov)
+ return 0;
+
hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
amdgpu_ucode_print_smc_hdr(&hdr->header);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 2cf50180cc51b..6e0a86a563f3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -32,8 +32,8 @@
#include "oss/oss_2_4_d.h"
#include "oss/oss_2_4_sh_mask.h"
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
@@ -244,7 +244,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
- u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+ u32 vmid = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 2)
@@ -300,6 +300,13 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
+static void sdma_v2_4_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 1);
+}
/**
* sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
*
@@ -334,31 +341,6 @@ static void sdma_v2_4_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
-/**
- * sdma_v2_4_ring_emit_semaphore - emit a semaphore on the dma ring
- *
- * @ring: amdgpu_ring structure holding ring information
- * @semaphore: amdgpu semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (VI).
- */
-static bool sdma_v2_4_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- u64 addr = semaphore->gpu_addr;
- u32 sig = emit_wait ? 0 : 1;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
- SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
- amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- return true;
-}
-
/**
* sdma_v2_4_gfx_stop - stop the gfx async dma engines
*
@@ -459,6 +441,9 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev)
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+ WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+ adev->gfx.config.gb_addr_config & 0x70);
+
WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
/* Set ring buffer size in dwords */
@@ -636,7 +621,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ring_lock(ring, 5);
+ r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_wb_free(adev, index);
@@ -649,7 +634,7 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -699,7 +684,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(ring, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
goto err0;
@@ -716,9 +701,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
ib.length_dw = 8;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;
@@ -744,7 +727,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
err1:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err0:
amdgpu_wb_free(adev, index);
return r;
@@ -797,7 +781,7 @@ static void sdma_v2_4_vm_copy_pte(struct amdgpu_ib *ib,
* Update PTEs by writing them manually using sDMA (CIK).
*/
static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
- uint64_t pe,
+ const dma_addr_t *pages_addr, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
@@ -816,14 +800,7 @@ static void sdma_v2_4_vm_write_pte(struct amdgpu_ib *ib,
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = ndw;
for (; ndw > 0; ndw -= 2, --count, pe += 8) {
- if (flags & AMDGPU_PTE_SYSTEM) {
- value = amdgpu_vm_map_gart(ib->ring->adev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
- } else if (flags & AMDGPU_PTE_VALID) {
- value = addr;
- } else {
- value = 0;
- }
+ value = amdgpu_vm_map_gart(pages_addr, addr);
addr += incr;
value |= flags;
ib->ptr[ib->length_dw++] = value;
@@ -881,14 +858,14 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib,
}
/**
- * sdma_v2_4_vm_pad_ib - pad the IB to the required number of dw
+ * sdma_v2_4_ring_pad_ib - pad the IB to the required number of dw
*
* @ib: indirect buffer to fill with padding
*
*/
-static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
+static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
+ struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
u32 pad_count;
int i;
@@ -903,6 +880,31 @@ static void sdma_v2_4_vm_pad_ib(struct amdgpu_ib *ib)
SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
+/**
+ * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
/**
* sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
*
@@ -1111,6 +1113,8 @@ static void sdma_v2_4_print_status(void *handle)
i, RREG32(mmSDMA0_GFX_RB_BASE + sdma_offsets[i]));
dev_info(adev->dev, " SDMA%d_GFX_RB_BASE_HI=0x%08X\n",
i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
+ dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n",
+ i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
vi_srbm_select(adev, 0, 0, 0, j);
@@ -1302,12 +1306,14 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.parse_cs = NULL,
.emit_ib = sdma_v2_4_ring_emit_ib,
.emit_fence = sdma_v2_4_ring_emit_fence,
- .emit_semaphore = sdma_v2_4_ring_emit_semaphore,
+ .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate,
.test_ring = sdma_v2_4_ring_test_ring,
.test_ib = sdma_v2_4_ring_test_ib,
.insert_nop = sdma_v2_4_ring_insert_nop,
+ .pad_ib = sdma_v2_4_ring_pad_ib,
};
static void sdma_v2_4_set_ring_funcs(struct amdgpu_device *adev)
@@ -1405,14 +1411,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = {
.copy_pte = sdma_v2_4_vm_copy_pte,
.write_pte = sdma_v2_4_vm_write_pte,
.set_pte_pde = sdma_v2_4_vm_set_pte_pde,
- .pad_ib = sdma_v2_4_vm_pad_ib,
};
static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ unsigned i;
+
if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs;
- adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
- adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->vm_manager.vm_pte_rings[i] =
+ &adev->sdma.instance[i].ring;
+
+ adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index ad54c46751b00..8c8ca98dd1298 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -355,7 +355,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
- u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+ u32 vmid = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 2)
@@ -410,6 +410,14 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
+static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 1);
+}
+
/**
* sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
*
@@ -444,32 +452,6 @@ static void sdma_v3_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
amdgpu_ring_write(ring, SDMA_PKT_TRAP_INT_CONTEXT_INT_CONTEXT(0));
}
-
-/**
- * sdma_v3_0_ring_emit_semaphore - emit a semaphore on the dma ring
- *
- * @ring: amdgpu_ring structure holding ring information
- * @semaphore: amdgpu semaphore object
- * @emit_wait: wait or signal semaphore
- *
- * Add a DMA semaphore packet to the ring wait on or signal
- * other rings (VI).
- */
-static bool sdma_v3_0_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- u64 addr = semaphore->gpu_addr;
- u32 sig = emit_wait ? 0 : 1;
-
- amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SEM) |
- SDMA_PKT_SEMAPHORE_HEADER_SIGNAL(sig));
- amdgpu_ring_write(ring, lower_32_bits(addr) & 0xfffffff8);
- amdgpu_ring_write(ring, upper_32_bits(addr));
-
- return true;
-}
-
/**
* sdma_v3_0_gfx_stop - stop the gfx async dma engines
*
@@ -596,6 +578,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev)
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+ WREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i],
+ adev->gfx.config.gb_addr_config & 0x70);
+
WREG32(mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL + sdma_offsets[i], 0);
/* Set ring buffer size in dwords */
@@ -788,7 +773,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
- r = amdgpu_ring_lock(ring, 5);
+ r = amdgpu_ring_alloc(ring, 5);
if (r) {
DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r);
amdgpu_wb_free(adev, index);
@@ -801,7 +786,7 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, upper_32_bits(gpu_addr));
amdgpu_ring_write(ring, SDMA_PKT_WRITE_UNTILED_DW_3_COUNT(1));
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = le32_to_cpu(adev->wb.wb[index]);
@@ -851,7 +836,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
tmp = 0xCAFEDEAD;
adev->wb.wb[index] = cpu_to_le32(tmp);
memset(&ib, 0, sizeof(ib));
- r = amdgpu_ib_get(ring, NULL, 256, &ib);
+ r = amdgpu_ib_get(adev, NULL, 256, &ib);
if (r) {
DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
goto err0;
@@ -868,9 +853,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
ib.length_dw = 8;
- r = amdgpu_sched_ib_submit_kernel_helper(adev, ring, &ib, 1, NULL,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;
@@ -895,7 +878,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
}
err1:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err0:
amdgpu_wb_free(adev, index);
return r;
@@ -948,7 +932,7 @@ static void sdma_v3_0_vm_copy_pte(struct amdgpu_ib *ib,
* Update PTEs by writing them manually using sDMA (CIK).
*/
static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
- uint64_t pe,
+ const dma_addr_t *pages_addr, uint64_t pe,
uint64_t addr, unsigned count,
uint32_t incr, uint32_t flags)
{
@@ -967,14 +951,7 @@ static void sdma_v3_0_vm_write_pte(struct amdgpu_ib *ib,
ib->ptr[ib->length_dw++] = upper_32_bits(pe);
ib->ptr[ib->length_dw++] = ndw;
for (; ndw > 0; ndw -= 2, --count, pe += 8) {
- if (flags & AMDGPU_PTE_SYSTEM) {
- value = amdgpu_vm_map_gart(ib->ring->adev, addr);
- value &= 0xFFFFFFFFFFFFF000ULL;
- } else if (flags & AMDGPU_PTE_VALID) {
- value = addr;
- } else {
- value = 0;
- }
+ value = amdgpu_vm_map_gart(pages_addr, addr);
addr += incr;
value |= flags;
ib->ptr[ib->length_dw++] = value;
@@ -1032,14 +1009,14 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib,
}
/**
- * sdma_v3_0_vm_pad_ib - pad the IB to the required number of dw
+ * sdma_v3_0_ring_pad_ib - pad the IB to the required number of dw
*
* @ib: indirect buffer to fill with padding
*
*/
-static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
+static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
{
- struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ib->ring);
+ struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring);
u32 pad_count;
int i;
@@ -1054,6 +1031,31 @@ static void sdma_v3_0_vm_pad_ib(struct amdgpu_ib *ib)
SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
}
+/**
+ * sdma_v3_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
/**
* sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA
*
@@ -1275,6 +1277,8 @@ static void sdma_v3_0_print_status(void *handle)
i, RREG32(mmSDMA0_GFX_RB_BASE_HI + sdma_offsets[i]));
dev_info(adev->dev, " SDMA%d_GFX_DOORBELL=0x%08X\n",
i, RREG32(mmSDMA0_GFX_DOORBELL + sdma_offsets[i]));
+ dev_info(adev->dev, " SDMA%d_TILING_CONFIG=0x%08X\n",
+ i, RREG32(mmSDMA0_TILING_CONFIG + sdma_offsets[i]));
mutex_lock(&adev->srbm_mutex);
for (j = 0; j < 16; j++) {
vi_srbm_select(adev, 0, 0, 0, j);
@@ -1570,12 +1574,14 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.parse_cs = NULL,
.emit_ib = sdma_v3_0_ring_emit_ib,
.emit_fence = sdma_v3_0_ring_emit_fence,
- .emit_semaphore = sdma_v3_0_ring_emit_semaphore,
+ .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate,
.test_ring = sdma_v3_0_ring_test_ring,
.test_ib = sdma_v3_0_ring_test_ib,
.insert_nop = sdma_v3_0_ring_insert_nop,
+ .pad_ib = sdma_v3_0_ring_pad_ib,
};
static void sdma_v3_0_set_ring_funcs(struct amdgpu_device *adev)
@@ -1673,14 +1679,18 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = {
.copy_pte = sdma_v3_0_vm_copy_pte,
.write_pte = sdma_v3_0_vm_write_pte,
.set_pte_pde = sdma_v3_0_vm_set_pte_pde,
- .pad_ib = sdma_v3_0_vm_pad_ib,
};
static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev)
{
+ unsigned i;
+
if (adev->vm_manager.vm_pte_funcs == NULL) {
adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs;
- adev->vm_manager.vm_pte_funcs_ring = &adev->sdma.instance[0].ring;
- adev->vm_manager.vm_pte_funcs_ring->is_pte_ring = true;
+ for (i = 0; i < adev->sdma.num_instances; i++)
+ adev->vm_manager.vm_pte_rings[i] =
+ &adev->sdma.instance[i].ring;
+
+ adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
index 361c49a823230..083893dd68c06 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_smc.c
@@ -272,6 +272,12 @@ static int tonga_smu_upload_firmware_image(struct amdgpu_device *adev)
if (!adev->pm.fw)
return -EINVAL;
+ /* Skip SMC ucode loading on SR-IOV capable boards.
+ * vbios does this for us in asic_init in that case.
+ */
+ if (adev->virtualization.supports_sr_iov)
+ return 0;
+
hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
amdgpu_ucode_print_smc_hdr(&hdr->header);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index fbd3767671bb9..c606ccb38d8b9 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -164,7 +164,7 @@ static int uvd_v4_2_hw_init(void *handle)
goto done;
}
- r = amdgpu_ring_lock(ring, 10);
+ r = amdgpu_ring_alloc(ring, 10);
if (r) {
DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
goto done;
@@ -189,7 +189,7 @@ static int uvd_v4_2_hw_init(void *handle)
amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
amdgpu_ring_write(ring, 3);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
done:
/* lower clocks again */
@@ -438,33 +438,6 @@ static void uvd_v4_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, 2);
}
-/**
- * uvd_v4_2_ring_emit_semaphore - emit semaphore command
- *
- * @ring: amdgpu_ring pointer
- * @semaphore: semaphore to emit commands for
- * @emit_wait: true if we should emit a wait command
- *
- * Emit a semaphore command (either wait or signal) to the UVD ring.
- */
-static bool uvd_v4_2_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- uint64_t addr = semaphore->gpu_addr;
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
- amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
- amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
- amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
-
- return true;
-}
-
/**
* uvd_v4_2_ring_test_ring - register write test
*
@@ -480,7 +453,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
int r;
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
- r = amdgpu_ring_lock(ring, 3);
+ r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
@@ -488,7 +461,7 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring)
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
@@ -549,7 +522,7 @@ static int uvd_v4_2_ring_test_ib(struct amdgpu_ring *ring)
goto error;
}
- r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence);
+ r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
goto error;
@@ -603,6 +576,10 @@ static void uvd_v4_2_mc_resume(struct amdgpu_device *adev)
addr = (adev->uvd.gpu_addr >> 32) & 0xFF;
WREG32(mmUVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+
uvd_v4_2_init_cg(adev);
}
@@ -804,6 +781,13 @@ static void uvd_v4_2_print_status(void *handle)
RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n",
RREG32(mmUVD_CONTEXT_ID));
+ dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_ADDR_CONFIG));
+ dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
+ dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
+
}
static int uvd_v4_2_set_interrupt_state(struct amdgpu_device *adev,
@@ -888,10 +872,10 @@ static const struct amdgpu_ring_funcs uvd_v4_2_ring_funcs = {
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_ib = uvd_v4_2_ring_emit_ib,
.emit_fence = uvd_v4_2_ring_emit_fence,
- .emit_semaphore = uvd_v4_2_ring_emit_semaphore,
.test_ring = uvd_v4_2_ring_test_ring,
.test_ib = uvd_v4_2_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static void uvd_v4_2_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 57f1c5bf3bf19..e3c852d9d79a4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -160,7 +160,7 @@ static int uvd_v5_0_hw_init(void *handle)
goto done;
}
- r = amdgpu_ring_lock(ring, 10);
+ r = amdgpu_ring_alloc(ring, 10);
if (r) {
DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
goto done;
@@ -185,7 +185,7 @@ static int uvd_v5_0_hw_init(void *handle)
amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
amdgpu_ring_write(ring, 3);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
done:
/* lower clocks again */
@@ -279,6 +279,10 @@ static void uvd_v5_0_mc_resume(struct amdgpu_device *adev)
size = AMDGPU_UVD_HEAP_SIZE;
WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
/**
@@ -482,33 +486,6 @@ static void uvd_v5_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, 2);
}
-/**
- * uvd_v5_0_ring_emit_semaphore - emit semaphore command
- *
- * @ring: amdgpu_ring pointer
- * @semaphore: semaphore to emit commands for
- * @emit_wait: true if we should emit a wait command
- *
- * Emit a semaphore command (either wait or signal) to the UVD ring.
- */
-static bool uvd_v5_0_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- uint64_t addr = semaphore->gpu_addr;
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
- amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
- amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
- amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
-
- return true;
-}
-
/**
* uvd_v5_0_ring_test_ring - register write test
*
@@ -524,7 +501,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
- r = amdgpu_ring_lock(ring, 3);
+ r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
@@ -532,7 +509,7 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring)
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
@@ -595,7 +572,7 @@ static int uvd_v5_0_ring_test_ib(struct amdgpu_ring *ring)
goto error;
}
- r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence);
+ r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
goto error;
@@ -751,6 +728,12 @@ static void uvd_v5_0_print_status(void *handle)
RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n",
RREG32(mmUVD_CONTEXT_ID));
+ dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_ADDR_CONFIG));
+ dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
+ dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
}
static int uvd_v5_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -829,10 +812,10 @@ static const struct amdgpu_ring_funcs uvd_v5_0_ring_funcs = {
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_ib = uvd_v5_0_ring_emit_ib,
.emit_fence = uvd_v5_0_ring_emit_fence,
- .emit_semaphore = uvd_v5_0_ring_emit_semaphore,
.test_ring = uvd_v5_0_ring_test_ring,
.test_ib = uvd_v5_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static void uvd_v5_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 0b365b7651ffb..3375e614ac671 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -157,7 +157,7 @@ static int uvd_v6_0_hw_init(void *handle)
goto done;
}
- r = amdgpu_ring_lock(ring, 10);
+ r = amdgpu_ring_alloc(ring, 10);
if (r) {
DRM_ERROR("amdgpu: ring failed to lock UVD ring (%d).\n", r);
goto done;
@@ -182,7 +182,7 @@ static int uvd_v6_0_hw_init(void *handle)
amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CNTL, 0));
amdgpu_ring_write(ring, 3);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
done:
if (!r)
@@ -277,6 +277,10 @@ static void uvd_v6_0_mc_resume(struct amdgpu_device *adev)
size = AMDGPU_UVD_HEAP_SIZE;
WREG32(mmUVD_VCPU_CACHE_OFFSET2, offset >> 3);
WREG32(mmUVD_VCPU_CACHE_SIZE2, size);
+
+ WREG32(mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DB_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
+ WREG32(mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config);
}
static void cz_set_uvd_clock_gating_branches(struct amdgpu_device *adev,
@@ -721,33 +725,6 @@ static void uvd_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq
amdgpu_ring_write(ring, 2);
}
-/**
- * uvd_v6_0_ring_emit_semaphore - emit semaphore command
- *
- * @ring: amdgpu_ring pointer
- * @semaphore: semaphore to emit commands for
- * @emit_wait: true if we should emit a wait command
- *
- * Emit a semaphore command (either wait or signal) to the UVD ring.
- */
-static bool uvd_v6_0_ring_emit_semaphore(struct amdgpu_ring *ring,
- struct amdgpu_semaphore *semaphore,
- bool emit_wait)
-{
- uint64_t addr = semaphore->gpu_addr;
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_LOW, 0));
- amdgpu_ring_write(ring, (addr >> 3) & 0x000FFFFF);
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_ADDR_HIGH, 0));
- amdgpu_ring_write(ring, (addr >> 23) & 0x000FFFFF);
-
- amdgpu_ring_write(ring, PACKET0(mmUVD_SEMA_CMD, 0));
- amdgpu_ring_write(ring, 0x80 | (emit_wait ? 1 : 0));
-
- return true;
-}
-
/**
* uvd_v6_0_ring_test_ring - register write test
*
@@ -763,7 +740,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
int r;
WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD);
- r = amdgpu_ring_lock(ring, 3);
+ r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
@@ -771,7 +748,7 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring)
}
amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
- amdgpu_ring_unlock_commit(ring);
+ amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmUVD_CONTEXT_ID);
if (tmp == 0xDEADBEEF)
@@ -827,7 +804,7 @@ static int uvd_v6_0_ring_test_ib(struct amdgpu_ring *ring)
goto error;
}
- r = amdgpu_uvd_get_destroy_msg(ring, 1, &fence);
+ r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
goto error;
@@ -974,6 +951,12 @@ static void uvd_v6_0_print_status(void *handle)
RREG32(mmUVD_SEMA_SIGNAL_INCOMPLETE_TIMEOUT_CNTL));
dev_info(adev->dev, " UVD_CONTEXT_ID=0x%08X\n",
RREG32(mmUVD_CONTEXT_ID));
+ dev_info(adev->dev, " UVD_UDEC_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_ADDR_CONFIG));
+ dev_info(adev->dev, " UVD_UDEC_DB_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_DB_ADDR_CONFIG));
+ dev_info(adev->dev, " UVD_UDEC_DBW_ADDR_CONFIG=0x%08X\n",
+ RREG32(mmUVD_UDEC_DBW_ADDR_CONFIG));
}
static int uvd_v6_0_set_interrupt_state(struct amdgpu_device *adev,
@@ -1065,10 +1048,10 @@ static const struct amdgpu_ring_funcs uvd_v6_0_ring_funcs = {
.parse_cs = amdgpu_uvd_ring_parse_cs,
.emit_ib = uvd_v6_0_ring_emit_ib,
.emit_fence = uvd_v6_0_ring_emit_fence,
- .emit_semaphore = uvd_v6_0_ring_emit_semaphore,
.test_ring = uvd_v6_0_ring_test_ring,
.test_ib = uvd_v6_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static void uvd_v6_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index a822edacfa95c..c7e885bcfd417 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -642,10 +642,10 @@ static const struct amdgpu_ring_funcs vce_v2_0_ring_funcs = {
.parse_cs = amdgpu_vce_ring_parse_cs,
.emit_ib = amdgpu_vce_ring_emit_ib,
.emit_fence = amdgpu_vce_ring_emit_fence,
- .emit_semaphore = amdgpu_vce_ring_emit_semaphore,
.test_ring = amdgpu_vce_ring_test_ring,
.test_ib = amdgpu_vce_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index d662fa9f9091a..ce468ee5da2a7 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -762,10 +762,10 @@ static const struct amdgpu_ring_funcs vce_v3_0_ring_funcs = {
.parse_cs = amdgpu_vce_ring_parse_cs,
.emit_ib = amdgpu_vce_ring_emit_ib,
.emit_fence = amdgpu_vce_ring_emit_fence,
- .emit_semaphore = amdgpu_vce_ring_emit_semaphore,
.test_ring = amdgpu_vce_ring_test_ring,
.test_ib = amdgpu_vce_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
+ .pad_ib = amdgpu_ring_generic_pad_ib,
};
static void vce_v3_0_set_ring_funcs(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 0d14d108a6c4a..1c120efa292ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -74,6 +74,9 @@
#include "uvd_v6_0.h"
#include "vce_v3_0.h"
#include "amdgpu_powerplay.h"
+#if defined(CONFIG_DRM_AMD_ACP)
+#include "amdgpu_acp.h"
+#endif
/*
* Indirect registers accessor
@@ -571,374 +574,12 @@ static int vi_read_register(struct amdgpu_device *adev, u32 se_num,
return -EINVAL;
}
-static void vi_print_gpu_status_regs(struct amdgpu_device *adev)
-{
- dev_info(adev->dev, " GRBM_STATUS=0x%08X\n",
- RREG32(mmGRBM_STATUS));
- dev_info(adev->dev, " GRBM_STATUS2=0x%08X\n",
- RREG32(mmGRBM_STATUS2));
- dev_info(adev->dev, " GRBM_STATUS_SE0=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE0));
- dev_info(adev->dev, " GRBM_STATUS_SE1=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE1));
- dev_info(adev->dev, " GRBM_STATUS_SE2=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE2));
- dev_info(adev->dev, " GRBM_STATUS_SE3=0x%08X\n",
- RREG32(mmGRBM_STATUS_SE3));
- dev_info(adev->dev, " SRBM_STATUS=0x%08X\n",
- RREG32(mmSRBM_STATUS));
- dev_info(adev->dev, " SRBM_STATUS2=0x%08X\n",
- RREG32(mmSRBM_STATUS2));
- dev_info(adev->dev, " SDMA0_STATUS_REG = 0x%08X\n",
- RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET));
- if (adev->sdma.num_instances > 1) {
- dev_info(adev->dev, " SDMA1_STATUS_REG = 0x%08X\n",
- RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET));
- }
- dev_info(adev->dev, " CP_STAT = 0x%08x\n", RREG32(mmCP_STAT));
- dev_info(adev->dev, " CP_STALLED_STAT1 = 0x%08x\n",
- RREG32(mmCP_STALLED_STAT1));
- dev_info(adev->dev, " CP_STALLED_STAT2 = 0x%08x\n",
- RREG32(mmCP_STALLED_STAT2));
- dev_info(adev->dev, " CP_STALLED_STAT3 = 0x%08x\n",
- RREG32(mmCP_STALLED_STAT3));
- dev_info(adev->dev, " CP_CPF_BUSY_STAT = 0x%08x\n",
- RREG32(mmCP_CPF_BUSY_STAT));
- dev_info(adev->dev, " CP_CPF_STALLED_STAT1 = 0x%08x\n",
- RREG32(mmCP_CPF_STALLED_STAT1));
- dev_info(adev->dev, " CP_CPF_STATUS = 0x%08x\n", RREG32(mmCP_CPF_STATUS));
- dev_info(adev->dev, " CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(mmCP_CPC_BUSY_STAT));
- dev_info(adev->dev, " CP_CPC_STALLED_STAT1 = 0x%08x\n",
- RREG32(mmCP_CPC_STALLED_STAT1));
- dev_info(adev->dev, " CP_CPC_STATUS = 0x%08x\n", RREG32(mmCP_CPC_STATUS));
-}
-
-/**
- * vi_gpu_check_soft_reset - check which blocks are busy
- *
- * @adev: amdgpu_device pointer
- *
- * Check which blocks are busy and return the relevant reset
- * mask to be used by vi_gpu_soft_reset().
- * Returns a mask of the blocks to be reset.
- */
-u32 vi_gpu_check_soft_reset(struct amdgpu_device *adev)
-{
- u32 reset_mask = 0;
- u32 tmp;
-
- /* GRBM_STATUS */
- tmp = RREG32(mmGRBM_STATUS);
- if (tmp & (GRBM_STATUS__PA_BUSY_MASK | GRBM_STATUS__SC_BUSY_MASK |
- GRBM_STATUS__BCI_BUSY_MASK | GRBM_STATUS__SX_BUSY_MASK |
- GRBM_STATUS__TA_BUSY_MASK | GRBM_STATUS__VGT_BUSY_MASK |
- GRBM_STATUS__DB_BUSY_MASK | GRBM_STATUS__CB_BUSY_MASK |
- GRBM_STATUS__GDS_BUSY_MASK | GRBM_STATUS__SPI_BUSY_MASK |
- GRBM_STATUS__IA_BUSY_MASK | GRBM_STATUS__IA_BUSY_NO_DMA_MASK))
- reset_mask |= AMDGPU_RESET_GFX;
-
- if (tmp & (GRBM_STATUS__CP_BUSY_MASK | GRBM_STATUS__CP_COHERENCY_BUSY_MASK))
- reset_mask |= AMDGPU_RESET_CP;
-
- /* GRBM_STATUS2 */
- tmp = RREG32(mmGRBM_STATUS2);
- if (tmp & GRBM_STATUS2__RLC_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_RLC;
-
- if (tmp & (GRBM_STATUS2__CPF_BUSY_MASK |
- GRBM_STATUS2__CPC_BUSY_MASK |
- GRBM_STATUS2__CPG_BUSY_MASK))
- reset_mask |= AMDGPU_RESET_CP;
-
- /* SRBM_STATUS2 */
- tmp = RREG32(mmSRBM_STATUS2);
- if (tmp & SRBM_STATUS2__SDMA_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_DMA;
-
- if (tmp & SRBM_STATUS2__SDMA1_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_DMA1;
-
- /* SRBM_STATUS */
- tmp = RREG32(mmSRBM_STATUS);
-
- if (tmp & SRBM_STATUS__IH_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_IH;
-
- if (tmp & SRBM_STATUS__SEM_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_SEM;
-
- if (tmp & SRBM_STATUS__GRBM_RQ_PENDING_MASK)
- reset_mask |= AMDGPU_RESET_GRBM;
-
- if (adev->asic_type != CHIP_TOPAZ) {
- if (tmp & (SRBM_STATUS__UVD_RQ_PENDING_MASK |
- SRBM_STATUS__UVD_BUSY_MASK))
- reset_mask |= AMDGPU_RESET_UVD;
- }
-
- if (tmp & SRBM_STATUS__VMC_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_VMC;
-
- if (tmp & (SRBM_STATUS__MCB_BUSY_MASK | SRBM_STATUS__MCB_NON_DISPLAY_BUSY_MASK |
- SRBM_STATUS__MCC_BUSY_MASK | SRBM_STATUS__MCD_BUSY_MASK))
- reset_mask |= AMDGPU_RESET_MC;
-
- /* SDMA0_STATUS_REG */
- tmp = RREG32(mmSDMA0_STATUS_REG + SDMA0_REGISTER_OFFSET);
- if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
- reset_mask |= AMDGPU_RESET_DMA;
-
- /* SDMA1_STATUS_REG */
- if (adev->sdma.num_instances > 1) {
- tmp = RREG32(mmSDMA0_STATUS_REG + SDMA1_REGISTER_OFFSET);
- if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK))
- reset_mask |= AMDGPU_RESET_DMA1;
- }
-#if 0
- /* VCE_STATUS */
- if (adev->asic_type != CHIP_TOPAZ) {
- tmp = RREG32(mmVCE_STATUS);
- if (tmp & VCE_STATUS__VCPU_REPORT_RB0_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_VCE;
- if (tmp & VCE_STATUS__VCPU_REPORT_RB1_BUSY_MASK)
- reset_mask |= AMDGPU_RESET_VCE1;
-
- }
-
- if (adev->asic_type != CHIP_TOPAZ) {
- if (amdgpu_display_is_display_hung(adev))
- reset_mask |= AMDGPU_RESET_DISPLAY;
- }
-#endif
-
- /* Skip MC reset as it's mostly likely not hung, just busy */
- if (reset_mask & AMDGPU_RESET_MC) {
- DRM_DEBUG("MC busy: 0x%08X, clearing.\n", reset_mask);
- reset_mask &= ~AMDGPU_RESET_MC;
- }
-
- return reset_mask;
-}
-
-/**
- * vi_gpu_soft_reset - soft reset GPU
- *
- * @adev: amdgpu_device pointer
- * @reset_mask: mask of which blocks to reset
- *
- * Soft reset the blocks specified in @reset_mask.
- */
-static void vi_gpu_soft_reset(struct amdgpu_device *adev, u32 reset_mask)
-{
- struct amdgpu_mode_mc_save save;
- u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
- u32 tmp;
-
- if (reset_mask == 0)
- return;
-
- dev_info(adev->dev, "GPU softreset: 0x%08X\n", reset_mask);
-
- vi_print_gpu_status_regs(adev);
- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_ADDR 0x%08X\n",
- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_ADDR));
- dev_info(adev->dev, " VM_CONTEXT1_PROTECTION_FAULT_STATUS 0x%08X\n",
- RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS));
-
- /* disable CG/PG */
-
- /* stop the rlc */
- //XXX
- //gfx_v8_0_rlc_stop(adev);
-
- /* Disable GFX parsing/prefetching */
- tmp = RREG32(mmCP_ME_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
- tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
- tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
- WREG32(mmCP_ME_CNTL, tmp);
-
- /* Disable MEC parsing/prefetching */
- tmp = RREG32(mmCP_MEC_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1);
- tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1);
- WREG32(mmCP_MEC_CNTL, tmp);
-
- if (reset_mask & AMDGPU_RESET_DMA) {
- /* sdma0 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
- tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
- WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
- }
- if (reset_mask & AMDGPU_RESET_DMA1) {
- /* sdma1 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
- tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
- WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
- }
-
- gmc_v8_0_mc_stop(adev, &save);
- if (amdgpu_asic_wait_for_mc_idle(adev)) {
- dev_warn(adev->dev, "Wait for MC idle timedout !\n");
- }
-
- if (reset_mask & (AMDGPU_RESET_GFX | AMDGPU_RESET_COMPUTE | AMDGPU_RESET_CP)) {
- grbm_soft_reset =
- REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
- grbm_soft_reset =
- REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX, 1);
- }
-
- if (reset_mask & AMDGPU_RESET_CP) {
- grbm_soft_reset =
- REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP, 1);
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
- }
-
- if (reset_mask & AMDGPU_RESET_DMA)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA, 1);
-
- if (reset_mask & AMDGPU_RESET_DMA1)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SDMA1, 1);
-
- if (reset_mask & AMDGPU_RESET_DISPLAY)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_DC, 1);
-
- if (reset_mask & AMDGPU_RESET_RLC)
- grbm_soft_reset =
- REG_SET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
-
- if (reset_mask & AMDGPU_RESET_SEM)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_SEM, 1);
-
- if (reset_mask & AMDGPU_RESET_IH)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_IH, 1);
-
- if (reset_mask & AMDGPU_RESET_GRBM)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_GRBM, 1);
-
- if (reset_mask & AMDGPU_RESET_VMC)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VMC, 1);
-
- if (reset_mask & AMDGPU_RESET_UVD)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_UVD, 1);
-
- if (reset_mask & AMDGPU_RESET_VCE)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE0, 1);
-
- if (reset_mask & AMDGPU_RESET_VCE)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_VCE1, 1);
-
- if (!(adev->flags & AMD_IS_APU)) {
- if (reset_mask & AMDGPU_RESET_MC)
- srbm_soft_reset =
- REG_SET_FIELD(srbm_soft_reset, SRBM_SOFT_RESET, SOFT_RESET_MC, 1);
- }
-
- if (grbm_soft_reset) {
- tmp = RREG32(mmGRBM_SOFT_RESET);
- tmp |= grbm_soft_reset;
- dev_info(adev->dev, "GRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmGRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~grbm_soft_reset;
- WREG32(mmGRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmGRBM_SOFT_RESET);
- }
-
- if (srbm_soft_reset) {
- tmp = RREG32(mmSRBM_SOFT_RESET);
- tmp |= srbm_soft_reset;
- dev_info(adev->dev, "SRBM_SOFT_RESET=0x%08X\n", tmp);
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
-
- udelay(50);
-
- tmp &= ~srbm_soft_reset;
- WREG32(mmSRBM_SOFT_RESET, tmp);
- tmp = RREG32(mmSRBM_SOFT_RESET);
- }
-
- /* Wait a little for things to settle down */
- udelay(50);
-
- gmc_v8_0_mc_resume(adev, &save);
- udelay(50);
-
- vi_print_gpu_status_regs(adev);
-}
-
static void vi_gpu_pci_config_reset(struct amdgpu_device *adev)
{
- struct amdgpu_mode_mc_save save;
- u32 tmp, i;
+ u32 i;
dev_info(adev->dev, "GPU pci config reset\n");
- /* disable dpm? */
-
- /* disable cg/pg */
-
- /* Disable GFX parsing/prefetching */
- tmp = RREG32(mmCP_ME_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
- tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
- tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
- WREG32(mmCP_ME_CNTL, tmp);
-
- /* Disable MEC parsing/prefetching */
- tmp = RREG32(mmCP_MEC_CNTL);
- tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME1_HALT, 1);
- tmp = REG_SET_FIELD(tmp, CP_MEC_CNTL, MEC_ME2_HALT, 1);
- WREG32(mmCP_MEC_CNTL, tmp);
-
- /* Disable GFX parsing/prefetching */
- WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK |
- CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK);
-
- /* Disable MEC parsing/prefetching */
- WREG32(mmCP_MEC_CNTL,
- CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK);
-
- /* sdma0 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET);
- tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
- WREG32(mmSDMA0_F32_CNTL + SDMA0_REGISTER_OFFSET, tmp);
-
- /* sdma1 */
- tmp = RREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET);
- tmp = REG_SET_FIELD(tmp, SDMA0_F32_CNTL, HALT, 1);
- WREG32(mmSDMA0_F32_CNTL + SDMA1_REGISTER_OFFSET, tmp);
-
- /* XXX other engines? */
-
- /* halt the rlc, disable cp internal ints */
- //XXX
- //gfx_v8_0_rlc_stop(adev);
-
- udelay(50);
-
- /* disable mem access */
- gmc_v8_0_mc_stop(adev, &save);
- if (amdgpu_asic_wait_for_mc_idle(adev)) {
- dev_warn(adev->dev, "Wait for MC idle timed out !\n");
- }
-
/* disable BM */
pci_clear_master(adev->pdev);
/* reset */
@@ -978,26 +619,11 @@ static void vi_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hun
*/
static int vi_asic_reset(struct amdgpu_device *adev)
{
- u32 reset_mask;
-
- reset_mask = vi_gpu_check_soft_reset(adev);
-
- if (reset_mask)
- vi_set_bios_scratch_engine_hung(adev, true);
-
- /* try soft reset */
- vi_gpu_soft_reset(adev, reset_mask);
-
- reset_mask = vi_gpu_check_soft_reset(adev);
+ vi_set_bios_scratch_engine_hung(adev, true);
- /* try pci config reset */
- if (reset_mask && amdgpu_hard_reset)
- vi_gpu_pci_config_reset(adev);
+ vi_gpu_pci_config_reset(adev);
- reset_mask = vi_gpu_check_soft_reset(adev);
-
- if (!reset_mask)
- vi_set_bios_scratch_engine_hung(adev, false);
+ vi_set_bios_scratch_engine_hung(adev, false);
return 0;
}
@@ -1347,6 +973,15 @@ static const struct amdgpu_ip_block_version cz_ip_blocks[] =
.rev = 0,
.funcs = &vce_v3_0_ip_funcs,
},
+#if defined(CONFIG_DRM_AMD_ACP)
+ {
+ .type = AMD_IP_BLOCK_TYPE_ACP,
+ .major = 2,
+ .minor = 2,
+ .rev = 0,
+ .funcs = &acp_ip_funcs,
+ },
+#endif
};
int vi_set_ip_blocks(struct amdgpu_device *adev)
@@ -1436,26 +1071,22 @@ static int vi_common_early_init(void *handle)
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
case CHIP_TOPAZ:
- adev->has_uvd = false;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = 0x1;
break;
case CHIP_FIJI:
- adev->has_uvd = true;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x3c;
break;
case CHIP_TONGA:
- adev->has_uvd = true;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x14;
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
- adev->has_uvd = true;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x1;
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index d98aa9d82fa19..ace49976f7be0 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -71,8 +71,6 @@
#define VMID(x) ((x) << 4)
#define QUEUEID(x) ((x) << 8)
-#define RB_BITMAP_WIDTH_PER_SH 2
-
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
#define MC_SEQ_MISC0__MT__DDR2 0x20000000
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
index c34c393e9aea0..d5e19b5fbbfb0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
@@ -513,7 +513,7 @@ static int dbgdev_wave_control_set_registers(
union SQ_CMD_BITS *in_reg_sq_cmd,
union GRBM_GFX_INDEX_BITS *in_reg_gfx_index)
{
- int status;
+ int status = 0;
union SQ_CMD_BITS reg_sq_cmd;
union GRBM_GFX_INDEX_BITS reg_gfx_index;
struct HsaDbgWaveMsgAMDGen2 *pMsg;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
index ca8410e8683d3..850a5623661f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
@@ -59,18 +59,23 @@ module_param(send_sigterm, int, 0444);
MODULE_PARM_DESC(send_sigterm,
"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
-bool kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
+static int amdkfd_init_completed;
+
+int kgd2kfd_init(unsigned interface_version, const struct kgd2kfd_calls **g2f)
{
+ if (!amdkfd_init_completed)
+ return -EPROBE_DEFER;
+
/*
* Only one interface version is supported,
* no kfd/kgd version skew allowed.
*/
if (interface_version != KFD_INTERFACE_VERSION)
- return false;
+ return -EINVAL;
*g2f = &kgd2kfd;
- return true;
+ return 0;
}
EXPORT_SYMBOL(kgd2kfd_init);
@@ -111,6 +116,8 @@ static int __init kfd_module_init(void)
kfd_process_create_wq();
+ amdkfd_init_completed = 1;
+
dev_info(kfd_device, "Initialized module\n");
return 0;
@@ -125,6 +132,8 @@ static int __init kfd_module_init(void)
static void __exit kfd_module_exit(void)
{
+ amdkfd_init_completed = 0;
+
kfd_process_destroy_wq();
kfd_topology_shutdown();
kfd_chardev_exit();
diff --git a/drivers/gpu/drm/amd/include/amd_acpi.h b/drivers/gpu/drm/amd/include/amd_acpi.h
index 496360eb3fba2..50e8933251410 100644
--- a/drivers/gpu/drm/amd/include/amd_acpi.h
+++ b/drivers/gpu/drm/amd/include/amd_acpi.h
@@ -340,6 +340,8 @@ struct atcs_pref_req_output {
# define ATPX_FIXED_NOT_SUPPORTED (1 << 9)
# define ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED (1 << 10)
# define ATPX_DGPU_REQ_POWER_FOR_DISPLAYS (1 << 11)
+# define ATPX_DGPU_CAN_DRIVE_DISPLAYS (1 << 12)
+# define ATPX_MS_HYBRID_GFX_SUPPORTED (1 << 14)
#define ATPX_FUNCTION_POWER_CONTROL 0x2
/* ARG0: ATPX_FUNCTION_POWER_CONTROL
* ARG1:
diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
index dbf7e6413cab4..04e4090666fb2 100644
--- a/drivers/gpu/drm/amd/include/amd_shared.h
+++ b/drivers/gpu/drm/amd/include/amd_shared.h
@@ -73,6 +73,7 @@ enum amd_ip_block_type {
AMD_IP_BLOCK_TYPE_SDMA,
AMD_IP_BLOCK_TYPE_UVD,
AMD_IP_BLOCK_TYPE_VCE,
+ AMD_IP_BLOCK_TYPE_ACP,
};
enum amd_clockgating_state {
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
index dc52ea0df4b41..d3ccf5a86de03 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_d.h
@@ -1379,6 +1379,7 @@
#define mmDC_GPIO_PAD_STRENGTH_1 0x1978
#define mmDC_GPIO_PAD_STRENGTH_2 0x1979
#define mmPHY_AUX_CNTL 0x197f
+#define mmDC_GPIO_I2CPAD_MASK 0x1974
#define mmDC_GPIO_I2CPAD_A 0x1975
#define mmDC_GPIO_I2CPAD_EN 0x1976
#define mmDC_GPIO_I2CPAD_Y 0x1977
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
new file mode 100644
index 0000000000000..6bea30ef3df5b
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_enum.h
@@ -0,0 +1,1117 @@
+/*
+ * DCE_8_0 Register documentation
+ *
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef DCE_8_0_ENUM_H
+#define DCE_8_0_ENUM_H
+
+typedef enum SurfaceEndian {
+ ENDIAN_NONE = 0x0,
+ ENDIAN_8IN16 = 0x1,
+ ENDIAN_8IN32 = 0x2,
+ ENDIAN_8IN64 = 0x3,
+} SurfaceEndian;
+typedef enum ArrayMode {
+ ARRAY_LINEAR_GENERAL = 0x0,
+ ARRAY_LINEAR_ALIGNED = 0x1,
+ ARRAY_1D_TILED_THIN1 = 0x2,
+ ARRAY_1D_TILED_THICK = 0x3,
+ ARRAY_2D_TILED_THIN1 = 0x4,
+ ARRAY_PRT_TILED_THIN1 = 0x5,
+ ARRAY_PRT_2D_TILED_THIN1 = 0x6,
+ ARRAY_2D_TILED_THICK = 0x7,
+ ARRAY_2D_TILED_XTHICK = 0x8,
+ ARRAY_PRT_TILED_THICK = 0x9,
+ ARRAY_PRT_2D_TILED_THICK = 0xa,
+ ARRAY_PRT_3D_TILED_THIN1 = 0xb,
+ ARRAY_3D_TILED_THIN1 = 0xc,
+ ARRAY_3D_TILED_THICK = 0xd,
+ ARRAY_3D_TILED_XTHICK = 0xe,
+ ARRAY_PRT_3D_TILED_THICK = 0xf,
+} ArrayMode;
+typedef enum PipeTiling {
+ CONFIG_1_PIPE = 0x0,
+ CONFIG_2_PIPE = 0x1,
+ CONFIG_4_PIPE = 0x2,
+ CONFIG_8_PIPE = 0x3,
+} PipeTiling;
+typedef enum BankTiling {
+ CONFIG_4_BANK = 0x0,
+ CONFIG_8_BANK = 0x1,
+} BankTiling;
+typedef enum GroupInterleave {
+ CONFIG_256B_GROUP = 0x0,
+ CONFIG_512B_GROUP = 0x1,
+} GroupInterleave;
+typedef enum RowTiling {
+ CONFIG_1KB_ROW = 0x0,
+ CONFIG_2KB_ROW = 0x1,
+ CONFIG_4KB_ROW = 0x2,
+ CONFIG_8KB_ROW = 0x3,
+ CONFIG_1KB_ROW_OPT = 0x4,
+ CONFIG_2KB_ROW_OPT = 0x5,
+ CONFIG_4KB_ROW_OPT = 0x6,
+ CONFIG_8KB_ROW_OPT = 0x7,
+} RowTiling;
+typedef enum BankSwapBytes {
+ CONFIG_128B_SWAPS = 0x0,
+ CONFIG_256B_SWAPS = 0x1,
+ CONFIG_512B_SWAPS = 0x2,
+ CONFIG_1KB_SWAPS = 0x3,
+} BankSwapBytes;
+typedef enum SampleSplitBytes {
+ CONFIG_1KB_SPLIT = 0x0,
+ CONFIG_2KB_SPLIT = 0x1,
+ CONFIG_4KB_SPLIT = 0x2,
+ CONFIG_8KB_SPLIT = 0x3,
+} SampleSplitBytes;
+typedef enum NumPipes {
+ ADDR_CONFIG_1_PIPE = 0x0,
+ ADDR_CONFIG_2_PIPE = 0x1,
+ ADDR_CONFIG_4_PIPE = 0x2,
+ ADDR_CONFIG_8_PIPE = 0x3,
+} NumPipes;
+typedef enum PipeInterleaveSize {
+ ADDR_CONFIG_PIPE_INTERLEAVE_256B = 0x0,
+ ADDR_CONFIG_PIPE_INTERLEAVE_512B = 0x1,
+} PipeInterleaveSize;
+typedef enum BankInterleaveSize {
+ ADDR_CONFIG_BANK_INTERLEAVE_1 = 0x0,
+ ADDR_CONFIG_BANK_INTERLEAVE_2 = 0x1,
+ ADDR_CONFIG_BANK_INTERLEAVE_4 = 0x2,
+ ADDR_CONFIG_BANK_INTERLEAVE_8 = 0x3,
+} BankInterleaveSize;
+typedef enum NumShaderEngines {
+ ADDR_CONFIG_1_SHADER_ENGINE = 0x0,
+ ADDR_CONFIG_2_SHADER_ENGINE = 0x1,
+} NumShaderEngines;
+typedef enum ShaderEngineTileSize {
+ ADDR_CONFIG_SE_TILE_16 = 0x0,
+ ADDR_CONFIG_SE_TILE_32 = 0x1,
+} ShaderEngineTileSize;
+typedef enum NumGPUs {
+ ADDR_CONFIG_1_GPU = 0x0,
+ ADDR_CONFIG_2_GPU = 0x1,
+ ADDR_CONFIG_4_GPU = 0x2,
+} NumGPUs;
+typedef enum MultiGPUTileSize {
+ ADDR_CONFIG_GPU_TILE_16 = 0x0,
+ ADDR_CONFIG_GPU_TILE_32 = 0x1,
+ ADDR_CONFIG_GPU_TILE_64 = 0x2,
+ ADDR_CONFIG_GPU_TILE_128 = 0x3,
+} MultiGPUTileSize;
+typedef enum RowSize {
+ ADDR_CONFIG_1KB_ROW = 0x0,
+ ADDR_CONFIG_2KB_ROW = 0x1,
+ ADDR_CONFIG_4KB_ROW = 0x2,
+} RowSize;
+typedef enum NumLowerPipes {
+ ADDR_CONFIG_1_LOWER_PIPES = 0x0,
+ ADDR_CONFIG_2_LOWER_PIPES = 0x1,
+} NumLowerPipes;
+typedef enum DebugBlockId {
+ DBG_CLIENT_BLKID_RESERVED = 0x0,
+ DBG_CLIENT_BLKID_dbg = 0x1,
+ DBG_CLIENT_BLKID_uvdu_0 = 0x2,
+ DBG_CLIENT_BLKID_uvdu_1 = 0x3,
+ DBG_CLIENT_BLKID_uvdu_2 = 0x4,
+ DBG_CLIENT_BLKID_uvdu_3 = 0x5,
+ DBG_CLIENT_BLKID_uvdu_4 = 0x6,
+ DBG_CLIENT_BLKID_uvdu_5 = 0x7,
+ DBG_CLIENT_BLKID_uvdu_6 = 0x8,
+ DBG_CLIENT_BLKID_uvdm_0 = 0x9,
+ DBG_CLIENT_BLKID_uvdm_1 = 0xa,
+ DBG_CLIENT_BLKID_uvdm_2 = 0xb,
+ DBG_CLIENT_BLKID_uvdm_3 = 0xc,
+ DBG_CLIENT_BLKID_vcea_0 = 0xd,
+ DBG_CLIENT_BLKID_vcea_1 = 0xe,
+ DBG_CLIENT_BLKID_vcea_2 = 0xf,
+ DBG_CLIENT_BLKID_vcea_3 = 0x10,
+ DBG_CLIENT_BLKID_vcea_4 = 0x11,
+ DBG_CLIENT_BLKID_vcea_5 = 0x12,
+ DBG_CLIENT_BLKID_vcea_6 = 0x13,
+ DBG_CLIENT_BLKID_vceb_0 = 0x14,
+ DBG_CLIENT_BLKID_vceb_1 = 0x15,
+ DBG_CLIENT_BLKID_vceb_2 = 0x16,
+ DBG_CLIENT_BLKID_dco = 0x17,
+ DBG_CLIENT_BLKID_xdma = 0x18,
+ DBG_CLIENT_BLKID_smu_0 = 0x19,
+ DBG_CLIENT_BLKID_smu_1 = 0x1a,
+ DBG_CLIENT_BLKID_smu_2 = 0x1b,
+ DBG_CLIENT_BLKID_gck = 0x1c,
+ DBG_CLIENT_BLKID_tmonw0 = 0x1d,
+ DBG_CLIENT_BLKID_tmonw1 = 0x1e,
+ DBG_CLIENT_BLKID_grbm = 0x1f,
+ DBG_CLIENT_BLKID_rlc = 0x20,
+ DBG_CLIENT_BLKID_ds0 = 0x21,
+ DBG_CLIENT_BLKID_cpg_0 = 0x22,
+ DBG_CLIENT_BLKID_cpg_1 = 0x23,
+ DBG_CLIENT_BLKID_cpc_0 = 0x24,
+ DBG_CLIENT_BLKID_cpc_1 = 0x25,
+ DBG_CLIENT_BLKID_cpf = 0x26,
+ DBG_CLIENT_BLKID_scf0 = 0x27,
+ DBG_CLIENT_BLKID_scf1 = 0x28,
+ DBG_CLIENT_BLKID_scf2 = 0x29,
+ DBG_CLIENT_BLKID_scf3 = 0x2a,
+ DBG_CLIENT_BLKID_pc0 = 0x2b,
+ DBG_CLIENT_BLKID_pc1 = 0x2c,
+ DBG_CLIENT_BLKID_pc2 = 0x2d,
+ DBG_CLIENT_BLKID_pc3 = 0x2e,
+ DBG_CLIENT_BLKID_vgt0 = 0x2f,
+ DBG_CLIENT_BLKID_vgt1 = 0x30,
+ DBG_CLIENT_BLKID_vgt2 = 0x31,
+ DBG_CLIENT_BLKID_vgt3 = 0x32,
+ DBG_CLIENT_BLKID_sx00 = 0x33,
+ DBG_CLIENT_BLKID_sx10 = 0x34,
+ DBG_CLIENT_BLKID_sx20 = 0x35,
+ DBG_CLIENT_BLKID_sx30 = 0x36,
+ DBG_CLIENT_BLKID_cb001 = 0x37,
+ DBG_CLIENT_BLKID_cb200 = 0x38,
+ DBG_CLIENT_BLKID_cb201 = 0x39,
+ DBG_CLIENT_BLKID_cbr0 = 0x3a,
+ DBG_CLIENT_BLKID_cb000 = 0x3b,
+ DBG_CLIENT_BLKID_cb101 = 0x3c,
+ DBG_CLIENT_BLKID_cb300 = 0x3d,
+ DBG_CLIENT_BLKID_cb301 = 0x3e,
+ DBG_CLIENT_BLKID_cbr1 = 0x3f,
+ DBG_CLIENT_BLKID_cb100 = 0x40,
+ DBG_CLIENT_BLKID_ia0 = 0x41,
+ DBG_CLIENT_BLKID_ia1 = 0x42,
+ DBG_CLIENT_BLKID_bci0 = 0x43,
+ DBG_CLIENT_BLKID_bci1 = 0x44,
+ DBG_CLIENT_BLKID_bci2 = 0x45,
+ DBG_CLIENT_BLKID_bci3 = 0x46,
+ DBG_CLIENT_BLKID_pa0 = 0x47,
+ DBG_CLIENT_BLKID_pa1 = 0x48,
+ DBG_CLIENT_BLKID_spim0 = 0x49,
+ DBG_CLIENT_BLKID_spim1 = 0x4a,
+ DBG_CLIENT_BLKID_spim2 = 0x4b,
+ DBG_CLIENT_BLKID_spim3 = 0x4c,
+ DBG_CLIENT_BLKID_sdma = 0x4d,
+ DBG_CLIENT_BLKID_ih = 0x4e,
+ DBG_CLIENT_BLKID_sem = 0x4f,
+ DBG_CLIENT_BLKID_srbm = 0x50,
+ DBG_CLIENT_BLKID_hdp = 0x51,
+ DBG_CLIENT_BLKID_acp_0 = 0x52,
+ DBG_CLIENT_BLKID_acp_1 = 0x53,
+ DBG_CLIENT_BLKID_sam = 0x54,
+ DBG_CLIENT_BLKID_mcc0 = 0x55,
+ DBG_CLIENT_BLKID_mcc1 = 0x56,
+ DBG_CLIENT_BLKID_mcc2 = 0x57,
+ DBG_CLIENT_BLKID_mcc3 = 0x58,
+ DBG_CLIENT_BLKID_mcd0 = 0x59,
+ DBG_CLIENT_BLKID_mcd1 = 0x5a,
+ DBG_CLIENT_BLKID_mcd2 = 0x5b,
+ DBG_CLIENT_BLKID_mcd3 = 0x5c,
+ DBG_CLIENT_BLKID_mcb = 0x5d,
+ DBG_CLIENT_BLKID_vmc = 0x5e,
+ DBG_CLIENT_BLKID_gmcon = 0x5f,
+ DBG_CLIENT_BLKID_gdc_0 = 0x60,
+ DBG_CLIENT_BLKID_gdc_1 = 0x61,
+ DBG_CLIENT_BLKID_gdc_2 = 0x62,
+ DBG_CLIENT_BLKID_gdc_3 = 0x63,
+ DBG_CLIENT_BLKID_gdc_4 = 0x64,
+ DBG_CLIENT_BLKID_gdc_5 = 0x65,
+ DBG_CLIENT_BLKID_gdc_6 = 0x66,
+ DBG_CLIENT_BLKID_gdc_7 = 0x67,
+ DBG_CLIENT_BLKID_gdc_8 = 0x68,
+ DBG_CLIENT_BLKID_gdc_9 = 0x69,
+ DBG_CLIENT_BLKID_gdc_10 = 0x6a,
+ DBG_CLIENT_BLKID_gdc_11 = 0x6b,
+ DBG_CLIENT_BLKID_gdc_12 = 0x6c,
+ DBG_CLIENT_BLKID_gdc_13 = 0x6d,
+ DBG_CLIENT_BLKID_gdc_14 = 0x6e,
+ DBG_CLIENT_BLKID_gdc_15 = 0x6f,
+ DBG_CLIENT_BLKID_gdc_16 = 0x70,
+ DBG_CLIENT_BLKID_gdc_17 = 0x71,
+ DBG_CLIENT_BLKID_gdc_18 = 0x72,
+ DBG_CLIENT_BLKID_gdc_19 = 0x73,
+ DBG_CLIENT_BLKID_gdc_20 = 0x74,
+ DBG_CLIENT_BLKID_gdc_21 = 0x75,
+ DBG_CLIENT_BLKID_gdc_22 = 0x76,
+ DBG_CLIENT_BLKID_wd = 0x77,
+ DBG_CLIENT_BLKID_sdma_0 = 0x78,
+ DBG_CLIENT_BLKID_sdma_1 = 0x79,
+} DebugBlockId;
+typedef enum DebugBlockId_OLD {
+ DBG_BLOCK_ID_RESERVED = 0x0,
+ DBG_BLOCK_ID_DBG = 0x1,
+ DBG_BLOCK_ID_VMC = 0x2,
+ DBG_BLOCK_ID_PDMA = 0x3,
+ DBG_BLOCK_ID_CG = 0x4,
+ DBG_BLOCK_ID_SRBM = 0x5,
+ DBG_BLOCK_ID_GRBM = 0x6,
+ DBG_BLOCK_ID_RLC = 0x7,
+ DBG_BLOCK_ID_CSC = 0x8,
+ DBG_BLOCK_ID_SEM = 0x9,
+ DBG_BLOCK_ID_IH = 0xa,
+ DBG_BLOCK_ID_SC = 0xb,
+ DBG_BLOCK_ID_SQ = 0xc,
+ DBG_BLOCK_ID_AVP = 0xd,
+ DBG_BLOCK_ID_GMCON = 0xe,
+ DBG_BLOCK_ID_SMU = 0xf,
+ DBG_BLOCK_ID_DMA0 = 0x10,
+ DBG_BLOCK_ID_DMA1 = 0x11,
+ DBG_BLOCK_ID_SPIM = 0x12,
+ DBG_BLOCK_ID_GDS = 0x13,
+ DBG_BLOCK_ID_SPIS = 0x14,
+ DBG_BLOCK_ID_UNUSED0 = 0x15,
+ DBG_BLOCK_ID_PA0 = 0x16,
+ DBG_BLOCK_ID_PA1 = 0x17,
+ DBG_BLOCK_ID_CP0 = 0x18,
+ DBG_BLOCK_ID_CP1 = 0x19,
+ DBG_BLOCK_ID_CP2 = 0x1a,
+ DBG_BLOCK_ID_UNUSED1 = 0x1b,
+ DBG_BLOCK_ID_UVDU = 0x1c,
+ DBG_BLOCK_ID_UVDM = 0x1d,
+ DBG_BLOCK_ID_VCE = 0x1e,
+ DBG_BLOCK_ID_UNUSED2 = 0x1f,
+ DBG_BLOCK_ID_VGT0 = 0x20,
+ DBG_BLOCK_ID_VGT1 = 0x21,
+ DBG_BLOCK_ID_IA = 0x22,
+ DBG_BLOCK_ID_UNUSED3 = 0x23,
+ DBG_BLOCK_ID_SCT0 = 0x24,
+ DBG_BLOCK_ID_SCT1 = 0x25,
+ DBG_BLOCK_ID_SPM0 = 0x26,
+ DBG_BLOCK_ID_SPM1 = 0x27,
+ DBG_BLOCK_ID_TCAA = 0x28,
+ DBG_BLOCK_ID_TCAB = 0x29,
+ DBG_BLOCK_ID_TCCA = 0x2a,
+ DBG_BLOCK_ID_TCCB = 0x2b,
+ DBG_BLOCK_ID_MCC0 = 0x2c,
+ DBG_BLOCK_ID_MCC1 = 0x2d,
+ DBG_BLOCK_ID_MCC2 = 0x2e,
+ DBG_BLOCK_ID_MCC3 = 0x2f,
+ DBG_BLOCK_ID_SX0 = 0x30,
+ DBG_BLOCK_ID_SX1 = 0x31,
+ DBG_BLOCK_ID_SX2 = 0x32,
+ DBG_BLOCK_ID_SX3 = 0x33,
+ DBG_BLOCK_ID_UNUSED4 = 0x34,
+ DBG_BLOCK_ID_UNUSED5 = 0x35,
+ DBG_BLOCK_ID_UNUSED6 = 0x36,
+ DBG_BLOCK_ID_UNUSED7 = 0x37,
+ DBG_BLOCK_ID_PC0 = 0x38,
+ DBG_BLOCK_ID_PC1 = 0x39,
+ DBG_BLOCK_ID_UNUSED8 = 0x3a,
+ DBG_BLOCK_ID_UNUSED9 = 0x3b,
+ DBG_BLOCK_ID_UNUSED10 = 0x3c,
+ DBG_BLOCK_ID_UNUSED11 = 0x3d,
+ DBG_BLOCK_ID_MCB = 0x3e,
+ DBG_BLOCK_ID_UNUSED12 = 0x3f,
+ DBG_BLOCK_ID_SCB0 = 0x40,
+ DBG_BLOCK_ID_SCB1 = 0x41,
+ DBG_BLOCK_ID_UNUSED13 = 0x42,
+ DBG_BLOCK_ID_UNUSED14 = 0x43,
+ DBG_BLOCK_ID_SCF0 = 0x44,
+ DBG_BLOCK_ID_SCF1 = 0x45,
+ DBG_BLOCK_ID_UNUSED15 = 0x46,
+ DBG_BLOCK_ID_UNUSED16 = 0x47,
+ DBG_BLOCK_ID_BCI0 = 0x48,
+ DBG_BLOCK_ID_BCI1 = 0x49,
+ DBG_BLOCK_ID_BCI2 = 0x4a,
+ DBG_BLOCK_ID_BCI3 = 0x4b,
+ DBG_BLOCK_ID_UNUSED17 = 0x4c,
+ DBG_BLOCK_ID_UNUSED18 = 0x4d,
+ DBG_BLOCK_ID_UNUSED19 = 0x4e,
+ DBG_BLOCK_ID_UNUSED20 = 0x4f,
+ DBG_BLOCK_ID_CB00 = 0x50,
+ DBG_BLOCK_ID_CB01 = 0x51,
+ DBG_BLOCK_ID_CB02 = 0x52,
+ DBG_BLOCK_ID_CB03 = 0x53,
+ DBG_BLOCK_ID_CB04 = 0x54,
+ DBG_BLOCK_ID_UNUSED21 = 0x55,
+ DBG_BLOCK_ID_UNUSED22 = 0x56,
+ DBG_BLOCK_ID_UNUSED23 = 0x57,
+ DBG_BLOCK_ID_CB10 = 0x58,
+ DBG_BLOCK_ID_CB11 = 0x59,
+ DBG_BLOCK_ID_CB12 = 0x5a,
+ DBG_BLOCK_ID_CB13 = 0x5b,
+ DBG_BLOCK_ID_CB14 = 0x5c,
+ DBG_BLOCK_ID_UNUSED24 = 0x5d,
+ DBG_BLOCK_ID_UNUSED25 = 0x5e,
+ DBG_BLOCK_ID_UNUSED26 = 0x5f,
+ DBG_BLOCK_ID_TCP0 = 0x60,
+ DBG_BLOCK_ID_TCP1 = 0x61,
+ DBG_BLOCK_ID_TCP2 = 0x62,
+ DBG_BLOCK_ID_TCP3 = 0x63,
+ DBG_BLOCK_ID_TCP4 = 0x64,
+ DBG_BLOCK_ID_TCP5 = 0x65,
+ DBG_BLOCK_ID_TCP6 = 0x66,
+ DBG_BLOCK_ID_TCP7 = 0x67,
+ DBG_BLOCK_ID_TCP8 = 0x68,
+ DBG_BLOCK_ID_TCP9 = 0x69,
+ DBG_BLOCK_ID_TCP10 = 0x6a,
+ DBG_BLOCK_ID_TCP11 = 0x6b,
+ DBG_BLOCK_ID_TCP12 = 0x6c,
+ DBG_BLOCK_ID_TCP13 = 0x6d,
+ DBG_BLOCK_ID_TCP14 = 0x6e,
+ DBG_BLOCK_ID_TCP15 = 0x6f,
+ DBG_BLOCK_ID_TCP16 = 0x70,
+ DBG_BLOCK_ID_TCP17 = 0x71,
+ DBG_BLOCK_ID_TCP18 = 0x72,
+ DBG_BLOCK_ID_TCP19 = 0x73,
+ DBG_BLOCK_ID_TCP20 = 0x74,
+ DBG_BLOCK_ID_TCP21 = 0x75,
+ DBG_BLOCK_ID_TCP22 = 0x76,
+ DBG_BLOCK_ID_TCP23 = 0x77,
+ DBG_BLOCK_ID_TCP_RESERVED0 = 0x78,
+ DBG_BLOCK_ID_TCP_RESERVED1 = 0x79,
+ DBG_BLOCK_ID_TCP_RESERVED2 = 0x7a,
+ DBG_BLOCK_ID_TCP_RESERVED3 = 0x7b,
+ DBG_BLOCK_ID_TCP_RESERVED4 = 0x7c,
+ DBG_BLOCK_ID_TCP_RESERVED5 = 0x7d,
+ DBG_BLOCK_ID_TCP_RESERVED6 = 0x7e,
+ DBG_BLOCK_ID_TCP_RESERVED7 = 0x7f,
+ DBG_BLOCK_ID_DB00 = 0x80,
+ DBG_BLOCK_ID_DB01 = 0x81,
+ DBG_BLOCK_ID_DB02 = 0x82,
+ DBG_BLOCK_ID_DB03 = 0x83,
+ DBG_BLOCK_ID_DB04 = 0x84,
+ DBG_BLOCK_ID_UNUSED27 = 0x85,
+ DBG_BLOCK_ID_UNUSED28 = 0x86,
+ DBG_BLOCK_ID_UNUSED29 = 0x87,
+ DBG_BLOCK_ID_DB10 = 0x88,
+ DBG_BLOCK_ID_DB11 = 0x89,
+ DBG_BLOCK_ID_DB12 = 0x8a,
+ DBG_BLOCK_ID_DB13 = 0x8b,
+ DBG_BLOCK_ID_DB14 = 0x8c,
+ DBG_BLOCK_ID_UNUSED30 = 0x8d,
+ DBG_BLOCK_ID_UNUSED31 = 0x8e,
+ DBG_BLOCK_ID_UNUSED32 = 0x8f,
+ DBG_BLOCK_ID_TCC0 = 0x90,
+ DBG_BLOCK_ID_TCC1 = 0x91,
+ DBG_BLOCK_ID_TCC2 = 0x92,
+ DBG_BLOCK_ID_TCC3 = 0x93,
+ DBG_BLOCK_ID_TCC4 = 0x94,
+ DBG_BLOCK_ID_TCC5 = 0x95,
+ DBG_BLOCK_ID_TCC6 = 0x96,
+ DBG_BLOCK_ID_TCC7 = 0x97,
+ DBG_BLOCK_ID_SPS00 = 0x98,
+ DBG_BLOCK_ID_SPS01 = 0x99,
+ DBG_BLOCK_ID_SPS02 = 0x9a,
+ DBG_BLOCK_ID_SPS10 = 0x9b,
+ DBG_BLOCK_ID_SPS11 = 0x9c,
+ DBG_BLOCK_ID_SPS12 = 0x9d,
+ DBG_BLOCK_ID_UNUSED33 = 0x9e,
+ DBG_BLOCK_ID_UNUSED34 = 0x9f,
+ DBG_BLOCK_ID_TA00 = 0xa0,
+ DBG_BLOCK_ID_TA01 = 0xa1,
+ DBG_BLOCK_ID_TA02 = 0xa2,
+ DBG_BLOCK_ID_TA03 = 0xa3,
+ DBG_BLOCK_ID_TA04 = 0xa4,
+ DBG_BLOCK_ID_TA05 = 0xa5,
+ DBG_BLOCK_ID_TA06 = 0xa6,
+ DBG_BLOCK_ID_TA07 = 0xa7,
+ DBG_BLOCK_ID_TA08 = 0xa8,
+ DBG_BLOCK_ID_TA09 = 0xa9,
+ DBG_BLOCK_ID_TA0A = 0xaa,
+ DBG_BLOCK_ID_TA0B = 0xab,
+ DBG_BLOCK_ID_UNUSED35 = 0xac,
+ DBG_BLOCK_ID_UNUSED36 = 0xad,
+ DBG_BLOCK_ID_UNUSED37 = 0xae,
+ DBG_BLOCK_ID_UNUSED38 = 0xaf,
+ DBG_BLOCK_ID_TA10 = 0xb0,
+ DBG_BLOCK_ID_TA11 = 0xb1,
+ DBG_BLOCK_ID_TA12 = 0xb2,
+ DBG_BLOCK_ID_TA13 = 0xb3,
+ DBG_BLOCK_ID_TA14 = 0xb4,
+ DBG_BLOCK_ID_TA15 = 0xb5,
+ DBG_BLOCK_ID_TA16 = 0xb6,
+ DBG_BLOCK_ID_TA17 = 0xb7,
+ DBG_BLOCK_ID_TA18 = 0xb8,
+ DBG_BLOCK_ID_TA19 = 0xb9,
+ DBG_BLOCK_ID_TA1A = 0xba,
+ DBG_BLOCK_ID_TA1B = 0xbb,
+ DBG_BLOCK_ID_UNUSED39 = 0xbc,
+ DBG_BLOCK_ID_UNUSED40 = 0xbd,
+ DBG_BLOCK_ID_UNUSED41 = 0xbe,
+ DBG_BLOCK_ID_UNUSED42 = 0xbf,
+ DBG_BLOCK_ID_TD00 = 0xc0,
+ DBG_BLOCK_ID_TD01 = 0xc1,
+ DBG_BLOCK_ID_TD02 = 0xc2,
+ DBG_BLOCK_ID_TD03 = 0xc3,
+ DBG_BLOCK_ID_TD04 = 0xc4,
+ DBG_BLOCK_ID_TD05 = 0xc5,
+ DBG_BLOCK_ID_TD06 = 0xc6,
+ DBG_BLOCK_ID_TD07 = 0xc7,
+ DBG_BLOCK_ID_TD08 = 0xc8,
+ DBG_BLOCK_ID_TD09 = 0xc9,
+ DBG_BLOCK_ID_TD0A = 0xca,
+ DBG_BLOCK_ID_TD0B = 0xcb,
+ DBG_BLOCK_ID_UNUSED43 = 0xcc,
+ DBG_BLOCK_ID_UNUSED44 = 0xcd,
+ DBG_BLOCK_ID_UNUSED45 = 0xce,
+ DBG_BLOCK_ID_UNUSED46 = 0xcf,
+ DBG_BLOCK_ID_TD10 = 0xd0,
+ DBG_BLOCK_ID_TD11 = 0xd1,
+ DBG_BLOCK_ID_TD12 = 0xd2,
+ DBG_BLOCK_ID_TD13 = 0xd3,
+ DBG_BLOCK_ID_TD14 = 0xd4,
+ DBG_BLOCK_ID_TD15 = 0xd5,
+ DBG_BLOCK_ID_TD16 = 0xd6,
+ DBG_BLOCK_ID_TD17 = 0xd7,
+ DBG_BLOCK_ID_TD18 = 0xd8,
+ DBG_BLOCK_ID_TD19 = 0xd9,
+ DBG_BLOCK_ID_TD1A = 0xda,
+ DBG_BLOCK_ID_TD1B = 0xdb,
+ DBG_BLOCK_ID_UNUSED47 = 0xdc,
+ DBG_BLOCK_ID_UNUSED48 = 0xdd,
+ DBG_BLOCK_ID_UNUSED49 = 0xde,
+ DBG_BLOCK_ID_UNUSED50 = 0xdf,
+ DBG_BLOCK_ID_MCD0 = 0xe0,
+ DBG_BLOCK_ID_MCD1 = 0xe1,
+ DBG_BLOCK_ID_MCD2 = 0xe2,
+ DBG_BLOCK_ID_MCD3 = 0xe3,
+ DBG_BLOCK_ID_MCD4 = 0xe4,
+ DBG_BLOCK_ID_MCD5 = 0xe5,
+ DBG_BLOCK_ID_UNUSED51 = 0xe6,
+ DBG_BLOCK_ID_UNUSED52 = 0xe7,
+} DebugBlockId_OLD;
+typedef enum DebugBlockId_BY2 {
+ DBG_BLOCK_ID_RESERVED_BY2 = 0x0,
+ DBG_BLOCK_ID_VMC_BY2 = 0x1,
+ DBG_BLOCK_ID_CG_BY2 = 0x2,
+ DBG_BLOCK_ID_GRBM_BY2 = 0x3,
+ DBG_BLOCK_ID_CSC_BY2 = 0x4,
+ DBG_BLOCK_ID_IH_BY2 = 0x5,
+ DBG_BLOCK_ID_SQ_BY2 = 0x6,
+ DBG_BLOCK_ID_GMCON_BY2 = 0x7,
+ DBG_BLOCK_ID_DMA0_BY2 = 0x8,
+ DBG_BLOCK_ID_SPIM_BY2 = 0x9,
+ DBG_BLOCK_ID_SPIS_BY2 = 0xa,
+ DBG_BLOCK_ID_PA0_BY2 = 0xb,
+ DBG_BLOCK_ID_CP0_BY2 = 0xc,
+ DBG_BLOCK_ID_CP2_BY2 = 0xd,
+ DBG_BLOCK_ID_UVDU_BY2 = 0xe,
+ DBG_BLOCK_ID_VCE_BY2 = 0xf,
+ DBG_BLOCK_ID_VGT0_BY2 = 0x10,
+ DBG_BLOCK_ID_IA_BY2 = 0x11,
+ DBG_BLOCK_ID_SCT0_BY2 = 0x12,
+ DBG_BLOCK_ID_SPM0_BY2 = 0x13,
+ DBG_BLOCK_ID_TCAA_BY2 = 0x14,
+ DBG_BLOCK_ID_TCCA_BY2 = 0x15,
+ DBG_BLOCK_ID_MCC0_BY2 = 0x16,
+ DBG_BLOCK_ID_MCC2_BY2 = 0x17,
+ DBG_BLOCK_ID_SX0_BY2 = 0x18,
+ DBG_BLOCK_ID_SX2_BY2 = 0x19,
+ DBG_BLOCK_ID_UNUSED4_BY2 = 0x1a,
+ DBG_BLOCK_ID_UNUSED6_BY2 = 0x1b,
+ DBG_BLOCK_ID_PC0_BY2 = 0x1c,
+ DBG_BLOCK_ID_UNUSED8_BY2 = 0x1d,
+ DBG_BLOCK_ID_UNUSED10_BY2 = 0x1e,
+ DBG_BLOCK_ID_MCB_BY2 = 0x1f,
+ DBG_BLOCK_ID_SCB0_BY2 = 0x20,
+ DBG_BLOCK_ID_UNUSED13_BY2 = 0x21,
+ DBG_BLOCK_ID_SCF0_BY2 = 0x22,
+ DBG_BLOCK_ID_UNUSED15_BY2 = 0x23,
+ DBG_BLOCK_ID_BCI0_BY2 = 0x24,
+ DBG_BLOCK_ID_BCI2_BY2 = 0x25,
+ DBG_BLOCK_ID_UNUSED17_BY2 = 0x26,
+ DBG_BLOCK_ID_UNUSED19_BY2 = 0x27,
+ DBG_BLOCK_ID_CB00_BY2 = 0x28,
+ DBG_BLOCK_ID_CB02_BY2 = 0x29,
+ DBG_BLOCK_ID_CB04_BY2 = 0x2a,
+ DBG_BLOCK_ID_UNUSED22_BY2 = 0x2b,
+ DBG_BLOCK_ID_CB10_BY2 = 0x2c,
+ DBG_BLOCK_ID_CB12_BY2 = 0x2d,
+ DBG_BLOCK_ID_CB14_BY2 = 0x2e,
+ DBG_BLOCK_ID_UNUSED25_BY2 = 0x2f,
+ DBG_BLOCK_ID_TCP0_BY2 = 0x30,
+ DBG_BLOCK_ID_TCP2_BY2 = 0x31,
+ DBG_BLOCK_ID_TCP4_BY2 = 0x32,
+ DBG_BLOCK_ID_TCP6_BY2 = 0x33,
+ DBG_BLOCK_ID_TCP8_BY2 = 0x34,
+ DBG_BLOCK_ID_TCP10_BY2 = 0x35,
+ DBG_BLOCK_ID_TCP12_BY2 = 0x36,
+ DBG_BLOCK_ID_TCP14_BY2 = 0x37,
+ DBG_BLOCK_ID_TCP16_BY2 = 0x38,
+ DBG_BLOCK_ID_TCP18_BY2 = 0x39,
+ DBG_BLOCK_ID_TCP20_BY2 = 0x3a,
+ DBG_BLOCK_ID_TCP22_BY2 = 0x3b,
+ DBG_BLOCK_ID_TCP_RESERVED0_BY2 = 0x3c,
+ DBG_BLOCK_ID_TCP_RESERVED2_BY2 = 0x3d,
+ DBG_BLOCK_ID_TCP_RESERVED4_BY2 = 0x3e,
+ DBG_BLOCK_ID_TCP_RESERVED6_BY2 = 0x3f,
+ DBG_BLOCK_ID_DB00_BY2 = 0x40,
+ DBG_BLOCK_ID_DB02_BY2 = 0x41,
+ DBG_BLOCK_ID_DB04_BY2 = 0x42,
+ DBG_BLOCK_ID_UNUSED28_BY2 = 0x43,
+ DBG_BLOCK_ID_DB10_BY2 = 0x44,
+ DBG_BLOCK_ID_DB12_BY2 = 0x45,
+ DBG_BLOCK_ID_DB14_BY2 = 0x46,
+ DBG_BLOCK_ID_UNUSED31_BY2 = 0x47,
+ DBG_BLOCK_ID_TCC0_BY2 = 0x48,
+ DBG_BLOCK_ID_TCC2_BY2 = 0x49,
+ DBG_BLOCK_ID_TCC4_BY2 = 0x4a,
+ DBG_BLOCK_ID_TCC6_BY2 = 0x4b,
+ DBG_BLOCK_ID_SPS00_BY2 = 0x4c,
+ DBG_BLOCK_ID_SPS02_BY2 = 0x4d,
+ DBG_BLOCK_ID_SPS11_BY2 = 0x4e,
+ DBG_BLOCK_ID_UNUSED33_BY2 = 0x4f,
+ DBG_BLOCK_ID_TA00_BY2 = 0x50,
+ DBG_BLOCK_ID_TA02_BY2 = 0x51,
+ DBG_BLOCK_ID_TA04_BY2 = 0x52,
+ DBG_BLOCK_ID_TA06_BY2 = 0x53,
+ DBG_BLOCK_ID_TA08_BY2 = 0x54,
+ DBG_BLOCK_ID_TA0A_BY2 = 0x55,
+ DBG_BLOCK_ID_UNUSED35_BY2 = 0x56,
+ DBG_BLOCK_ID_UNUSED37_BY2 = 0x57,
+ DBG_BLOCK_ID_TA10_BY2 = 0x58,
+ DBG_BLOCK_ID_TA12_BY2 = 0x59,
+ DBG_BLOCK_ID_TA14_BY2 = 0x5a,
+ DBG_BLOCK_ID_TA16_BY2 = 0x5b,
+ DBG_BLOCK_ID_TA18_BY2 = 0x5c,
+ DBG_BLOCK_ID_TA1A_BY2 = 0x5d,
+ DBG_BLOCK_ID_UNUSED39_BY2 = 0x5e,
+ DBG_BLOCK_ID_UNUSED41_BY2 = 0x5f,
+ DBG_BLOCK_ID_TD00_BY2 = 0x60,
+ DBG_BLOCK_ID_TD02_BY2 = 0x61,
+ DBG_BLOCK_ID_TD04_BY2 = 0x62,
+ DBG_BLOCK_ID_TD06_BY2 = 0x63,
+ DBG_BLOCK_ID_TD08_BY2 = 0x64,
+ DBG_BLOCK_ID_TD0A_BY2 = 0x65,
+ DBG_BLOCK_ID_UNUSED43_BY2 = 0x66,
+ DBG_BLOCK_ID_UNUSED45_BY2 = 0x67,
+ DBG_BLOCK_ID_TD10_BY2 = 0x68,
+ DBG_BLOCK_ID_TD12_BY2 = 0x69,
+ DBG_BLOCK_ID_TD14_BY2 = 0x6a,
+ DBG_BLOCK_ID_TD16_BY2 = 0x6b,
+ DBG_BLOCK_ID_TD18_BY2 = 0x6c,
+ DBG_BLOCK_ID_TD1A_BY2 = 0x6d,
+ DBG_BLOCK_ID_UNUSED47_BY2 = 0x6e,
+ DBG_BLOCK_ID_UNUSED49_BY2 = 0x6f,
+ DBG_BLOCK_ID_MCD0_BY2 = 0x70,
+ DBG_BLOCK_ID_MCD2_BY2 = 0x71,
+ DBG_BLOCK_ID_MCD4_BY2 = 0x72,
+ DBG_BLOCK_ID_UNUSED51_BY2 = 0x73,
+} DebugBlockId_BY2;
+typedef enum DebugBlockId_BY4 {
+ DBG_BLOCK_ID_RESERVED_BY4 = 0x0,
+ DBG_BLOCK_ID_CG_BY4 = 0x1,
+ DBG_BLOCK_ID_CSC_BY4 = 0x2,
+ DBG_BLOCK_ID_SQ_BY4 = 0x3,
+ DBG_BLOCK_ID_DMA0_BY4 = 0x4,
+ DBG_BLOCK_ID_SPIS_BY4 = 0x5,
+ DBG_BLOCK_ID_CP0_BY4 = 0x6,
+ DBG_BLOCK_ID_UVDU_BY4 = 0x7,
+ DBG_BLOCK_ID_VGT0_BY4 = 0x8,
+ DBG_BLOCK_ID_SCT0_BY4 = 0x9,
+ DBG_BLOCK_ID_TCAA_BY4 = 0xa,
+ DBG_BLOCK_ID_MCC0_BY4 = 0xb,
+ DBG_BLOCK_ID_SX0_BY4 = 0xc,
+ DBG_BLOCK_ID_UNUSED4_BY4 = 0xd,
+ DBG_BLOCK_ID_PC0_BY4 = 0xe,
+ DBG_BLOCK_ID_UNUSED10_BY4 = 0xf,
+ DBG_BLOCK_ID_SCB0_BY4 = 0x10,
+ DBG_BLOCK_ID_SCF0_BY4 = 0x11,
+ DBG_BLOCK_ID_BCI0_BY4 = 0x12,
+ DBG_BLOCK_ID_UNUSED17_BY4 = 0x13,
+ DBG_BLOCK_ID_CB00_BY4 = 0x14,
+ DBG_BLOCK_ID_CB04_BY4 = 0x15,
+ DBG_BLOCK_ID_CB10_BY4 = 0x16,
+ DBG_BLOCK_ID_CB14_BY4 = 0x17,
+ DBG_BLOCK_ID_TCP0_BY4 = 0x18,
+ DBG_BLOCK_ID_TCP4_BY4 = 0x19,
+ DBG_BLOCK_ID_TCP8_BY4 = 0x1a,
+ DBG_BLOCK_ID_TCP12_BY4 = 0x1b,
+ DBG_BLOCK_ID_TCP16_BY4 = 0x1c,
+ DBG_BLOCK_ID_TCP20_BY4 = 0x1d,
+ DBG_BLOCK_ID_TCP_RESERVED0_BY4 = 0x1e,
+ DBG_BLOCK_ID_TCP_RESERVED4_BY4 = 0x1f,
+ DBG_BLOCK_ID_DB_BY4 = 0x20,
+ DBG_BLOCK_ID_DB04_BY4 = 0x21,
+ DBG_BLOCK_ID_DB10_BY4 = 0x22,
+ DBG_BLOCK_ID_DB14_BY4 = 0x23,
+ DBG_BLOCK_ID_TCC0_BY4 = 0x24,
+ DBG_BLOCK_ID_TCC4_BY4 = 0x25,
+ DBG_BLOCK_ID_SPS00_BY4 = 0x26,
+ DBG_BLOCK_ID_SPS11_BY4 = 0x27,
+ DBG_BLOCK_ID_TA00_BY4 = 0x28,
+ DBG_BLOCK_ID_TA04_BY4 = 0x29,
+ DBG_BLOCK_ID_TA08_BY4 = 0x2a,
+ DBG_BLOCK_ID_UNUSED35_BY4 = 0x2b,
+ DBG_BLOCK_ID_TA10_BY4 = 0x2c,
+ DBG_BLOCK_ID_TA14_BY4 = 0x2d,
+ DBG_BLOCK_ID_TA18_BY4 = 0x2e,
+ DBG_BLOCK_ID_UNUSED39_BY4 = 0x2f,
+ DBG_BLOCK_ID_TD00_BY4 = 0x30,
+ DBG_BLOCK_ID_TD04_BY4 = 0x31,
+ DBG_BLOCK_ID_TD08_BY4 = 0x32,
+ DBG_BLOCK_ID_UNUSED43_BY4 = 0x33,
+ DBG_BLOCK_ID_TD10_BY4 = 0x34,
+ DBG_BLOCK_ID_TD14_BY4 = 0x35,
+ DBG_BLOCK_ID_TD18_BY4 = 0x36,
+ DBG_BLOCK_ID_UNUSED47_BY4 = 0x37,
+ DBG_BLOCK_ID_MCD0_BY4 = 0x38,
+ DBG_BLOCK_ID_MCD4_BY4 = 0x39,
+} DebugBlockId_BY4;
+typedef enum DebugBlockId_BY8 {
+ DBG_BLOCK_ID_RESERVED_BY8 = 0x0,
+ DBG_BLOCK_ID_CSC_BY8 = 0x1,
+ DBG_BLOCK_ID_DMA0_BY8 = 0x2,
+ DBG_BLOCK_ID_CP0_BY8 = 0x3,
+ DBG_BLOCK_ID_VGT0_BY8 = 0x4,
+ DBG_BLOCK_ID_TCAA_BY8 = 0x5,
+ DBG_BLOCK_ID_SX0_BY8 = 0x6,
+ DBG_BLOCK_ID_PC0_BY8 = 0x7,
+ DBG_BLOCK_ID_SCB0_BY8 = 0x8,
+ DBG_BLOCK_ID_BCI0_BY8 = 0x9,
+ DBG_BLOCK_ID_CB00_BY8 = 0xa,
+ DBG_BLOCK_ID_CB10_BY8 = 0xb,
+ DBG_BLOCK_ID_TCP0_BY8 = 0xc,
+ DBG_BLOCK_ID_TCP8_BY8 = 0xd,
+ DBG_BLOCK_ID_TCP16_BY8 = 0xe,
+ DBG_BLOCK_ID_TCP_RESERVED0_BY8 = 0xf,
+ DBG_BLOCK_ID_DB00_BY8 = 0x10,
+ DBG_BLOCK_ID_DB10_BY8 = 0x11,
+ DBG_BLOCK_ID_TCC0_BY8 = 0x12,
+ DBG_BLOCK_ID_SPS00_BY8 = 0x13,
+ DBG_BLOCK_ID_TA00_BY8 = 0x14,
+ DBG_BLOCK_ID_TA08_BY8 = 0x15,
+ DBG_BLOCK_ID_TA10_BY8 = 0x16,
+ DBG_BLOCK_ID_TA18_BY8 = 0x17,
+ DBG_BLOCK_ID_TD00_BY8 = 0x18,
+ DBG_BLOCK_ID_TD08_BY8 = 0x19,
+ DBG_BLOCK_ID_TD10_BY8 = 0x1a,
+ DBG_BLOCK_ID_TD18_BY8 = 0x1b,
+ DBG_BLOCK_ID_MCD0_BY8 = 0x1c,
+} DebugBlockId_BY8;
+typedef enum DebugBlockId_BY16 {
+ DBG_BLOCK_ID_RESERVED_BY16 = 0x0,
+ DBG_BLOCK_ID_DMA0_BY16 = 0x1,
+ DBG_BLOCK_ID_VGT0_BY16 = 0x2,
+ DBG_BLOCK_ID_SX0_BY16 = 0x3,
+ DBG_BLOCK_ID_SCB0_BY16 = 0x4,
+ DBG_BLOCK_ID_CB00_BY16 = 0x5,
+ DBG_BLOCK_ID_TCP0_BY16 = 0x6,
+ DBG_BLOCK_ID_TCP16_BY16 = 0x7,
+ DBG_BLOCK_ID_DB00_BY16 = 0x8,
+ DBG_BLOCK_ID_TCC0_BY16 = 0x9,
+ DBG_BLOCK_ID_TA00_BY16 = 0xa,
+ DBG_BLOCK_ID_TA10_BY16 = 0xb,
+ DBG_BLOCK_ID_TD00_BY16 = 0xc,
+ DBG_BLOCK_ID_TD10_BY16 = 0xd,
+ DBG_BLOCK_ID_MCD0_BY16 = 0xe,
+} DebugBlockId_BY16;
+typedef enum CompareRef {
+ REF_NEVER = 0x0,
+ REF_LESS = 0x1,
+ REF_EQUAL = 0x2,
+ REF_LEQUAL = 0x3,
+ REF_GREATER = 0x4,
+ REF_NOTEQUAL = 0x5,
+ REF_GEQUAL = 0x6,
+ REF_ALWAYS = 0x7,
+} CompareRef;
+typedef enum ReadSize {
+ READ_256_BITS = 0x0,
+ READ_512_BITS = 0x1,
+} ReadSize;
+typedef enum DepthFormat {
+ DEPTH_INVALID = 0x0,
+ DEPTH_16 = 0x1,
+ DEPTH_X8_24 = 0x2,
+ DEPTH_8_24 = 0x3,
+ DEPTH_X8_24_FLOAT = 0x4,
+ DEPTH_8_24_FLOAT = 0x5,
+ DEPTH_32_FLOAT = 0x6,
+ DEPTH_X24_8_32_FLOAT = 0x7,
+} DepthFormat;
+typedef enum ZFormat {
+ Z_INVALID = 0x0,
+ Z_16 = 0x1,
+ Z_24 = 0x2,
+ Z_32_FLOAT = 0x3,
+} ZFormat;
+typedef enum StencilFormat {
+ STENCIL_INVALID = 0x0,
+ STENCIL_8 = 0x1,
+} StencilFormat;
+typedef enum CmaskMode {
+ CMASK_CLEAR_NONE = 0x0,
+ CMASK_CLEAR_ONE = 0x1,
+ CMASK_CLEAR_ALL = 0x2,
+ CMASK_ANY_EXPANDED = 0x3,
+ CMASK_ALPHA0_FRAG1 = 0x4,
+ CMASK_ALPHA0_FRAG2 = 0x5,
+ CMASK_ALPHA0_FRAG4 = 0x6,
+ CMASK_ALPHA0_FRAGS = 0x7,
+ CMASK_ALPHA1_FRAG1 = 0x8,
+ CMASK_ALPHA1_FRAG2 = 0x9,
+ CMASK_ALPHA1_FRAG4 = 0xa,
+ CMASK_ALPHA1_FRAGS = 0xb,
+ CMASK_ALPHAX_FRAG1 = 0xc,
+ CMASK_ALPHAX_FRAG2 = 0xd,
+ CMASK_ALPHAX_FRAG4 = 0xe,
+ CMASK_ALPHAX_FRAGS = 0xf,
+} CmaskMode;
+typedef enum QuadExportFormat {
+ EXPORT_UNUSED = 0x0,
+ EXPORT_32_R = 0x1,
+ EXPORT_32_GR = 0x2,
+ EXPORT_32_AR = 0x3,
+ EXPORT_FP16_ABGR = 0x4,
+ EXPORT_UNSIGNED16_ABGR = 0x5,
+ EXPORT_SIGNED16_ABGR = 0x6,
+ EXPORT_32_ABGR = 0x7,
+} QuadExportFormat;
+typedef enum QuadExportFormatOld {
+ EXPORT_4P_32BPC_ABGR = 0x0,
+ EXPORT_4P_16BPC_ABGR = 0x1,
+ EXPORT_4P_32BPC_GR = 0x2,
+ EXPORT_4P_32BPC_AR = 0x3,
+ EXPORT_2P_32BPC_ABGR = 0x4,
+ EXPORT_8P_32BPC_R = 0x5,
+} QuadExportFormatOld;
+typedef enum ColorFormat {
+ COLOR_INVALID = 0x0,
+ COLOR_8 = 0x1,
+ COLOR_16 = 0x2,
+ COLOR_8_8 = 0x3,
+ COLOR_32 = 0x4,
+ COLOR_16_16 = 0x5,
+ COLOR_10_11_11 = 0x6,
+ COLOR_11_11_10 = 0x7,
+ COLOR_10_10_10_2 = 0x8,
+ COLOR_2_10_10_10 = 0x9,
+ COLOR_8_8_8_8 = 0xa,
+ COLOR_32_32 = 0xb,
+ COLOR_16_16_16_16 = 0xc,
+ COLOR_RESERVED_13 = 0xd,
+ COLOR_32_32_32_32 = 0xe,
+ COLOR_RESERVED_15 = 0xf,
+ COLOR_5_6_5 = 0x10,
+ COLOR_1_5_5_5 = 0x11,
+ COLOR_5_5_5_1 = 0x12,
+ COLOR_4_4_4_4 = 0x13,
+ COLOR_8_24 = 0x14,
+ COLOR_24_8 = 0x15,
+ COLOR_X24_8_32_FLOAT = 0x16,
+ COLOR_RESERVED_23 = 0x17,
+} ColorFormat;
+typedef enum SurfaceFormat {
+ FMT_INVALID = 0x0,
+ FMT_8 = 0x1,
+ FMT_16 = 0x2,
+ FMT_8_8 = 0x3,
+ FMT_32 = 0x4,
+ FMT_16_16 = 0x5,
+ FMT_10_11_11 = 0x6,
+ FMT_11_11_10 = 0x7,
+ FMT_10_10_10_2 = 0x8,
+ FMT_2_10_10_10 = 0x9,
+ FMT_8_8_8_8 = 0xa,
+ FMT_32_32 = 0xb,
+ FMT_16_16_16_16 = 0xc,
+ FMT_32_32_32 = 0xd,
+ FMT_32_32_32_32 = 0xe,
+ FMT_RESERVED_4 = 0xf,
+ FMT_5_6_5 = 0x10,
+ FMT_1_5_5_5 = 0x11,
+ FMT_5_5_5_1 = 0x12,
+ FMT_4_4_4_4 = 0x13,
+ FMT_8_24 = 0x14,
+ FMT_24_8 = 0x15,
+ FMT_X24_8_32_FLOAT = 0x16,
+ FMT_RESERVED_33 = 0x17,
+ FMT_11_11_10_FLOAT = 0x18,
+ FMT_16_FLOAT = 0x19,
+ FMT_32_FLOAT = 0x1a,
+ FMT_16_16_FLOAT = 0x1b,
+ FMT_8_24_FLOAT = 0x1c,
+ FMT_24_8_FLOAT = 0x1d,
+ FMT_32_32_FLOAT = 0x1e,
+ FMT_10_11_11_FLOAT = 0x1f,
+ FMT_16_16_16_16_FLOAT = 0x20,
+ FMT_3_3_2 = 0x21,
+ FMT_6_5_5 = 0x22,
+ FMT_32_32_32_32_FLOAT = 0x23,
+ FMT_RESERVED_36 = 0x24,
+ FMT_1 = 0x25,
+ FMT_1_REVERSED = 0x26,
+ FMT_GB_GR = 0x27,
+ FMT_BG_RG = 0x28,
+ FMT_32_AS_8 = 0x29,
+ FMT_32_AS_8_8 = 0x2a,
+ FMT_5_9_9_9_SHAREDEXP = 0x2b,
+ FMT_8_8_8 = 0x2c,
+ FMT_16_16_16 = 0x2d,
+ FMT_16_16_16_FLOAT = 0x2e,
+ FMT_4_4 = 0x2f,
+ FMT_32_32_32_FLOAT = 0x30,
+ FMT_BC1 = 0x31,
+ FMT_BC2 = 0x32,
+ FMT_BC3 = 0x33,
+ FMT_BC4 = 0x34,
+ FMT_BC5 = 0x35,
+ FMT_BC6 = 0x36,
+ FMT_BC7 = 0x37,
+ FMT_32_AS_32_32_32_32 = 0x38,
+ FMT_APC3 = 0x39,
+ FMT_APC4 = 0x3a,
+ FMT_APC5 = 0x3b,
+ FMT_APC6 = 0x3c,
+ FMT_APC7 = 0x3d,
+ FMT_CTX1 = 0x3e,
+ FMT_RESERVED_63 = 0x3f,
+} SurfaceFormat;
+typedef enum BUF_DATA_FORMAT {
+ BUF_DATA_FORMAT_INVALID = 0x0,
+ BUF_DATA_FORMAT_8 = 0x1,
+ BUF_DATA_FORMAT_16 = 0x2,
+ BUF_DATA_FORMAT_8_8 = 0x3,
+ BUF_DATA_FORMAT_32 = 0x4,
+ BUF_DATA_FORMAT_16_16 = 0x5,
+ BUF_DATA_FORMAT_10_11_11 = 0x6,
+ BUF_DATA_FORMAT_11_11_10 = 0x7,
+ BUF_DATA_FORMAT_10_10_10_2 = 0x8,
+ BUF_DATA_FORMAT_2_10_10_10 = 0x9,
+ BUF_DATA_FORMAT_8_8_8_8 = 0xa,
+ BUF_DATA_FORMAT_32_32 = 0xb,
+ BUF_DATA_FORMAT_16_16_16_16 = 0xc,
+ BUF_DATA_FORMAT_32_32_32 = 0xd,
+ BUF_DATA_FORMAT_32_32_32_32 = 0xe,
+ BUF_DATA_FORMAT_RESERVED_15 = 0xf,
+} BUF_DATA_FORMAT;
+typedef enum IMG_DATA_FORMAT {
+ IMG_DATA_FORMAT_INVALID = 0x0,
+ IMG_DATA_FORMAT_8 = 0x1,
+ IMG_DATA_FORMAT_16 = 0x2,
+ IMG_DATA_FORMAT_8_8 = 0x3,
+ IMG_DATA_FORMAT_32 = 0x4,
+ IMG_DATA_FORMAT_16_16 = 0x5,
+ IMG_DATA_FORMAT_10_11_11 = 0x6,
+ IMG_DATA_FORMAT_11_11_10 = 0x7,
+ IMG_DATA_FORMAT_10_10_10_2 = 0x8,
+ IMG_DATA_FORMAT_2_10_10_10 = 0x9,
+ IMG_DATA_FORMAT_8_8_8_8 = 0xa,
+ IMG_DATA_FORMAT_32_32 = 0xb,
+ IMG_DATA_FORMAT_16_16_16_16 = 0xc,
+ IMG_DATA_FORMAT_32_32_32 = 0xd,
+ IMG_DATA_FORMAT_32_32_32_32 = 0xe,
+ IMG_DATA_FORMAT_RESERVED_15 = 0xf,
+ IMG_DATA_FORMAT_5_6_5 = 0x10,
+ IMG_DATA_FORMAT_1_5_5_5 = 0x11,
+ IMG_DATA_FORMAT_5_5_5_1 = 0x12,
+ IMG_DATA_FORMAT_4_4_4_4 = 0x13,
+ IMG_DATA_FORMAT_8_24 = 0x14,
+ IMG_DATA_FORMAT_24_8 = 0x15,
+ IMG_DATA_FORMAT_X24_8_32 = 0x16,
+ IMG_DATA_FORMAT_RESERVED_23 = 0x17,
+ IMG_DATA_FORMAT_RESERVED_24 = 0x18,
+ IMG_DATA_FORMAT_RESERVED_25 = 0x19,
+ IMG_DATA_FORMAT_RESERVED_26 = 0x1a,
+ IMG_DATA_FORMAT_RESERVED_27 = 0x1b,
+ IMG_DATA_FORMAT_RESERVED_28 = 0x1c,
+ IMG_DATA_FORMAT_RESERVED_29 = 0x1d,
+ IMG_DATA_FORMAT_RESERVED_30 = 0x1e,
+ IMG_DATA_FORMAT_RESERVED_31 = 0x1f,
+ IMG_DATA_FORMAT_GB_GR = 0x20,
+ IMG_DATA_FORMAT_BG_RG = 0x21,
+ IMG_DATA_FORMAT_5_9_9_9 = 0x22,
+ IMG_DATA_FORMAT_BC1 = 0x23,
+ IMG_DATA_FORMAT_BC2 = 0x24,
+ IMG_DATA_FORMAT_BC3 = 0x25,
+ IMG_DATA_FORMAT_BC4 = 0x26,
+ IMG_DATA_FORMAT_BC5 = 0x27,
+ IMG_DATA_FORMAT_BC6 = 0x28,
+ IMG_DATA_FORMAT_BC7 = 0x29,
+ IMG_DATA_FORMAT_RESERVED_42 = 0x2a,
+ IMG_DATA_FORMAT_RESERVED_43 = 0x2b,
+ IMG_DATA_FORMAT_FMASK8_S2_F1 = 0x2c,
+ IMG_DATA_FORMAT_FMASK8_S4_F1 = 0x2d,
+ IMG_DATA_FORMAT_FMASK8_S8_F1 = 0x2e,
+ IMG_DATA_FORMAT_FMASK8_S2_F2 = 0x2f,
+ IMG_DATA_FORMAT_FMASK8_S4_F2 = 0x30,
+ IMG_DATA_FORMAT_FMASK8_S4_F4 = 0x31,
+ IMG_DATA_FORMAT_FMASK16_S16_F1 = 0x32,
+ IMG_DATA_FORMAT_FMASK16_S8_F2 = 0x33,
+ IMG_DATA_FORMAT_FMASK32_S16_F2 = 0x34,
+ IMG_DATA_FORMAT_FMASK32_S8_F4 = 0x35,
+ IMG_DATA_FORMAT_FMASK32_S8_F8 = 0x36,
+ IMG_DATA_FORMAT_FMASK64_S16_F4 = 0x37,
+ IMG_DATA_FORMAT_FMASK64_S16_F8 = 0x38,
+ IMG_DATA_FORMAT_4_4 = 0x39,
+ IMG_DATA_FORMAT_6_5_5 = 0x3a,
+ IMG_DATA_FORMAT_1 = 0x3b,
+ IMG_DATA_FORMAT_1_REVERSED = 0x3c,
+ IMG_DATA_FORMAT_32_AS_8 = 0x3d,
+ IMG_DATA_FORMAT_32_AS_8_8 = 0x3e,
+ IMG_DATA_FORMAT_32_AS_32_32_32_32 = 0x3f,
+} IMG_DATA_FORMAT;
+typedef enum BUF_NUM_FORMAT {
+ BUF_NUM_FORMAT_UNORM = 0x0,
+ BUF_NUM_FORMAT_SNORM = 0x1,
+ BUF_NUM_FORMAT_USCALED = 0x2,
+ BUF_NUM_FORMAT_SSCALED = 0x3,
+ BUF_NUM_FORMAT_UINT = 0x4,
+ BUF_NUM_FORMAT_SINT = 0x5,
+ BUF_NUM_FORMAT_SNORM_OGL = 0x6,
+ BUF_NUM_FORMAT_FLOAT = 0x7,
+} BUF_NUM_FORMAT;
+typedef enum IMG_NUM_FORMAT {
+ IMG_NUM_FORMAT_UNORM = 0x0,
+ IMG_NUM_FORMAT_SNORM = 0x1,
+ IMG_NUM_FORMAT_USCALED = 0x2,
+ IMG_NUM_FORMAT_SSCALED = 0x3,
+ IMG_NUM_FORMAT_UINT = 0x4,
+ IMG_NUM_FORMAT_SINT = 0x5,
+ IMG_NUM_FORMAT_SNORM_OGL = 0x6,
+ IMG_NUM_FORMAT_FLOAT = 0x7,
+ IMG_NUM_FORMAT_RESERVED_8 = 0x8,
+ IMG_NUM_FORMAT_SRGB = 0x9,
+ IMG_NUM_FORMAT_UBNORM = 0xa,
+ IMG_NUM_FORMAT_UBNORM_OGL = 0xb,
+ IMG_NUM_FORMAT_UBINT = 0xc,
+ IMG_NUM_FORMAT_UBSCALED = 0xd,
+ IMG_NUM_FORMAT_RESERVED_14 = 0xe,
+ IMG_NUM_FORMAT_RESERVED_15 = 0xf,
+} IMG_NUM_FORMAT;
+typedef enum TileType {
+ ARRAY_COLOR_TILE = 0x0,
+ ARRAY_DEPTH_TILE = 0x1,
+} TileType;
+typedef enum NonDispTilingOrder {
+ ADDR_SURF_MICRO_TILING_DISPLAY = 0x0,
+ ADDR_SURF_MICRO_TILING_NON_DISPLAY = 0x1,
+} NonDispTilingOrder;
+typedef enum MicroTileMode {
+ ADDR_SURF_DISPLAY_MICRO_TILING = 0x0,
+ ADDR_SURF_THIN_MICRO_TILING = 0x1,
+ ADDR_SURF_DEPTH_MICRO_TILING = 0x2,
+ ADDR_SURF_ROTATED_MICRO_TILING = 0x3,
+ ADDR_SURF_THICK_MICRO_TILING = 0x4,
+} MicroTileMode;
+typedef enum TileSplit {
+ ADDR_SURF_TILE_SPLIT_64B = 0x0,
+ ADDR_SURF_TILE_SPLIT_128B = 0x1,
+ ADDR_SURF_TILE_SPLIT_256B = 0x2,
+ ADDR_SURF_TILE_SPLIT_512B = 0x3,
+ ADDR_SURF_TILE_SPLIT_1KB = 0x4,
+ ADDR_SURF_TILE_SPLIT_2KB = 0x5,
+ ADDR_SURF_TILE_SPLIT_4KB = 0x6,
+} TileSplit;
+typedef enum SampleSplit {
+ ADDR_SURF_SAMPLE_SPLIT_1 = 0x0,
+ ADDR_SURF_SAMPLE_SPLIT_2 = 0x1,
+ ADDR_SURF_SAMPLE_SPLIT_4 = 0x2,
+ ADDR_SURF_SAMPLE_SPLIT_8 = 0x3,
+} SampleSplit;
+typedef enum PipeConfig {
+ ADDR_SURF_P2 = 0x0,
+ ADDR_SURF_P2_RESERVED0 = 0x1,
+ ADDR_SURF_P2_RESERVED1 = 0x2,
+ ADDR_SURF_P2_RESERVED2 = 0x3,
+ ADDR_SURF_P4_8x16 = 0x4,
+ ADDR_SURF_P4_16x16 = 0x5,
+ ADDR_SURF_P4_16x32 = 0x6,
+ ADDR_SURF_P4_32x32 = 0x7,
+ ADDR_SURF_P8_16x16_8x16 = 0x8,
+ ADDR_SURF_P8_16x32_8x16 = 0x9,
+ ADDR_SURF_P8_32x32_8x16 = 0xa,
+ ADDR_SURF_P8_16x32_16x16 = 0xb,
+ ADDR_SURF_P8_32x32_16x16 = 0xc,
+ ADDR_SURF_P8_32x32_16x32 = 0xd,
+ ADDR_SURF_P8_32x64_32x32 = 0xe,
+} PipeConfig;
+typedef enum NumBanks {
+ ADDR_SURF_2_BANK = 0x0,
+ ADDR_SURF_4_BANK = 0x1,
+ ADDR_SURF_8_BANK = 0x2,
+ ADDR_SURF_16_BANK = 0x3,
+} NumBanks;
+typedef enum BankWidth {
+ ADDR_SURF_BANK_WIDTH_1 = 0x0,
+ ADDR_SURF_BANK_WIDTH_2 = 0x1,
+ ADDR_SURF_BANK_WIDTH_4 = 0x2,
+ ADDR_SURF_BANK_WIDTH_8 = 0x3,
+} BankWidth;
+typedef enum BankHeight {
+ ADDR_SURF_BANK_HEIGHT_1 = 0x0,
+ ADDR_SURF_BANK_HEIGHT_2 = 0x1,
+ ADDR_SURF_BANK_HEIGHT_4 = 0x2,
+ ADDR_SURF_BANK_HEIGHT_8 = 0x3,
+} BankHeight;
+typedef enum BankWidthHeight {
+ ADDR_SURF_BANK_WH_1 = 0x0,
+ ADDR_SURF_BANK_WH_2 = 0x1,
+ ADDR_SURF_BANK_WH_4 = 0x2,
+ ADDR_SURF_BANK_WH_8 = 0x3,
+} BankWidthHeight;
+typedef enum MacroTileAspect {
+ ADDR_SURF_MACRO_ASPECT_1 = 0x0,
+ ADDR_SURF_MACRO_ASPECT_2 = 0x1,
+ ADDR_SURF_MACRO_ASPECT_4 = 0x2,
+ ADDR_SURF_MACRO_ASPECT_8 = 0x3,
+} MacroTileAspect;
+typedef enum TCC_CACHE_POLICIES {
+ TCC_CACHE_POLICY_LRU = 0x0,
+ TCC_CACHE_POLICY_STREAM = 0x1,
+ TCC_CACHE_POLICY_BYPASS = 0x2,
+} TCC_CACHE_POLICIES;
+typedef enum PERFMON_COUNTER_MODE {
+ PERFMON_COUNTER_MODE_ACCUM = 0x0,
+ PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x1,
+ PERFMON_COUNTER_MODE_MAX = 0x2,
+ PERFMON_COUNTER_MODE_DIRTY = 0x3,
+ PERFMON_COUNTER_MODE_SAMPLE = 0x4,
+ PERFMON_COUNTER_MODE_CYCLES_SINCE_FIRST_EVENT = 0x5,
+ PERFMON_COUNTER_MODE_CYCLES_SINCE_LAST_EVENT = 0x6,
+ PERFMON_COUNTER_MODE_CYCLES_GE_HI = 0x7,
+ PERFMON_COUNTER_MODE_CYCLES_EQ_HI = 0x8,
+ PERFMON_COUNTER_MODE_INACTIVE_CYCLES = 0x9,
+ PERFMON_COUNTER_MODE_RESERVED = 0xf,
+} PERFMON_COUNTER_MODE;
+typedef enum PERFMON_SPM_MODE {
+ PERFMON_SPM_MODE_OFF = 0x0,
+ PERFMON_SPM_MODE_16BIT_CLAMP = 0x1,
+ PERFMON_SPM_MODE_16BIT_NO_CLAMP = 0x2,
+ PERFMON_SPM_MODE_32BIT_CLAMP = 0x3,
+ PERFMON_SPM_MODE_32BIT_NO_CLAMP = 0x4,
+ PERFMON_SPM_MODE_RESERVED_5 = 0x5,
+ PERFMON_SPM_MODE_RESERVED_6 = 0x6,
+ PERFMON_SPM_MODE_RESERVED_7 = 0x7,
+ PERFMON_SPM_MODE_TEST_MODE_0 = 0x8,
+ PERFMON_SPM_MODE_TEST_MODE_1 = 0x9,
+ PERFMON_SPM_MODE_TEST_MODE_2 = 0xa,
+} PERFMON_SPM_MODE;
+typedef enum SurfaceTiling {
+ ARRAY_LINEAR = 0x0,
+ ARRAY_TILED = 0x1,
+} SurfaceTiling;
+typedef enum SurfaceArray {
+ ARRAY_1D = 0x0,
+ ARRAY_2D = 0x1,
+ ARRAY_3D = 0x2,
+ ARRAY_3D_SLICE = 0x3,
+} SurfaceArray;
+typedef enum ColorArray {
+ ARRAY_2D_ALT_COLOR = 0x0,
+ ARRAY_2D_COLOR = 0x1,
+ ARRAY_3D_SLICE_COLOR = 0x3,
+} ColorArray;
+typedef enum DepthArray {
+ ARRAY_2D_ALT_DEPTH = 0x0,
+ ARRAY_2D_DEPTH = 0x1,
+} DepthArray;
+
+#endif /* DCE_8_0_ENUM_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
index 8a29307344776..c331c9fe7b81c 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dce/dce_8_0_sh_mask.h
@@ -4130,6 +4130,18 @@
#define PHY_AUX_CNTL__AUX_PAD_WAKE__SHIFT 0xe
#define PHY_AUX_CNTL__AUX_PAD_RXSEL_MASK 0x10000
#define PHY_AUX_CNTL__AUX_PAD_RXSEL__SHIFT 0x10
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK_MASK 0x1
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_MASK__SHIFT 0x0
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS_MASK 0x2
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_PD_DIS__SHIFT 0x1
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV_MASK 0x4
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SCL_RECV__SHIFT 0x2
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK_MASK 0x10
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_MASK__SHIFT 0x4
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS_MASK 0x20
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_PD_DIS__SHIFT 0x5
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV_MASK 0x40
+#define DC_GPIO_I2CPAD_MASK__DC_GPIO_SDA_RECV__SHIFT 0x6
#define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A_MASK 0x1
#define DC_GPIO_I2CPAD_A__DC_GPIO_SCL_A__SHIFT 0x0
#define DC_GPIO_I2CPAD_A__DC_GPIO_SDA_A_MASK 0x2
diff --git a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
index 9d4347dd61254..dfe78799100d7 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/gca/gfx_7_2_enum.h
@@ -6225,6 +6225,12 @@ typedef enum TCC_CACHE_POLICIES {
TCC_CACHE_POLICY_STREAM = 0x1,
TCC_CACHE_POLICY_BYPASS = 0x2,
} TCC_CACHE_POLICIES;
+typedef enum MTYPE {
+ MTYPE_NC_NV = 0x0,
+ MTYPE_NC = 0x1,
+ MTYPE_CC = 0x2,
+ MTYPE_UC = 0x3,
+} MTYPE;
typedef enum PERFMON_COUNTER_MODE {
PERFMON_COUNTER_MODE_ACCUM = 0x0,
PERFMON_COUNTER_MODE_ACTIVE_CYCLES = 0x1,
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
new file mode 100644
index 0000000000000..d21c6b14662f3
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/ivsrcid/ivsrcid_vislands30.h
@@ -0,0 +1,102 @@
+/*
+ * Volcanic Islands IV SRC Register documentation
+ *
+ * Copyright (C) 2015 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _IVSRCID_VISLANDS30_H_
+#define _IVSRCID_VISLANDS30_H_
+
+
+// IV Source IDs
+
+#define VISLANDS30_IV_SRCID_D1_V_UPDATE_INT 7 // 0x07
+#define VISLANDS30_IV_EXTID_D1_V_UPDATE_INT 0
+
+#define VISLANDS30_IV_SRCID_D1_GRPH_PFLIP 8 // 0x08
+#define VISLANDS30_IV_EXTID_D1_GRPH_PFLIP 0
+
+#define VISLANDS30_IV_SRCID_D2_V_UPDATE_INT 9 // 0x09
+#define VISLANDS30_IV_EXTID_D2_V_UPDATE_INT 0
+
+#define VISLANDS30_IV_SRCID_D2_GRPH_PFLIP 10 // 0x0a
+#define VISLANDS30_IV_EXTID_D2_GRPH_PFLIP 0
+
+#define VISLANDS30_IV_SRCID_D3_V_UPDATE_INT 11 // 0x0b
+#define VISLANDS30_IV_EXTID_D3_V_UPDATE_INT 0
+
+#define VISLANDS30_IV_SRCID_D3_GRPH_PFLIP 12 // 0x0c
+#define VISLANDS30_IV_EXTID_D3_GRPH_PFLIP 0
+
+#define VISLANDS30_IV_SRCID_D4_V_UPDATE_INT 13 // 0x0d
+#define VISLANDS30_IV_EXTID_D4_V_UPDATE_INT 0
+
+#define VISLANDS30_IV_SRCID_D4_GRPH_PFLIP 14 // 0x0e
+#define VISLANDS30_IV_EXTID_D4_GRPH_PFLIP 0
+
+#define VISLANDS30_IV_SRCID_D5_V_UPDATE_INT 15 // 0x0f
+#define VISLANDS30_IV_EXTID_D5_V_UPDATE_INT 0
+
+#define VISLANDS30_IV_SRCID_D5_GRPH_PFLIP 16 // 0x10
+#define VISLANDS30_IV_EXTID_D5_GRPH_PFLIP 0
+
+#define VISLANDS30_IV_SRCID_D6_V_UPDATE_INT 17 // 0x11
+#define VISLANDS30_IV_EXTID_D6_V_UPDATE_INT 0
+
+#define VISLANDS30_IV_SRCID_D6_GRPH_PFLIP 18 // 0x12
+#define VISLANDS30_IV_EXTID_D6_GRPH_PFLIP 0
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_A 0
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_B 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_B 1
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_C 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_C 2
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_D 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_D 3
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_E 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_E 4
+
+#define VISLANDS30_IV_SRCID_HOTPLUG_DETECT_F 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HOTPLUG_DETECT_F 5
+
+#define VISLANDS30_IV_SRCID_HPD_RX_A 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HPD_RX_A 6
+
+#define VISLANDS30_IV_SRCID_HPD_RX_B 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HPD_RX_B 7
+
+#define VISLANDS30_IV_SRCID_HPD_RX_C 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HPD_RX_C 8
+
+#define VISLANDS30_IV_SRCID_HPD_RX_D 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HPD_RX_D 9
+
+#define VISLANDS30_IV_SRCID_HPD_RX_E 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HPD_RX_E 10
+
+#define VISLANDS30_IV_SRCID_HPD_RX_F 42 // 0x2a
+#define VISLANDS30_IV_EXTID_HPD_RX_F 11
+
+#endif // _IVSRCID_VISLANDS30_H_
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 888250b33ea89..a09d9f352871e 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -221,7 +221,7 @@ struct kgd2kfd_calls {
int (*resume)(struct kfd_dev *kfd);
};
-bool kgd2kfd_init(unsigned interface_version,
+int kgd2kfd_init(unsigned interface_version,
const struct kgd2kfd_calls **g2f);
#endif /* KGD_KFD_INTERFACE_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index 589599f66fcce..9d2290044708d 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -29,6 +29,7 @@
#include "pp_instance.h"
#include "power_state.h"
#include "eventmanager.h"
+#include "pp_debug.h"
#define PP_CHECK(handle) \
do { \
@@ -436,7 +437,10 @@ enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle)
case PP_StateUILabel_Performance:
return POWER_STATE_TYPE_PERFORMANCE;
default:
- return POWER_STATE_TYPE_DEFAULT;
+ if (state->classification.flags & PP_StateClassificationFlag_Boot)
+ return POWER_STATE_TYPE_INTERNAL_BOOT;
+ else
+ return POWER_STATE_TYPE_DEFAULT;
}
}
@@ -538,6 +542,112 @@ static int pp_dpm_get_temperature(void *handle)
return hwmgr->hwmgr_func->get_temperature(hwmgr);
}
+static int pp_dpm_get_pp_num_states(void *handle,
+ struct pp_states_info *data)
+{
+ struct pp_hwmgr *hwmgr;
+ int i;
+
+ if (!handle)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ if (hwmgr == NULL || hwmgr->ps == NULL)
+ return -EINVAL;
+
+ data->nums = hwmgr->num_ps;
+
+ for (i = 0; i < hwmgr->num_ps; i++) {
+ struct pp_power_state *state = (struct pp_power_state *)
+ ((unsigned long)hwmgr->ps + i * hwmgr->ps_size);
+ switch (state->classification.ui_label) {
+ case PP_StateUILabel_Battery:
+ data->states[i] = POWER_STATE_TYPE_BATTERY;
+ break;
+ case PP_StateUILabel_Balanced:
+ data->states[i] = POWER_STATE_TYPE_BALANCED;
+ break;
+ case PP_StateUILabel_Performance:
+ data->states[i] = POWER_STATE_TYPE_PERFORMANCE;
+ break;
+ default:
+ if (state->classification.flags & PP_StateClassificationFlag_Boot)
+ data->states[i] = POWER_STATE_TYPE_INTERNAL_BOOT;
+ else
+ data->states[i] = POWER_STATE_TYPE_DEFAULT;
+ }
+ }
+
+ return 0;
+}
+
+static int pp_dpm_get_pp_table(void *handle, char **table)
+{
+ struct pp_hwmgr *hwmgr;
+
+ if (!handle)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+ hwmgr->hwmgr_func->get_pp_table == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->get_pp_table(hwmgr, table);
+}
+
+static int pp_dpm_set_pp_table(void *handle, const char *buf, size_t size)
+{
+ struct pp_hwmgr *hwmgr;
+
+ if (!handle)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+ hwmgr->hwmgr_func->set_pp_table == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->set_pp_table(hwmgr, buf, size);
+}
+
+static int pp_dpm_force_clock_level(void *handle,
+ enum pp_clock_type type, int level)
+{
+ struct pp_hwmgr *hwmgr;
+
+ if (!handle)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+ hwmgr->hwmgr_func->force_clock_level == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->force_clock_level(hwmgr, type, level);
+}
+
+static int pp_dpm_print_clock_levels(void *handle,
+ enum pp_clock_type type, char *buf)
+{
+ struct pp_hwmgr *hwmgr;
+
+ if (!handle)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ if (hwmgr == NULL || hwmgr->hwmgr_func == NULL ||
+ hwmgr->hwmgr_func->print_clock_levels == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->print_clock_levels(hwmgr, type, buf);
+}
+
const struct amd_powerplay_funcs pp_dpm_funcs = {
.get_temperature = pp_dpm_get_temperature,
.load_firmware = pp_dpm_load_fw,
@@ -555,6 +665,11 @@ const struct amd_powerplay_funcs pp_dpm_funcs = {
.get_fan_control_mode = pp_dpm_get_fan_control_mode,
.set_fan_speed_percent = pp_dpm_set_fan_speed_percent,
.get_fan_speed_percent = pp_dpm_get_fan_speed_percent,
+ .get_pp_num_states = pp_dpm_get_pp_num_states,
+ .get_pp_table = pp_dpm_get_pp_table,
+ .set_pp_table = pp_dpm_set_pp_table,
+ .force_clock_level = pp_dpm_force_clock_level,
+ .print_clock_levels = pp_dpm_print_clock_levels,
};
static int amd_pp_instance_init(struct amd_pp_init *pp_init,
@@ -638,10 +753,10 @@ int amd_powerplay_fini(void *handle)
/* export this function to DAL */
-int amd_powerplay_display_configuration_change(void *handle, const void *input)
+int amd_powerplay_display_configuration_change(void *handle,
+ const struct amd_pp_display_configuration *display_config)
{
struct pp_hwmgr *hwmgr;
- const struct amd_pp_display_configuration *display_config = input;
PP_CHECK((struct pp_instance *)handle);
@@ -653,7 +768,7 @@ int amd_powerplay_display_configuration_change(void *handle, const void *input)
}
int amd_powerplay_get_display_power_level(void *handle,
- struct amd_pp_dal_clock_info *output)
+ struct amd_pp_simple_clock_info *output)
{
struct pp_hwmgr *hwmgr;
@@ -666,3 +781,86 @@ int amd_powerplay_get_display_power_level(void *handle,
return phm_get_dal_power_level(hwmgr, output);
}
+
+int amd_powerplay_get_current_clocks(void *handle,
+ struct amd_pp_clock_info *clocks)
+{
+ struct pp_hwmgr *hwmgr;
+ struct amd_pp_simple_clock_info simple_clocks;
+ struct pp_clock_info hw_clocks;
+
+ PP_CHECK((struct pp_instance *)handle);
+
+ if (clocks == NULL)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ phm_get_dal_power_level(hwmgr, &simple_clocks);
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_PowerContainment)) {
+ if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_PowerContainment))
+ PP_ASSERT_WITH_CODE(0, "Error in PHM_GetPowerContainmentClockInfo", return -1);
+ } else {
+ if (0 != phm_get_clock_info(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks, PHM_PerformanceLevelDesignation_Activity))
+ PP_ASSERT_WITH_CODE(0, "Error in PHM_GetClockInfo", return -1);
+ }
+
+ clocks->min_engine_clock = hw_clocks.min_eng_clk;
+ clocks->max_engine_clock = hw_clocks.max_eng_clk;
+ clocks->min_memory_clock = hw_clocks.min_mem_clk;
+ clocks->max_memory_clock = hw_clocks.max_mem_clk;
+ clocks->min_bus_bandwidth = hw_clocks.min_bus_bandwidth;
+ clocks->max_bus_bandwidth = hw_clocks.max_bus_bandwidth;
+
+ clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk;
+ clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk;
+
+ clocks->max_clocks_state = simple_clocks.level;
+
+ if (0 == phm_get_current_shallow_sleep_clocks(hwmgr, &hwmgr->current_ps->hardware, &hw_clocks)) {
+ clocks->max_engine_clock_in_sr = hw_clocks.max_eng_clk;
+ clocks->min_engine_clock_in_sr = hw_clocks.min_eng_clk;
+ }
+
+ return 0;
+
+}
+
+int amd_powerplay_get_clock_by_type(void *handle, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks)
+{
+ int result = -1;
+
+ struct pp_hwmgr *hwmgr;
+
+ PP_CHECK((struct pp_instance *)handle);
+
+ if (clocks == NULL)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ result = phm_get_clock_by_type(hwmgr, type, clocks);
+
+ return result;
+}
+
+int amd_powerplay_get_display_mode_validation_clocks(void *handle,
+ struct amd_pp_simple_clock_info *clocks)
+{
+ int result = -1;
+ struct pp_hwmgr *hwmgr;
+
+ PP_CHECK((struct pp_instance *)handle);
+
+ if (clocks == NULL)
+ return -EINVAL;
+
+ hwmgr = ((struct pp_instance *)handle)->hwmgr;
+
+ if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_DynamicPatchPowerState))
+ result = phm_get_max_high_clocks(hwmgr, clocks);
+
+ return result;
+}
+
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
index cf01177ca3b5e..5682490337e3d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_hwmgr.c
@@ -241,6 +241,11 @@ static int cz_initialize_dpm_defaults(struct pp_hwmgr *hwmgr)
phm_cap_set(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_DynamicUVDState);
+ phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_UVDDPM);
+ phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+ PHM_PlatformCaps_VCEDPM);
+
cz_hwmgr->cc6_settings.cpu_cc6_disable = false;
cz_hwmgr->cc6_settings.cpu_pstate_disable = false;
cz_hwmgr->cc6_settings.nb_pstate_switch_disable = false;
@@ -733,7 +738,6 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr,
unsigned long clock = 0;
unsigned long level;
unsigned long stable_pstate_sclk;
- struct PP_Clocks clocks;
unsigned long percentage;
cz_hwmgr->sclk_dpm.soft_min_clk = table->entries[0].clk;
@@ -744,8 +748,10 @@ static int cz_tf_update_sclk_limit(struct pp_hwmgr *hwmgr,
else
cz_hwmgr->sclk_dpm.soft_max_clk = table->entries[table->count - 1].clk;
- /*PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks);*/
- clock = clocks.engineClock;
+ clock = hwmgr->display_config.min_core_set_clock;
+;
+ if (clock == 0)
+ printk(KERN_INFO "[ powerplay ] min_core_set_clock not set\n");
if (cz_hwmgr->sclk_dpm.hard_min_clk != clock) {
cz_hwmgr->sclk_dpm.hard_min_clk = clock;
@@ -901,9 +907,9 @@ static int cz_tf_update_low_mem_pstate(struct pp_hwmgr *hwmgr,
if (pnew_state->action == FORCE_HIGH)
cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch);
- else if(pnew_state->action == CANCEL_FORCE_HIGH)
- cz_nbdpm_pstate_enable_disable(hwmgr, false, disable_switch);
- else
+ else if (pnew_state->action == CANCEL_FORCE_HIGH)
+ cz_nbdpm_pstate_enable_disable(hwmgr, true, disable_switch);
+ else
cz_nbdpm_pstate_enable_disable(hwmgr, enable_low_mem_state, disable_switch);
}
return 0;
@@ -1128,9 +1134,10 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
cast_const_PhwCzPowerState(&pcurrent_ps->hardware);
struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
- struct PP_Clocks clocks;
+ struct PP_Clocks clocks = {0, 0, 0, 0};
bool force_high;
- unsigned long num_of_active_displays = 4;
+ uint32_t num_of_active_displays = 0;
+ struct cgs_display_info info = {0};
cz_ps->evclk = hwmgr->vce_arbiter.evclk;
cz_ps->ecclk = hwmgr->vce_arbiter.ecclk;
@@ -1142,12 +1149,15 @@ static int cz_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
cz_hwmgr->battery_state = (PP_StateUILabel_Battery == prequest_ps->classification.ui_label);
- /* to do PECI_GetMinClockSettings(pHwMgr->pPECI, &clocks); */
- /* PECI_GetNumberOfActiveDisplays(pHwMgr->pPECI, &numOfActiveDisplays); */
+ clocks.memoryClock = hwmgr->display_config.min_mem_set_clock != 0 ?
+ hwmgr->display_config.min_mem_set_clock :
+ cz_hwmgr->sys_info.nbp_memory_clock[1];
+
+ cgs_get_active_displays_info(hwmgr->device, &info);
+ num_of_active_displays = info.display_count;
+
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps, PHM_PlatformCaps_StablePState))
clocks.memoryClock = hwmgr->dyn_state.max_clock_voltage_on_ac.mclk;
- else
- clocks.memoryClock = 0;
if (clocks.memoryClock < hwmgr->gfx_arbiter.mclk)
clocks.memoryClock = hwmgr->gfx_arbiter.mclk;
@@ -1217,6 +1227,7 @@ static int cz_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
printk(KERN_ERR "[ powerplay ] Fail to construct set_power_state\n");
return result;
}
+ hwmgr->platform_descriptor.hardwareActivityPerformanceLevels = CZ_MAX_HARDWARE_POWERLEVELS;
result = phm_construct_table(hwmgr, &cz_phm_enable_clock_power_gatings_master, &(hwmgr->enable_clock_power_gatings));
if (result != 0) {
@@ -1648,10 +1659,10 @@ static void cz_hw_print_display_cfg(
& PWRMGT_SEPARATION_TIME_MASK)
<< PWRMGT_SEPARATION_TIME_SHIFT;
- data|= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0)
+ data |= (hw_data->cc6_settings.cpu_cc6_disable ? 0x1 : 0x0)
<< PWRMGT_DISABLE_CPU_CSTATES_SHIFT;
- data|= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0)
+ data |= (hw_data->cc6_settings.cpu_pstate_disable ? 0x1 : 0x0)
<< PWRMGT_DISABLE_CPU_PSTATES_SHIFT;
PP_DBG_LOG("SetDisplaySizePowerParams data: 0x%X\n",
@@ -1666,9 +1677,9 @@ static void cz_hw_print_display_cfg(
}
- static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
+static int cz_store_cc6_data(struct pp_hwmgr *hwmgr, uint32_t separation_time,
bool cc6_disable, bool pstate_disable, bool pstate_switch_disable)
- {
+{
struct cz_hwmgr *hw_data = (struct cz_hwmgr *)(hwmgr->backend);
if (separation_time !=
@@ -1696,20 +1707,19 @@ static void cz_hw_print_display_cfg(
return 0;
}
- static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr,
- struct amd_pp_dal_clock_info*info)
+static int cz_get_dal_power_level(struct pp_hwmgr *hwmgr,
+ struct amd_pp_simple_clock_info *info)
{
uint32_t i;
- const struct phm_clock_voltage_dependency_table * table =
+ const struct phm_clock_voltage_dependency_table *table =
hwmgr->dyn_state.vddc_dep_on_dal_pwrl;
- const struct phm_clock_and_voltage_limits* limits =
+ const struct phm_clock_and_voltage_limits *limits =
&hwmgr->dyn_state.max_clock_voltage_on_ac;
info->engine_max_clock = limits->sclk;
info->memory_max_clock = limits->mclk;
for (i = table->count - 1; i > 0; i--) {
-
if (limits->vddc >= table->entries[i].v) {
info->level = table->entries[i].clk;
return 0;
@@ -1718,6 +1728,158 @@ static void cz_hw_print_display_cfg(
return -EINVAL;
}
+static int cz_force_clock_level(struct pp_hwmgr *hwmgr,
+ enum pp_clock_type type, int level)
+{
+ if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+ return -EINVAL;
+
+ switch (type) {
+ case PP_SCLK:
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_SetSclkSoftMin,
+ (1 << level));
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_SetSclkSoftMax,
+ (1 << level));
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int cz_print_clock_levels(struct pp_hwmgr *hwmgr,
+ enum pp_clock_type type, char *buf)
+{
+ struct phm_clock_voltage_dependency_table *sclk_table =
+ hwmgr->dyn_state.vddc_dependency_on_sclk;
+ int i, now, size = 0;
+
+ switch (type) {
+ case PP_SCLK:
+ now = PHM_GET_FIELD(cgs_read_ind_register(hwmgr->device,
+ CGS_IND_REG__SMC,
+ ixTARGET_AND_CURRENT_PROFILE_INDEX),
+ TARGET_AND_CURRENT_PROFILE_INDEX,
+ CURR_SCLK_INDEX);
+
+ for (i = 0; i < sclk_table->count; i++)
+ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, sclk_table->entries[i].clk / 100,
+ (i == now) ? "*" : "");
+ break;
+ default:
+ break;
+ }
+ return size;
+}
+
+static int cz_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+ PHM_PerformanceLevelDesignation designation, uint32_t index,
+ PHM_PerformanceLevel *level)
+{
+ const struct cz_power_state *ps;
+ struct cz_hwmgr *data;
+ uint32_t level_index;
+ uint32_t i;
+
+ if (level == NULL || hwmgr == NULL || state == NULL)
+ return -EINVAL;
+
+ data = (struct cz_hwmgr *)(hwmgr->backend);
+ ps = cast_const_PhwCzPowerState(state);
+
+ level_index = index > ps->level - 1 ? ps->level - 1 : index;
+
+ level->coreClock = ps->levels[level_index].engineClock;
+
+ if (designation == PHM_PerformanceLevelDesignation_PowerContainment) {
+ for (i = 1; i < ps->level; i++) {
+ if (ps->levels[i].engineClock > data->dce_slow_sclk_threshold) {
+ level->coreClock = ps->levels[i].engineClock;
+ break;
+ }
+ }
+ }
+
+ if (level_index == 0)
+ level->memory_clock = data->sys_info.nbp_memory_clock[CZ_NUM_NBPMEMORYCLOCK - 1];
+ else
+ level->memory_clock = data->sys_info.nbp_memory_clock[0];
+
+ level->vddc = (cz_convert_8Bit_index_to_voltage(hwmgr, ps->levels[level_index].vddcIndex) + 2) / 4;
+ level->nonLocalMemoryFreq = 0;
+ level->nonLocalMemoryWidth = 0;
+
+ return 0;
+}
+
+static int cz_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr,
+ const struct pp_hw_power_state *state, struct pp_clock_info *clock_info)
+{
+ const struct cz_power_state *ps = cast_const_PhwCzPowerState(state);
+
+ clock_info->min_eng_clk = ps->levels[0].engineClock / (1 << (ps->levels[0].ssDividerIndex));
+ clock_info->max_eng_clk = ps->levels[ps->level - 1].engineClock / (1 << (ps->levels[ps->level - 1].ssDividerIndex));
+
+ return 0;
+}
+
+static int cz_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type,
+ struct amd_pp_clocks *clocks)
+{
+ struct cz_hwmgr *data = (struct cz_hwmgr *)(hwmgr->backend);
+ int i;
+ struct phm_clock_voltage_dependency_table *table;
+
+ clocks->count = cz_get_max_sclk_level(hwmgr);
+ switch (type) {
+ case amd_pp_disp_clock:
+ for (i = 0; i < clocks->count; i++)
+ clocks->clock[i] = data->sys_info.display_clock[i];
+ break;
+ case amd_pp_sys_clock:
+ table = hwmgr->dyn_state.vddc_dependency_on_sclk;
+ for (i = 0; i < clocks->count; i++)
+ clocks->clock[i] = table->entries[i].clk;
+ break;
+ case amd_pp_mem_clock:
+ clocks->count = CZ_NUM_NBPMEMORYCLOCK;
+ for (i = 0; i < clocks->count; i++)
+ clocks->clock[i] = data->sys_info.nbp_memory_clock[clocks->count - 1 - i];
+ break;
+ default:
+ return -1;
+ }
+
+ return 0;
+}
+
+static int cz_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks)
+{
+ struct phm_clock_voltage_dependency_table *table =
+ hwmgr->dyn_state.vddc_dependency_on_sclk;
+ unsigned long level;
+ const struct phm_clock_and_voltage_limits *limits =
+ &hwmgr->dyn_state.max_clock_voltage_on_ac;
+
+ if ((NULL == table) || (table->count <= 0) || (clocks == NULL))
+ return -EINVAL;
+
+ level = cz_get_max_sclk_level(hwmgr) - 1;
+
+ if (level < table->count)
+ clocks->engine_max_clock = table->entries[level].clk;
+ else
+ clocks->engine_max_clock = table->entries[table->count - 1].clk;
+
+ clocks->memory_max_clock = limits->mclk;
+
+ return 0;
+}
+
static const struct pp_hwmgr_func cz_hwmgr_funcs = {
.backend_init = cz_hwmgr_backend_init,
.backend_fini = cz_hwmgr_backend_fini,
@@ -1736,7 +1898,13 @@ static const struct pp_hwmgr_func cz_hwmgr_funcs = {
.print_current_perforce_level = cz_print_current_perforce_level,
.set_cpu_power_state = cz_set_cpu_power_state,
.store_cc6_data = cz_store_cc6_data,
- .get_dal_power_level= cz_get_dal_power_level,
+ .force_clock_level = cz_force_clock_level,
+ .print_clock_levels = cz_print_clock_levels,
+ .get_dal_power_level = cz_get_dal_power_level,
+ .get_performance_level = cz_get_performance_level,
+ .get_current_shallow_sleep_clocks = cz_get_current_shallow_sleep_clocks,
+ .get_clock_by_type = cz_get_clock_by_type,
+ .get_max_high_clocks = cz_get_max_high_clocks,
};
int cz_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
index 28031a7eddba6..51dedf84623c6 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.c
@@ -4275,7 +4275,6 @@ static int fiji_populate_and_upload_sclk_mclk_dpm_levels(
if (data->need_update_smu7_dpm_table & DPMTABLE_OD_UPDATE_MCLK) {
dpm_table->mclk_table.dpm_levels
[dpm_table->mclk_table.count - 1].value = mclk;
-
if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
PHM_PlatformCaps_OD6PlusinACSupport) ||
phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -4886,6 +4885,10 @@ static void fiji_print_current_perforce_level(
activity_percent >>= 8;
seq_printf(m, "\n [GPU load]: %u%%\n\n", activity_percent > 100 ? 100 : activity_percent);
+
+ seq_printf(m, "uvd %sabled\n", data->uvd_power_gated ? "dis" : "en");
+
+ seq_printf(m, "vce %sabled\n", data->vce_power_gated ? "dis" : "en");
}
static int fiji_program_display_gap(struct pp_hwmgr *hwmgr)
@@ -5073,6 +5076,125 @@ static int fiji_get_fan_control_mode(struct pp_hwmgr *hwmgr)
CG_FDO_CTRL2, FDO_PWM_MODE);
}
+static int fiji_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
+{
+ struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+ *table = (char *)&data->smc_state_table;
+
+ return sizeof(struct SMU73_Discrete_DpmTable);
+}
+
+static int fiji_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
+{
+ struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+ void *table = (void *)&data->smc_state_table;
+
+ memcpy(table, buf, size);
+
+ return 0;
+}
+
+static int fiji_force_clock_level(struct pp_hwmgr *hwmgr,
+ enum pp_clock_type type, int level)
+{
+ struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+
+ if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+ return -EINVAL;
+
+ switch (type) {
+ case PP_SCLK:
+ if (!data->sclk_dpm_key_disabled)
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_SCLKDPM_SetEnabledMask,
+ (1 << level));
+ break;
+ case PP_MCLK:
+ if (!data->mclk_dpm_key_disabled)
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_MCLKDPM_SetEnabledMask,
+ (1 << level));
+ break;
+ case PP_PCIE:
+ if (!data->pcie_dpm_key_disabled)
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_PCIeDPM_ForceLevel,
+ (1 << level));
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int fiji_print_clock_levels(struct pp_hwmgr *hwmgr,
+ enum pp_clock_type type, char *buf)
+{
+ struct fiji_hwmgr *data = (struct fiji_hwmgr *)(hwmgr->backend);
+ struct fiji_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+ struct fiji_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+ struct fiji_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table);
+ int i, now, size = 0;
+ uint32_t clock, pcie_speed;
+
+ switch (type) {
+ case PP_SCLK:
+ smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency);
+ clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+ for (i = 0; i < sclk_table->count; i++) {
+ if (clock > sclk_table->dpm_levels[i].value)
+ continue;
+ break;
+ }
+ now = i;
+
+ for (i = 0; i < sclk_table->count; i++)
+ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, sclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+ case PP_MCLK:
+ smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency);
+ clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+ for (i = 0; i < mclk_table->count; i++) {
+ if (clock > mclk_table->dpm_levels[i].value)
+ continue;
+ break;
+ }
+ now = i;
+
+ for (i = 0; i < mclk_table->count; i++)
+ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, mclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+ case PP_PCIE:
+ pcie_speed = fiji_get_current_pcie_speed(hwmgr);
+ for (i = 0; i < pcie_table->count; i++) {
+ if (pcie_speed != pcie_table->dpm_levels[i].value)
+ continue;
+ break;
+ }
+ now = i;
+
+ for (i = 0; i < pcie_table->count; i++)
+ size += sprintf(buf + size, "%d: %s %s\n", i,
+ (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
+ (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+ (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+ (i == now) ? "*" : "");
+ break;
+ default:
+ break;
+ }
+ return size;
+}
+
static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
.backend_init = &fiji_hwmgr_backend_init,
.backend_fini = &tonga_hwmgr_backend_fini,
@@ -5108,6 +5230,10 @@ static const struct pp_hwmgr_func fiji_hwmgr_funcs = {
.register_internal_thermal_interrupt = fiji_register_internal_thermal_interrupt,
.set_fan_control_mode = fiji_set_fan_control_mode,
.get_fan_control_mode = fiji_get_fan_control_mode,
+ .get_pp_table = fiji_get_pp_table,
+ .set_pp_table = fiji_set_pp_table,
+ .force_clock_level = fiji_force_clock_level,
+ .print_clock_levels = fiji_print_clock_levels,
};
int fiji_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
index 22e273b1c1c57..a16f7cd4c238f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/fiji_hwmgr.h
@@ -29,6 +29,7 @@
#include "smu73_discrete.h"
#include "ppatomctrl.h"
#include "fiji_ppsmc.h"
+#include "pp_endian.h"
#define FIJI_MAX_HARDWARE_POWERLEVELS 2
#define FIJI_AT_DFLT 30
@@ -347,15 +348,4 @@ int fiji_update_samu_dpm(struct pp_hwmgr *hwmgr, bool bgate);
int fiji_update_acp_dpm(struct pp_hwmgr *hwmgr, bool bgate);
int fiji_enable_disable_vce_dpm(struct pp_hwmgr *hwmgr, bool enable);
-#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
-#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
-
-#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
-#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
-
-#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
-#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
-
-#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
-
#endif /* _FIJI_HWMGR_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
index 9deadabbc81c6..72cfecc4f9f72 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/functiontables.c
@@ -34,6 +34,11 @@ static int phm_run_table(struct pp_hwmgr *hwmgr,
int result = 0;
phm_table_function *function;
+ if (rt_table->function_list == NULL) {
+ printk(KERN_INFO "[ powerplay ] this function not implement!\n");
+ return 0;
+ }
+
for (function = rt_table->function_list; NULL != *function; function++) {
int tmp = (*function)(hwmgr, input, output, temp_storage, result);
@@ -57,9 +62,9 @@ int phm_dispatch_table(struct pp_hwmgr *hwmgr,
int result = 0;
void *temp_storage = NULL;
- if (hwmgr == NULL || rt_table == NULL || rt_table->function_list == NULL) {
+ if (hwmgr == NULL || rt_table == NULL) {
printk(KERN_ERR "[ powerplay ] Invalid Parameter!\n");
- return 0; /*temp return ture because some function not implement on some asic */
+ return -EINVAL;
}
if (0 != rt_table->storage_size) {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 0f2d5e4bc241a..be31bed2538ad 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -26,7 +26,7 @@
#include "power_state.h"
#include "pp_acpi.h"
#include "amd_acpi.h"
-#include "amd_powerplay.h"
+#include "pp_debug.h"
#define PHM_FUNC_CHECK(hw) \
do { \
@@ -313,13 +313,12 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
}
int phm_get_dal_power_level(struct pp_hwmgr *hwmgr,
- struct amd_pp_dal_clock_info *info)
+ struct amd_pp_simple_clock_info *info)
{
PHM_FUNC_CHECK(hwmgr);
if (info == NULL || hwmgr->hwmgr_func->get_dal_power_level == NULL)
return -EINVAL;
-
return hwmgr->hwmgr_func->get_dal_power_level(hwmgr, info);
}
@@ -332,3 +331,91 @@ int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr)
return 0;
}
+
+
+int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+ PHM_PerformanceLevelDesignation designation, uint32_t index,
+ PHM_PerformanceLevel *level)
+{
+ PHM_FUNC_CHECK(hwmgr);
+ if (hwmgr->hwmgr_func->get_performance_level == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->get_performance_level(hwmgr, state, designation, index, level);
+
+
+}
+
+
+/**
+* Gets Clock Info.
+*
+* @param pHwMgr the address of the powerplay hardware manager.
+* @param pPowerState the address of the Power State structure.
+* @param pClockInfo the address of PP_ClockInfo structure where the result will be returned.
+* @exception PP_Result_Failed if any of the paramters is NULL, otherwise the return value from the back-end.
+*/
+int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *pclock_info,
+ PHM_PerformanceLevelDesignation designation)
+{
+ int result;
+ PHM_PerformanceLevel performance_level;
+
+ PHM_FUNC_CHECK(hwmgr);
+
+ PP_ASSERT_WITH_CODE((NULL != state), "Invalid Input!", return -EINVAL);
+ PP_ASSERT_WITH_CODE((NULL != pclock_info), "Invalid Input!", return -EINVAL);
+
+ result = phm_get_performance_level(hwmgr, state, PHM_PerformanceLevelDesignation_Activity, 0, &performance_level);
+
+ PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve minimum clocks.", return result);
+
+
+ pclock_info->min_mem_clk = performance_level.memory_clock;
+ pclock_info->min_eng_clk = performance_level.coreClock;
+ pclock_info->min_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth;
+
+
+ result = phm_get_performance_level(hwmgr, state, designation,
+ (hwmgr->platform_descriptor.hardwareActivityPerformanceLevels - 1), &performance_level);
+
+ PP_ASSERT_WITH_CODE((0 == result), "Failed to retrieve maximum clocks.", return result);
+
+ pclock_info->max_mem_clk = performance_level.memory_clock;
+ pclock_info->max_eng_clk = performance_level.coreClock;
+ pclock_info->max_bus_bandwidth = performance_level.nonLocalMemoryFreq * performance_level.nonLocalMemoryWidth;
+
+ return 0;
+}
+
+int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info)
+{
+ PHM_FUNC_CHECK(hwmgr);
+
+ if (hwmgr->hwmgr_func->get_current_shallow_sleep_clocks == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->get_current_shallow_sleep_clocks(hwmgr, state, clock_info);
+
+}
+
+int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks)
+{
+ PHM_FUNC_CHECK(hwmgr);
+
+ if (hwmgr->hwmgr_func->get_clock_by_type == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->get_clock_by_type(hwmgr, type, clocks);
+
+}
+
+int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks)
+{
+ PHM_FUNC_CHECK(hwmgr);
+
+ if (hwmgr->hwmgr_func->get_max_high_clocks == NULL)
+ return -EINVAL;
+
+ return hwmgr->hwmgr_func->get_max_high_clocks(hwmgr, clocks);
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
index b7429a5278289..b10df328d58cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppevvmath.h
@@ -293,7 +293,7 @@ fInt GetScaledFraction(int X, int factor)
}
if (factor == 1)
- return (ConvertToFraction(X));
+ return ConvertToFraction(X);
fValue = fDivide(ConvertToFraction(X * uPow(-1, bNEGATED)), ConvertToFraction(factor));
@@ -371,7 +371,7 @@ fInt fDivide (fInt X, fInt Y)
fZERO = ConvertToFraction(0);
if (Equal(Y, fZERO))
- return fZERO;
+ return fZERO;
longlongX = (int64_t)X.full;
longlongY = (int64_t)Y.full;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
index 980d3bf8ea768..0d5d8372953ee 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.c
@@ -5185,7 +5185,6 @@ tonga_print_current_perforce_level(struct pp_hwmgr *hwmgr, struct seq_file *m)
mclk = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
seq_printf(m, "\n [ mclk ]: %u MHz\n\n [ sclk ]: %u MHz\n", mclk/100, sclk/100);
-
offset = data->soft_regs_start + offsetof(SMU72_SoftRegisters, AverageGraphicsActivity);
activity_percent = cgs_read_ind_register(hwmgr->device, CGS_IND_REG__SMC, offset);
activity_percent += 0x80;
@@ -5193,6 +5192,9 @@ tonga_print_current_perforce_level(struct pp_hwmgr *hwmgr, struct seq_file *m)
seq_printf(m, "\n [GPU load]: %u%%\n\n", activity_percent > 100 ? 100 : activity_percent);
+ seq_printf(m, "uvd %sabled\n", data->uvd_power_gated ? "dis" : "en");
+
+ seq_printf(m, "vce %sabled\n", data->vce_power_gated ? "dis" : "en");
}
static int tonga_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, const void *input)
@@ -6033,6 +6035,125 @@ static int tonga_get_fan_control_mode(struct pp_hwmgr *hwmgr)
CG_FDO_CTRL2, FDO_PWM_MODE);
}
+static int tonga_get_pp_table(struct pp_hwmgr *hwmgr, char **table)
+{
+ struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+
+ *table = (char *)&data->smc_state_table;
+
+ return sizeof(struct SMU72_Discrete_DpmTable);
+}
+
+static int tonga_set_pp_table(struct pp_hwmgr *hwmgr, const char *buf, size_t size)
+{
+ struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+
+ void *table = (void *)&data->smc_state_table;
+
+ memcpy(table, buf, size);
+
+ return 0;
+}
+
+static int tonga_force_clock_level(struct pp_hwmgr *hwmgr,
+ enum pp_clock_type type, int level)
+{
+ struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+
+ if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL)
+ return -EINVAL;
+
+ switch (type) {
+ case PP_SCLK:
+ if (!data->sclk_dpm_key_disabled)
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_SCLKDPM_SetEnabledMask,
+ (1 << level));
+ break;
+ case PP_MCLK:
+ if (!data->mclk_dpm_key_disabled)
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_MCLKDPM_SetEnabledMask,
+ (1 << level));
+ break;
+ case PP_PCIE:
+ if (!data->pcie_dpm_key_disabled)
+ smum_send_msg_to_smc_with_parameter(hwmgr->smumgr,
+ PPSMC_MSG_PCIeDPM_ForceLevel,
+ (1 << level));
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+static int tonga_print_clock_levels(struct pp_hwmgr *hwmgr,
+ enum pp_clock_type type, char *buf)
+{
+ struct tonga_hwmgr *data = (struct tonga_hwmgr *)(hwmgr->backend);
+ struct tonga_single_dpm_table *sclk_table = &(data->dpm_table.sclk_table);
+ struct tonga_single_dpm_table *mclk_table = &(data->dpm_table.mclk_table);
+ struct tonga_single_dpm_table *pcie_table = &(data->dpm_table.pcie_speed_table);
+ int i, now, size = 0;
+ uint32_t clock, pcie_speed;
+
+ switch (type) {
+ case PP_SCLK:
+ smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetSclkFrequency);
+ clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+ for (i = 0; i < sclk_table->count; i++) {
+ if (clock > sclk_table->dpm_levels[i].value)
+ continue;
+ break;
+ }
+ now = i;
+
+ for (i = 0; i < sclk_table->count; i++)
+ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, sclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+ case PP_MCLK:
+ smum_send_msg_to_smc(hwmgr->smumgr, PPSMC_MSG_API_GetMclkFrequency);
+ clock = cgs_read_register(hwmgr->device, mmSMC_MSG_ARG_0);
+
+ for (i = 0; i < mclk_table->count; i++) {
+ if (clock > mclk_table->dpm_levels[i].value)
+ continue;
+ break;
+ }
+ now = i;
+
+ for (i = 0; i < mclk_table->count; i++)
+ size += sprintf(buf + size, "%d: %uMhz %s\n",
+ i, mclk_table->dpm_levels[i].value / 100,
+ (i == now) ? "*" : "");
+ break;
+ case PP_PCIE:
+ pcie_speed = tonga_get_current_pcie_speed(hwmgr);
+ for (i = 0; i < pcie_table->count; i++) {
+ if (pcie_speed != pcie_table->dpm_levels[i].value)
+ continue;
+ break;
+ }
+ now = i;
+
+ for (i = 0; i < pcie_table->count; i++)
+ size += sprintf(buf + size, "%d: %s %s\n", i,
+ (pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x8" :
+ (pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+ (pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+ (i == now) ? "*" : "");
+ break;
+ default:
+ break;
+ }
+ return size;
+}
+
static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
.backend_init = &tonga_hwmgr_backend_init,
.backend_fini = &tonga_hwmgr_backend_fini,
@@ -6070,6 +6191,10 @@ static const struct pp_hwmgr_func tonga_hwmgr_funcs = {
.check_states_equal = tonga_check_states_equal,
.set_fan_control_mode = tonga_set_fan_control_mode,
.get_fan_control_mode = tonga_get_fan_control_mode,
+ .get_pp_table = tonga_get_pp_table,
+ .set_pp_table = tonga_set_pp_table,
+ .force_clock_level = tonga_force_clock_level,
+ .print_clock_levels = tonga_print_clock_levels,
};
int tonga_hwmgr_init(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
index 49168d262cccf..f88d3bbe66712 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/tonga_hwmgr.h
@@ -28,6 +28,7 @@
#include "ppatomctrl.h"
#include "ppinterrupt.h"
#include "tonga_powertune.h"
+#include "pp_endian.h"
#define TONGA_MAX_HARDWARE_POWERLEVELS 2
#define TONGA_DYNCLK_NUMBER_OF_TREND_COEFFICIENTS 15
@@ -386,17 +387,6 @@ typedef struct tonga_hwmgr tonga_hwmgr;
#define TONGA_UNUSED_GPIO_PIN 0x7F
-#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
-#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
-
-#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
-#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
-
-#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
-#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
-
-#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
-
int tonga_hwmgr_init(struct pp_hwmgr *hwmgr);
int tonga_update_vce_dpm(struct pp_hwmgr *hwmgr, const void *input);
int tonga_update_uvd_dpm(struct pp_hwmgr *hwmgr, bool bgate);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
index e61a3e67852e2..7255f7ddf93a2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/amd_powerplay.h
@@ -29,6 +29,7 @@
#include "amd_shared.h"
#include "cgs_common.h"
+
enum amd_pp_event {
AMD_PP_EVENT_INITIALIZE = 0,
AMD_PP_EVENT_UNINITIALIZE,
@@ -123,6 +124,7 @@ enum amd_dpm_forced_level {
AMD_DPM_FORCED_LEVEL_AUTO = 0,
AMD_DPM_FORCED_LEVEL_LOW = 1,
AMD_DPM_FORCED_LEVEL_HIGH = 2,
+ AMD_DPM_FORCED_LEVEL_MANUAL = 3,
};
struct amd_pp_init {
@@ -212,12 +214,55 @@ struct amd_pp_display_configuration {
uint32_t dce_tolerable_mclk_in_active_latency;
};
-struct amd_pp_dal_clock_info {
+struct amd_pp_simple_clock_info {
uint32_t engine_max_clock;
uint32_t memory_max_clock;
uint32_t level;
};
+enum PP_DAL_POWERLEVEL {
+ PP_DAL_POWERLEVEL_INVALID = 0,
+ PP_DAL_POWERLEVEL_ULTRALOW,
+ PP_DAL_POWERLEVEL_LOW,
+ PP_DAL_POWERLEVEL_NOMINAL,
+ PP_DAL_POWERLEVEL_PERFORMANCE,
+
+ PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW,
+ PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW,
+ PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL,
+ PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE,
+ PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1,
+ PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1,
+ PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1,
+ PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1,
+};
+
+struct amd_pp_clock_info {
+ uint32_t min_engine_clock;
+ uint32_t max_engine_clock;
+ uint32_t min_memory_clock;
+ uint32_t max_memory_clock;
+ uint32_t min_bus_bandwidth;
+ uint32_t max_bus_bandwidth;
+ uint32_t max_engine_clock_in_sr;
+ uint32_t min_engine_clock_in_sr;
+ enum PP_DAL_POWERLEVEL max_clocks_state;
+};
+
+enum amd_pp_clock_type {
+ amd_pp_disp_clock = 1,
+ amd_pp_sys_clock,
+ amd_pp_mem_clock
+};
+
+#define MAX_NUM_CLOCKS 16
+
+struct amd_pp_clocks {
+ uint32_t count;
+ uint32_t clock[MAX_NUM_CLOCKS];
+};
+
+
enum {
PP_GROUP_UNKNOWN = 0,
PP_GROUP_GFX = 1,
@@ -225,6 +270,17 @@ enum {
PP_GROUP_MAX
};
+enum pp_clock_type {
+ PP_SCLK,
+ PP_MCLK,
+ PP_PCIE,
+};
+
+struct pp_states_info {
+ uint32_t nums;
+ uint32_t states[16];
+};
+
#define PP_GROUP_MASK 0xF0000000
#define PP_GROUP_SHIFT 28
@@ -278,6 +334,11 @@ struct amd_powerplay_funcs {
int (*get_fan_control_mode)(void *handle);
int (*set_fan_speed_percent)(void *handle, uint32_t percent);
int (*get_fan_speed_percent)(void *handle, uint32_t *speed);
+ int (*get_pp_num_states)(void *handle, struct pp_states_info *data);
+ int (*get_pp_table)(void *handle, char **table);
+ int (*set_pp_table)(void *handle, const char *buf, size_t size);
+ int (*force_clock_level)(void *handle, enum pp_clock_type type, int level);
+ int (*print_clock_levels)(void *handle, enum pp_clock_type type, char *buf);
};
struct amd_powerplay {
@@ -288,12 +349,23 @@ struct amd_powerplay {
int amd_powerplay_init(struct amd_pp_init *pp_init,
struct amd_powerplay *amd_pp);
+
int amd_powerplay_fini(void *handle);
-int amd_powerplay_display_configuration_change(void *handle, const void *input);
+int amd_powerplay_display_configuration_change(void *handle,
+ const struct amd_pp_display_configuration *input);
int amd_powerplay_get_display_power_level(void *handle,
- struct amd_pp_dal_clock_info *output);
+ struct amd_pp_simple_clock_info *output);
+
+int amd_powerplay_get_current_clocks(void *handle,
+ struct amd_pp_clock_info *output);
+
+int amd_powerplay_get_clock_by_type(void *handle,
+ enum amd_pp_clock_type type,
+ struct amd_pp_clocks *clocks);
+int amd_powerplay_get_display_mode_validation_clocks(void *handle,
+ struct amd_pp_simple_clock_info *output);
#endif /* _AMD_POWERPLAY_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 91795efe13365..040d3f7cbf490 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -31,6 +31,7 @@ struct pp_power_state;
enum amd_dpm_forced_level;
struct PP_TemperatureRange;
+
struct phm_fan_speed_info {
uint32_t min_percent;
uint32_t max_percent;
@@ -290,6 +291,15 @@ struct PP_Clocks {
uint32_t engineClockInSR;
};
+struct pp_clock_info {
+ uint32_t min_mem_clk;
+ uint32_t max_mem_clk;
+ uint32_t min_eng_clk;
+ uint32_t max_eng_clk;
+ uint32_t min_bus_bandwidth;
+ uint32_t max_bus_bandwidth;
+};
+
struct phm_platform_descriptor {
uint32_t platformCaps[PHM_MAX_NUM_CAPS_ULONG_ENTRIES];
uint32_t vbiosInterruptId;
@@ -323,24 +333,6 @@ struct phm_clocks {
uint32_t clock[MAX_NUM_CLOCKS];
};
-enum PP_DAL_POWERLEVEL {
- PP_DAL_POWERLEVEL_INVALID = 0,
- PP_DAL_POWERLEVEL_ULTRALOW,
- PP_DAL_POWERLEVEL_LOW,
- PP_DAL_POWERLEVEL_NOMINAL,
- PP_DAL_POWERLEVEL_PERFORMANCE,
-
- PP_DAL_POWERLEVEL_0 = PP_DAL_POWERLEVEL_ULTRALOW,
- PP_DAL_POWERLEVEL_1 = PP_DAL_POWERLEVEL_LOW,
- PP_DAL_POWERLEVEL_2 = PP_DAL_POWERLEVEL_NOMINAL,
- PP_DAL_POWERLEVEL_3 = PP_DAL_POWERLEVEL_PERFORMANCE,
- PP_DAL_POWERLEVEL_4 = PP_DAL_POWERLEVEL_3+1,
- PP_DAL_POWERLEVEL_5 = PP_DAL_POWERLEVEL_4+1,
- PP_DAL_POWERLEVEL_6 = PP_DAL_POWERLEVEL_5+1,
- PP_DAL_POWERLEVEL_7 = PP_DAL_POWERLEVEL_6+1,
-};
-
-
extern int phm_enable_clock_power_gatings(struct pp_hwmgr *hwmgr);
extern int phm_powergate_uvd(struct pp_hwmgr *hwmgr, bool gate);
extern int phm_powergate_vce(struct pp_hwmgr *hwmgr, bool gate);
@@ -375,11 +367,25 @@ extern int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
const struct amd_pp_display_configuration *display_config);
extern int phm_get_dal_power_level(struct pp_hwmgr *hwmgr,
- struct amd_pp_dal_clock_info*info);
+ struct amd_pp_simple_clock_info *info);
extern int phm_set_cpu_power_state(struct pp_hwmgr *hwmgr);
extern int phm_power_down_asic(struct pp_hwmgr *hwmgr);
+extern int phm_get_performance_level(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+ PHM_PerformanceLevelDesignation designation, uint32_t index,
+ PHM_PerformanceLevel *level);
+
+extern int phm_get_clock_info(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state,
+ struct pp_clock_info *pclock_info,
+ PHM_PerformanceLevelDesignation designation);
+
+extern int phm_get_current_shallow_sleep_clocks(struct pp_hwmgr *hwmgr, const struct pp_hw_power_state *state, struct pp_clock_info *clock_info);
+
+extern int phm_get_clock_by_type(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
+
+extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
+
#endif /* _HARDWARE_MANAGER_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index aeaa3dbba525c..928f5a740cba7 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -325,8 +325,18 @@ struct pp_hwmgr_func {
bool cc6_disable, bool pstate_disable,
bool pstate_switch_disable);
int (*get_dal_power_level)(struct pp_hwmgr *hwmgr,
- struct amd_pp_dal_clock_info *info);
+ struct amd_pp_simple_clock_info *info);
+ int (*get_performance_level)(struct pp_hwmgr *, const struct pp_hw_power_state *,
+ PHM_PerformanceLevelDesignation, uint32_t, PHM_PerformanceLevel *);
+ int (*get_current_shallow_sleep_clocks)(struct pp_hwmgr *hwmgr,
+ const struct pp_hw_power_state *state, struct pp_clock_info *clock_info);
+ int (*get_clock_by_type)(struct pp_hwmgr *hwmgr, enum amd_pp_clock_type type, struct amd_pp_clocks *clocks);
+ int (*get_max_high_clocks)(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
int (*power_off_asic)(struct pp_hwmgr *hwmgr);
+ int (*get_pp_table)(struct pp_hwmgr *hwmgr, char **table);
+ int (*set_pp_table)(struct pp_hwmgr *hwmgr, const char *buf, size_t size);
+ int (*force_clock_level)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, int level);
+ int (*print_clock_levels)(struct pp_hwmgr *hwmgr, enum pp_clock_type type, char *buf);
};
struct pp_table_func {
diff --git a/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h b/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h
new file mode 100644
index 0000000000000..f49d1963fe85b
--- /dev/null
+++ b/drivers/gpu/drm/amd/powerplay/inc/pp_endian.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright 2016 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef _PP_ENDIAN_H_
+#define _PP_ENDIAN_H_
+
+#define PP_HOST_TO_SMC_UL(X) cpu_to_be32(X)
+#define PP_SMC_TO_HOST_UL(X) be32_to_cpu(X)
+
+#define PP_HOST_TO_SMC_US(X) cpu_to_be16(X)
+#define PP_SMC_TO_HOST_US(X) be16_to_cpu(X)
+
+#define CONVERT_FROM_HOST_TO_SMC_UL(X) ((X) = PP_HOST_TO_SMC_UL(X))
+#define CONVERT_FROM_SMC_TO_HOST_UL(X) ((X) = PP_SMC_TO_HOST_UL(X))
+
+#define CONVERT_FROM_HOST_TO_SMC_US(X) ((X) = PP_HOST_TO_SMC_US(X))
+
+#endif /* _PP_ENDIAN_H_ */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 504f035d1843d..fc9e3d1dd4098 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -32,6 +32,27 @@ struct pp_instance;
#define smu_lower_32_bits(n) ((uint32_t)(n))
#define smu_upper_32_bits(n) ((uint32_t)(((n)>>16)>>16))
+enum AVFS_BTC_STATUS {
+ AVFS_BTC_BOOT = 0,
+ AVFS_BTC_BOOT_STARTEDSMU,
+ AVFS_LOAD_VIRUS,
+ AVFS_BTC_VIRUS_LOADED,
+ AVFS_BTC_VIRUS_FAIL,
+ AVFS_BTC_COMPLETED_PREVIOUSLY,
+ AVFS_BTC_ENABLEAVFS,
+ AVFS_BTC_STARTED,
+ AVFS_BTC_FAILED,
+ AVFS_BTC_RESTOREVFT_FAILED,
+ AVFS_BTC_SAVEVFT_FAILED,
+ AVFS_BTC_DPMTABLESETUP_FAILED,
+ AVFS_BTC_COMPLETED_UNSAVED,
+ AVFS_BTC_COMPLETED_SAVED,
+ AVFS_BTC_COMPLETED_RESTORED,
+ AVFS_BTC_DISABLED,
+ AVFS_BTC_NOTSUPPORTED,
+ AVFS_BTC_SMUMSG_ERROR
+};
+
struct pp_smumgr_func {
int (*smu_init)(struct pp_smumgr *smumgr);
int (*smu_fini)(struct pp_smumgr *smumgr);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
index 8cd22d9c91401..b4eb483215b17 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/fiji_smumgr.h
@@ -23,24 +23,6 @@
#ifndef _FIJI_SMUMANAGER_H_
#define _FIJI_SMUMANAGER_H_
-enum AVFS_BTC_STATUS {
- AVFS_BTC_BOOT = 0,
- AVFS_BTC_BOOT_STARTEDSMU,
- AVFS_LOAD_VIRUS,
- AVFS_BTC_VIRUS_LOADED,
- AVFS_BTC_VIRUS_FAIL,
- AVFS_BTC_STARTED,
- AVFS_BTC_FAILED,
- AVFS_BTC_RESTOREVFT_FAILED,
- AVFS_BTC_SAVEVFT_FAILED,
- AVFS_BTC_DPMTABLESETUP_FAILED,
- AVFS_BTC_COMPLETED_UNSAVED,
- AVFS_BTC_COMPLETED_SAVED,
- AVFS_BTC_COMPLETED_RESTORED,
- AVFS_BTC_DISABLED,
- AVFS_BTC_NOTSUPPORTED,
- AVFS_BTC_SMUMSG_ERROR
-};
struct fiji_smu_avfs {
enum AVFS_BTC_STATUS AvfsBtcStatus;
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 8b2becd1aa079..a5ff9458d3590 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -229,6 +229,14 @@ static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
amd_sched_wakeup(entity->sched);
}
+static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
+{
+ struct amd_sched_entity *entity =
+ container_of(cb, struct amd_sched_entity, cb);
+ entity->dependency = NULL;
+ fence_put(f);
+}
+
static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
{
struct amd_gpu_scheduler *sched = entity->sched;
@@ -251,7 +259,7 @@ static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
}
/* Wait for fence to be scheduled */
- entity->cb.func = amd_sched_entity_wakeup;
+ entity->cb.func = amd_sched_entity_clear_dep;
list_add_tail(&entity->cb.node, &s_fence->scheduled_cb);
return true;
}
diff --git a/drivers/gpu/drm/amd/scheduler/sched_fence.c b/drivers/gpu/drm/amd/scheduler/sched_fence.c
index 87c78eecea649..dc115aea352bc 100644
--- a/drivers/gpu/drm/amd/scheduler/sched_fence.c
+++ b/drivers/gpu/drm/amd/scheduler/sched_fence.c
@@ -84,12 +84,33 @@ static bool amd_sched_fence_enable_signaling(struct fence *f)
return true;
}
-static void amd_sched_fence_release(struct fence *f)
+/**
+ * amd_sched_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
+ *
+ * Free up the fence memory after the RCU grace period.
+ */
+static void amd_sched_fence_free(struct rcu_head *rcu)
{
+ struct fence *f = container_of(rcu, struct fence, rcu);
struct amd_sched_fence *fence = to_amd_sched_fence(f);
kmem_cache_free(sched_fence_slab, fence);
}
+/**
+ * amd_sched_fence_release - callback that fence can be freed
+ *
+ * @fence: fence
+ *
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
+ */
+static void amd_sched_fence_release(struct fence *f)
+{
+ call_rcu(&f->rcu, amd_sched_fence_free);
+}
+
const struct fence_ops amd_sched_fence_ops = {
.get_driver_name = amd_sched_fence_get_driver_name,
.get_timeline_name = amd_sched_fence_get_timeline_name,
diff --git a/drivers/gpu/drm/arm/Kconfig b/drivers/gpu/drm/arm/Kconfig
new file mode 100644
index 0000000000000..eaed454e043c7
--- /dev/null
+++ b/drivers/gpu/drm/arm/Kconfig
@@ -0,0 +1,27 @@
+config DRM_ARM
+ bool
+ help
+ Choose this option to select drivers for ARM's devices
+
+config DRM_HDLCD
+ tristate "ARM HDLCD"
+ depends on DRM && OF && (ARM || ARM64)
+ depends on COMMON_CLK
+ select DRM_ARM
+ select DRM_KMS_HELPER
+ select DRM_KMS_FB_HELPER
+ select DRM_KMS_CMA_HELPER
+ help
+ Choose this option if you have an ARM High Definition Colour LCD
+ controller.
+
+ If M is selected the module will be called hdlcd.
+
+config DRM_HDLCD_SHOW_UNDERRUN
+ bool "Show underrun conditions"
+ depends on DRM_HDLCD
+ default n
+ help
+ Enable this option to show in red colour the pixels that the
+ HDLCD device did not fetch from framebuffer due to underrun
+ conditions.
diff --git a/drivers/gpu/drm/arm/Makefile b/drivers/gpu/drm/arm/Makefile
new file mode 100644
index 0000000000000..89dcb7bab93a0
--- /dev/null
+++ b/drivers/gpu/drm/arm/Makefile
@@ -0,0 +1,2 @@
+hdlcd-y := hdlcd_drv.o hdlcd_crtc.o
+obj-$(CONFIG_DRM_HDLCD) += hdlcd.o
diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
new file mode 100644
index 0000000000000..fef1b04c2aab4
--- /dev/null
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -0,0 +1,327 @@
+/*
+ * Copyright (C) 2013-2015 ARM Limited
+ * Author: Liviu Dudau
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License. See the file COPYING in the main directory of this archive
+ * for more details.
+ *
+ * Implementation of a CRTC class for the HDLCD driver.
+ */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include