sync with OpenBSD -current

purplerain 2024-05-06 16:20:26 +00:00
parent f05839c6d3
commit a6677bfd36
Signed by: purplerain
GPG key ID: F42C07F07E2E35B7
29 changed files with 703 additions and 123 deletions

View file

@@ -1785,6 +1785,7 @@ err_node_allow:
err_bo_create:
amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id);
err_reserve_limit:
amdgpu_sync_free(&(*mem)->sync);
mutex_destroy(&(*mem)->lock);
if (gobj)
drm_gem_object_put(gobj);

View file

@@ -819,7 +819,7 @@ retry:
p->bytes_moved += ctx.bytes_moved;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
amdgpu_bo_in_cpu_visible_vram(bo))
amdgpu_res_cpu_visible(adev, bo->tbo.resource))
p->bytes_moved_vis += ctx.bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {

View file

@@ -107,6 +107,10 @@ void amdgpu_show_fdinfo(struct drm_printer *p, struct drm_file *file)
stats.requested_visible_vram/1024UL);
drm_printf(p, "amd-requested-gtt:\t%llu KiB\n",
stats.requested_gtt/1024UL);
drm_printf(p, "drm-shared-vram:\t%llu KiB\n", stats.vram_shared/1024UL);
drm_printf(p, "drm-shared-gtt:\t%llu KiB\n", stats.gtt_shared/1024UL);
drm_printf(p, "drm-shared-cpu:\t%llu KiB\n", stats.cpu_shared/1024UL);
for (hw_ip = 0; hw_ip < AMDGPU_HW_IP_NUM; ++hw_ip) {
if (!usage[hw_ip])
continue;
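
The three new drm-shared-* keys use the same "key:\tvalue KiB" layout as the existing amd-* counters, so they can be picked up by a plain text scan of the DRM fdinfo file. A minimal userspace sketch, assuming a Linux host that exposes /proc/<pid>/fdinfo/<fd> for the DRM file descriptor; the reader below is illustrative and not part of this diff:

/* Illustrative reader for the new drm-shared-* fdinfo keys (not part of
 * this diff). Build with: cc -o drm-shared drm-shared.c
 * Usage: ./drm-shared /proc/<pid>/fdinfo/<fd>
 */
#include <stdio.h>

int
main(int argc, char **argv)
{
	char line[256];
	unsigned long long kib;
	FILE *f;

	if (argc != 2) {
		fprintf(stderr, "usage: %s /proc/<pid>/fdinfo/<fd>\n", argv[0]);
		return 1;
	}
	if ((f = fopen(argv[1], "r")) == NULL) {
		perror("fopen");
		return 1;
	}
	while (fgets(line, sizeof(line), f) != NULL) {
		/* Keys are printed as "drm-shared-vram:\t<n> KiB" etc. */
		if (sscanf(line, "drm-shared-vram:\t%llu KiB", &kib) == 1)
			printf("shared VRAM: %llu KiB\n", kib);
		else if (sscanf(line, "drm-shared-gtt:\t%llu KiB", &kib) == 1)
			printf("shared GTT:  %llu KiB\n", kib);
		else if (sscanf(line, "drm-shared-cpu:\t%llu KiB", &kib) == 1)
			printf("shared CPU:  %llu KiB\n", kib);
	}
	fclose(f);
	return 0;
}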

View file

@@ -627,8 +627,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
bo->tbo.resource->mem_type == TTM_PL_VRAM &&
amdgpu_bo_in_cpu_visible_vram(bo))
amdgpu_res_cpu_visible(adev, bo->tbo.resource))
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
ctx.bytes_moved);
else
@@ -1302,26 +1301,39 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo, bool evict)
void amdgpu_bo_get_memory(struct amdgpu_bo *bo,
struct amdgpu_mem_stats *stats)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_resource *res = bo->tbo.resource;
uint64_t size = amdgpu_bo_size(bo);
struct drm_gem_object *obj;
unsigned int domain;
bool shared;
/* Abort if the BO doesn't currently have a backing store */
if (!bo->tbo.resource)
if (!res)
return;
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
obj = &bo->tbo.base;
shared = drm_gem_object_is_shared_for_memory_stats(obj);
domain = amdgpu_mem_type_to_domain(res->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
stats->vram += size;
if (amdgpu_bo_in_cpu_visible_vram(bo))
if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
stats->visible_vram += size;
if (shared)
stats->vram_shared += size;
break;
case AMDGPU_GEM_DOMAIN_GTT:
stats->gtt += size;
if (shared)
stats->gtt_shared += size;
break;
case AMDGPU_GEM_DOMAIN_CPU:
default:
stats->cpu += size;
if (shared)
stats->cpu_shared += size;
break;
}
@@ -1406,10 +1418,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* Remember that this BO was accessed by the CPU */
abo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
if (bo->resource->mem_type != TTM_PL_VRAM)
return 0;
if (amdgpu_bo_in_cpu_visible_vram(abo))
if (amdgpu_res_cpu_visible(adev, bo->resource))
return 0;
/* Can't move a pinned BO to visible VRAM */
@@ -1433,7 +1442,7 @@ vm_fault_t amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* this should never happen */
if (bo->resource->mem_type == TTM_PL_VRAM &&
!amdgpu_bo_in_cpu_visible_vram(abo))
!amdgpu_res_cpu_visible(adev, bo->resource))
return VM_FAULT_SIGBUS;
ttm_bo_move_to_lru_tail_unlocked(bo);
@@ -1593,6 +1602,7 @@ uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
*/
u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct dma_buf_attachment *attachment;
struct dma_buf *dma_buf;
const char *placement;
@@ -1601,10 +1611,11 @@ u64 amdgpu_bo_print_info(int id, struct amdgpu_bo *bo, struct seq_file *m)
if (dma_resv_trylock(bo->tbo.base.resv)) {
unsigned int domain;
domain = amdgpu_mem_type_to_domain(bo->tbo.resource->mem_type);
switch (domain) {
case AMDGPU_GEM_DOMAIN_VRAM:
if (amdgpu_bo_in_cpu_visible_vram(bo))
if (amdgpu_res_cpu_visible(adev, bo->tbo.resource))
placement = "VRAM VISIBLE";
else
placement = "VRAM";

View file

@@ -139,12 +139,18 @@ struct amdgpu_bo_vm {
struct amdgpu_mem_stats {
/* current VRAM usage, includes visible VRAM */
uint64_t vram;
/* current shared VRAM usage, includes visible VRAM */
uint64_t vram_shared;
/* current visible VRAM usage */
uint64_t visible_vram;
/* current GTT usage */
uint64_t gtt;
/* current shared GTT usage */
uint64_t gtt_shared;
/* current system memory usage */
uint64_t cpu;
/* current shared system memory usage */
uint64_t cpu_shared;
/* sum of evicted buffers, includes visible VRAM */
uint64_t evicted_vram;
/* sum of evicted buffers due to CPU access */
@@ -245,28 +251,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo)
return drm_vma_node_offset_addr(&bo->tbo.base.vma_node);
}
/**
* amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM
*/
static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_res_cursor cursor;
if (!bo->tbo.resource || bo->tbo.resource->mem_type != TTM_PL_VRAM)
return false;
amdgpu_res_first(bo->tbo.resource, 0, amdgpu_bo_size(bo), &cursor);
while (cursor.remaining) {
if (cursor.start < adev->gmc.visible_vram_size)
return true;
amdgpu_res_next(&cursor, cursor.size);
}
return false;
}
/**
* amdgpu_bo_explicit_sync - return whether the bo is explicitly synced
*/

View file

@@ -137,7 +137,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
amdgpu_bo_in_cpu_visible_vram(abo)) {
amdgpu_res_cpu_visible(adev, bo->resource)) {
/* Try evicting to the CPU inaccessible part of VRAM
* first, but only set GTT as busy placement, so this
@@ -408,40 +408,55 @@ error:
return r;
}
/**
* amdgpu_res_cpu_visible - Check that resource can be accessed by CPU
* @adev: amdgpu device
* @res: the resource to check
*
* Returns: true if the full resource is CPU visible, false otherwise.
*/
bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
struct ttm_resource *res)
{
struct amdgpu_res_cursor cursor;
if (!res)
return false;
if (res->mem_type == TTM_PL_SYSTEM || res->mem_type == TTM_PL_TT ||
res->mem_type == AMDGPU_PL_PREEMPT)
return true;
if (res->mem_type != TTM_PL_VRAM)
return false;
amdgpu_res_first(res, 0, res->size, &cursor);
while (cursor.remaining) {
if ((cursor.start + cursor.size) >= adev->gmc.visible_vram_size)
return false;
amdgpu_res_next(&cursor, cursor.size);
}
return true;
}
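
The walk above rejects a resource as soon as any backing block reaches past visible_vram_size, mirroring the >= comparison in the loop. A small self-contained model of that rule, using stand-in types instead of amdgpu_res_cursor (illustrative only, not kernel code):

/* Simplified model of the visibility walk in amdgpu_res_cpu_visible():
 * a VRAM resource counts as CPU visible only if every backing block ends
 * inside the CPU-visible window. Types and values are stand-ins. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct block {			/* stand-in for one amdgpu_res_cursor step */
	uint64_t start;		/* offset of the block in VRAM */
	uint64_t size;		/* size of the block in bytes */
};

static bool
blocks_cpu_visible(const struct block *blk, int nblk, uint64_t visible_vram_size)
{
	for (int i = 0; i < nblk; i++) {
		if (blk[i].start + blk[i].size >= visible_vram_size)
			return false;
	}
	return true;
}

int
main(void)
{
	const uint64_t visible = 256ULL << 20;	/* assumed 256 MiB BAR window */
	struct block low[] = { { 0, 1ULL << 20 }, { 8ULL << 20, 1ULL << 20 } };
	struct block high[] = { { 0, 1ULL << 20 }, { 300ULL << 20, 1ULL << 20 } };

	printf("low blocks visible:  %d\n", blocks_cpu_visible(low, 2, visible));	/* 1 */
	printf("high blocks visible: %d\n", blocks_cpu_visible(high, 2, visible));	/* 0 */
	return 0;
}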
/*
* amdgpu_mem_visible - Check that memory can be accessed by ttm_bo_move_memcpy
* amdgpu_res_copyable - Check that memory can be accessed by ttm_bo_move_memcpy
*
* Called by amdgpu_bo_move()
*/
static bool amdgpu_mem_visible(struct amdgpu_device *adev,
struct ttm_resource *mem)
static bool amdgpu_res_copyable(struct amdgpu_device *adev,
struct ttm_resource *mem)
{
u64 mem_size = (u64)mem->size;
struct amdgpu_res_cursor cursor;
u64 end;
if (mem->mem_type == TTM_PL_SYSTEM ||
mem->mem_type == TTM_PL_TT)
return true;
if (mem->mem_type != TTM_PL_VRAM)
if (!amdgpu_res_cpu_visible(adev, mem))
return false;
amdgpu_res_first(mem, 0, mem_size, &cursor);
end = cursor.start + cursor.size;
while (cursor.remaining) {
amdgpu_res_next(&cursor, cursor.size);
/* ttm_resource_ioremap only supports contiguous memory */
if (mem->mem_type == TTM_PL_VRAM &&
!(mem->placement & TTM_PL_FLAG_CONTIGUOUS))
return false;
if (!cursor.remaining)
break;
/* ttm_resource_ioremap only supports contiguous memory */
if (end != cursor.start)
return false;
end = cursor.start + cursor.size;
}
return end <= adev->gmc.visible_vram_size;
return true;
}
/*
@@ -534,8 +549,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
if (r) {
/* Check that all memory is CPU accessible */
if (!amdgpu_mem_visible(adev, old_mem) ||
!amdgpu_mem_visible(adev, new_mem)) {
if (!amdgpu_res_copyable(adev, old_mem) ||
!amdgpu_res_copyable(adev, new_mem)) {
pr_err("Move buffer fallback to memcpy unavailable\n");
return r;
}
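
The memcpy fallback above only helps when ttm_bo_move_memcpy() can ioremap both resources, which is why amdgpu_res_copyable() now combines the CPU-visibility check with a contiguity requirement for VRAM. A compact model of that combined rule, again with stand-in types and flags rather than the kernel definitions:

/* Model of the amdgpu_res_copyable() rule: the memcpy fallback needs a
 * CPU-visible resource, and VRAM additionally has to be contiguous because
 * ttm_resource_ioremap() only maps contiguous memory. Stand-in types only. */
#include <stdbool.h>
#include <stdio.h>

enum mem_type { MT_SYSTEM, MT_TT, MT_VRAM };	/* stand-in placements */
#define PL_FLAG_CONTIGUOUS	0x1		/* stand-in placement flag */

static bool
res_copyable(enum mem_type type, unsigned int placement, bool cpu_visible)
{
	if (!cpu_visible)
		return false;
	if (type == MT_VRAM && !(placement & PL_FLAG_CONTIGUOUS))
		return false;
	return true;
}

int
main(void)
{
	printf("GTT, visible:              %d\n", res_copyable(MT_TT, 0, true));
	printf("VRAM, visible, scattered:  %d\n", res_copyable(MT_VRAM, 0, true));
	printf("VRAM, visible, contiguous: %d\n",
	    res_copyable(MT_VRAM, PL_FLAG_CONTIGUOUS, true));
	return 0;
}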

View file

@@ -140,6 +140,9 @@ int amdgpu_vram_mgr_reserve_range(struct amdgpu_vram_mgr *mgr,
int amdgpu_vram_mgr_query_page_status(struct amdgpu_vram_mgr *mgr,
uint64_t start);
bool amdgpu_res_cpu_visible(struct amdgpu_device *adev,
struct ttm_resource *res);
int amdgpu_ttm_init(struct amdgpu_device *adev);
void amdgpu_ttm_fini(struct amdgpu_device *adev);
void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev,

View file

@@ -365,7 +365,8 @@ static void sdma_v4_4_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0
<< (ring->me % adev->sdma.num_inst_per_aid);
sdma_v4_4_2_wait_reg_mem(ring, 0, 1,
adev->nbio.funcs->get_hdp_flush_done_offset(adev),
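
The old code shifted the SDMA0 mask bit by the global instance index, which walks past one AID's worth of mask bits once ring->me crosses an AID boundary; the modulo keeps the shift local to the AID. A standalone illustration, assuming four SDMA instances per AID and an arbitrary mask bit (both values are examples, not taken from the hardware headers):

/* Worked example of the new shift. The instance count per AID and the
 * SDMA0 mask bit are assumed example values, not register definitions. */
#include <stdio.h>

int
main(void)
{
	const unsigned int ref_and_mask_sdma0 = 0x400;	/* example bit */
	const unsigned int num_inst_per_aid = 4;	/* example count */

	for (unsigned int me = 0; me < 8; me++)
		printf("ring->me=%u old=0x%05x new=0x%05x\n", me,
		    ref_and_mask_sdma0 << me,
		    ref_and_mask_sdma0 << (me % num_inst_per_aid));
	return 0;
}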

View file

@@ -292,17 +292,21 @@ static void sdma_v5_2_ring_emit_hdp_flush(struct amdgpu_ring *ring)
u32 ref_and_mask = 0;
const struct nbio_hdp_flush_reg *nbio_hf_reg = adev->nbio.hdp_flush_reg;
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
if (ring->me > 1) {
amdgpu_asic_flush_hdp(adev, ring);
} else {
ref_and_mask = nbio_hf_reg->ref_and_mask_sdma0 << ring->me;
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
amdgpu_ring_write(ring, ref_and_mask); /* reference */
amdgpu_ring_write(ring, ref_and_mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(1) |
SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3)); /* == */
amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_done_offset(adev)) << 2);
amdgpu_ring_write(ring, (adev->nbio.funcs->get_hdp_flush_req_offset(adev)) << 2);
amdgpu_ring_write(ring, ref_and_mask); /* reference */
amdgpu_ring_write(ring, ref_and_mask); /* mask */
amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
}
/**

View file

@@ -567,6 +567,19 @@ unsigned long drm_gem_lru_scan(struct drm_gem_lru *lru,
int drm_gem_evict(struct drm_gem_object *obj);
/**
* drm_gem_object_is_shared_for_memory_stats - helper for shared memory stats
*
* This helper should only be used for fdinfo shared memory stats to determine
* if a GEM object is shared.
*
* @obj: obj in question
*/
static inline bool drm_gem_object_is_shared_for_memory_stats(struct drm_gem_object *obj)
{
return (obj->handle_count > 1) || obj->dma_buf;
}
#ifdef CONFIG_LOCKDEP
/**
* drm_gem_gpuva_set_lock() - Set the lock protecting accesses to the gpuva list.
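
The new helper keys off just two things: more than one GEM handle, or a dma-buf left behind by an export. A tiny userspace model of that check with a stand-in struct (not the kernel drm_gem_object):

/* Model of drm_gem_object_is_shared_for_memory_stats(): an object counts
 * as shared once a second handle exists or it has been exported as a
 * dma-buf. The struct below is a stand-in, not the kernel type. */
#include <stdbool.h>
#include <stdio.h>

struct fake_gem_object {
	unsigned int handle_count;	/* GEM handles referencing the object */
	void *dma_buf;			/* non-NULL once exported */
};

static bool
is_shared_for_memory_stats(const struct fake_gem_object *obj)
{
	return obj->handle_count > 1 || obj->dma_buf != NULL;
}

int
main(void)
{
	struct fake_gem_object priv = { 1, NULL };		/* single handle, no export */
	struct fake_gem_object exported = { 1, (void *)1 };	/* dma-buf export */

	printf("private shared:  %d\n", is_shared_for_memory_stats(&priv));	/* 0 */
	printf("exported shared: %d\n", is_shared_for_memory_stats(&exported));	/* 1 */
	return 0;
}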

View file

@@ -81,7 +81,7 @@ struct ttm_pool {
bool use_dma32;
struct {
struct ttm_pool_type orders[MAX_ORDER + 1];
struct ttm_pool_type orders[NR_PAGE_ORDERS];
} caching[TTM_NUM_CACHING_TYPES];
};

View file

@@ -7,6 +7,7 @@
#include <linux/nodemask.h>
#define MAX_ORDER 11
#define NR_PAGE_ORDERS (MAX_ORDER + 1)
#define pfn_to_nid(x) 0
#endif
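
NR_PAGE_ORDERS only names the existing bound, so every "i <= MAX_ORDER" loop in the TTM hunks below becomes "i < NR_PAGE_ORDERS" without changing the covered range 0..MAX_ORDER. A trivial check of that equivalence:

/* Quick check that the renamed bound covers the same orders: with
 * MAX_ORDER 11 both loops visit orders 0..11, i.e. 12 pool types. */
#include <assert.h>
#include <stdio.h>

#define MAX_ORDER	11
#define NR_PAGE_ORDERS	(MAX_ORDER + 1)

int
main(void)
{
	int old_count = 0, new_count = 0;

	for (int i = 0; i <= MAX_ORDER; i++)
		old_count++;
	for (int i = 0; i < NR_PAGE_ORDERS; i++)
		new_count++;

	assert(old_count == new_count && new_count == NR_PAGE_ORDERS);
	printf("both loops visit %d orders (0..%d)\n", new_count, MAX_ORDER);
	return 0;
}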

View file

@@ -175,7 +175,7 @@ static void ttm_device_init_pools(struct kunit *test)
if (params->pools_init_expected) {
for (int i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
for (int j = 0; j <= MAX_ORDER; ++j) {
for (int j = 0; j < NR_PAGE_ORDERS; ++j) {
pt = pool->caching[i].orders[j];
KUNIT_EXPECT_PTR_EQ(test, pt.pool, pool);
KUNIT_EXPECT_EQ(test, pt.caching, i);

View file

@@ -70,11 +70,11 @@ module_param(page_pool_size, ulong, 0644);
static atomic_long_t allocated_pages;
static struct ttm_pool_type global_write_combined[MAX_ORDER + 1];
static struct ttm_pool_type global_uncached[MAX_ORDER + 1];
static struct ttm_pool_type global_write_combined[NR_PAGE_ORDERS];
static struct ttm_pool_type global_uncached[NR_PAGE_ORDERS];
static struct ttm_pool_type global_dma32_write_combined[MAX_ORDER + 1];
static struct ttm_pool_type global_dma32_uncached[MAX_ORDER + 1];
static struct ttm_pool_type global_dma32_write_combined[NR_PAGE_ORDERS];
static struct ttm_pool_type global_dma32_uncached[NR_PAGE_ORDERS];
static spinlock_t shrinker_lock;
static struct list_head shrinker_list;
@@ -424,17 +424,23 @@ static struct ttm_pool_type *ttm_pool_select_type(struct ttm_pool *pool,
enum ttm_caching caching,
unsigned int order)
{
if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE)
if (pool->use_dma_alloc)
return &pool->caching[caching].orders[order];
#ifdef CONFIG_X86
switch (caching) {
case ttm_write_combined:
if (pool->nid != NUMA_NO_NODE)
return &pool->caching[caching].orders[order];
if (pool->use_dma32)
return &global_dma32_write_combined[order];
return &global_write_combined[order];
case ttm_uncached:
if (pool->nid != NUMA_NO_NODE)
return &pool->caching[caching].orders[order];
if (pool->use_dma32)
return &global_dma32_uncached[order];
@@ -710,11 +716,17 @@ void ttm_pool_init(struct ttm_pool *pool, struct device *dev,
pool->use_dma_alloc = use_dma_alloc;
pool->use_dma32 = use_dma32;
if (use_dma_alloc || nid != NUMA_NO_NODE) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j <= MAX_ORDER; ++j)
ttm_pool_type_init(&pool->caching[i].orders[j],
pool, i, j);
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
for (j = 0; j < NR_PAGE_ORDERS; ++j) {
struct ttm_pool_type *pt;
/* Initialize only pool types which are actually used */
pt = ttm_pool_select_type(pool, i, j);
if (pt != &pool->caching[i].orders[j])
continue;
ttm_pool_type_init(pt, pool, i, j);
}
}
}
EXPORT_SYMBOL(ttm_pool_init);
@@ -731,10 +743,16 @@ void ttm_pool_fini(struct ttm_pool *pool)
{
unsigned int i, j;
if (pool->use_dma_alloc || pool->nid != NUMA_NO_NODE) {
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i)
for (j = 0; j <= MAX_ORDER; ++j)
ttm_pool_type_fini(&pool->caching[i].orders[j]);
for (i = 0; i < TTM_NUM_CACHING_TYPES; ++i) {
for (j = 0; j < NR_PAGE_ORDERS; ++j) {
struct ttm_pool_type *pt;
pt = ttm_pool_select_type(pool, i, j);
if (pt != &pool->caching[i].orders[j])
continue;
ttm_pool_type_fini(pt);
}
}
/* We removed the pool types from the LRU, but we need to also make sure
@@ -795,7 +813,7 @@ static void ttm_pool_debugfs_header(struct seq_file *m)
unsigned int i;
seq_puts(m, "\t ");
for (i = 0; i <= MAX_ORDER; ++i)
for (i = 0; i < NR_PAGE_ORDERS; ++i)
seq_printf(m, " ---%2u---", i);
seq_puts(m, "\n");
}
@@ -806,7 +824,7 @@ static void ttm_pool_debugfs_orders(struct ttm_pool_type *pt,
{
unsigned int i;
for (i = 0; i <= MAX_ORDER; ++i)
for (i = 0; i < NR_PAGE_ORDERS; ++i)
seq_printf(m, " %8u", ttm_pool_type_count(&pt[i]));
seq_puts(m, "\n");
}
@@ -915,7 +933,7 @@ int ttm_pool_mgr_init(unsigned long num_pages)
mtx_init(&shrinker_lock, IPL_NONE);
INIT_LIST_HEAD(&shrinker_list);
for (i = 0; i <= MAX_ORDER; ++i) {
for (i = 0; i < NR_PAGE_ORDERS; ++i) {
ttm_pool_type_init(&global_write_combined[i], NULL,
ttm_write_combined, i);
ttm_pool_type_init(&global_uncached[i], NULL, ttm_uncached, i);
@@ -948,7 +966,7 @@ void ttm_pool_mgr_fini(void)
{
unsigned int i;
for (i = 0; i <= MAX_ORDER; ++i) {
for (i = 0; i < NR_PAGE_ORDERS; ++i) {
ttm_pool_type_fini(&global_write_combined[i]);
ttm_pool_type_fini(&global_uncached[i]);