Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save cgmb/a5feea2fbd5cb7b7142fdc3f8cb0282f to your computer and use it in GitHub Desktop.
Save cgmb/a5feea2fbd5cb7b7142fdc3f8cb0282f to your computer and use it in GitHub Desktop.
HIP Graph Host-to-Host Copy Bug
From b566e2bdb3a6d088d6155393afe31c4b5b617dc8 Mon Sep 17 00:00:00 2001
From: Jaydeep Patel <jaydeepkumar.patel@amd.com>
Date: Mon, 2 Jan 2023 13:08:13 +0000
Subject: [PATCH] SWDEV-375366 - SWDEV-375351 - Handle HtoH case for graph mem
cpy impl.
Change-Id: I5a8c3c3c22db045f714b0443b8d70a8c6b4a8cea
---
src/hip_graph_helper.hpp | 4 +++
src/hip_graph_internal.hpp | 30 ++++++++++++++++++++---
src/hip_memory.cpp | 50 +++++++++++++++++++++++++++++---------
3 files changed, 70 insertions(+), 14 deletions(-)
diff --git a/src/hip_graph_helper.hpp b/src/hip_graph_helper.hpp
index 4af57c69..69780338 100644
--- a/src/hip_graph_helper.hpp
+++ b/src/hip_graph_helper.hpp
@@ -7,6 +7,10 @@ hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes, hip
hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind, amd::HostQueue& queue, bool isAsync = false);
+void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue);
+
+bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind);
+
hipError_t ihipLaunchKernel_validate(hipFunction_t f, uint32_t globalWorkSizeX,
uint32_t globalWorkSizeY, uint32_t globalWorkSizeZ,
uint32_t blockDimX, uint32_t blockDimY, uint32_t blockDimZ,
diff --git a/src/hip_graph_internal.hpp b/src/hip_graph_internal.hpp
index ec016d1f..65ccfdc3 100644
--- a/src/hip_graph_internal.hpp
+++ b/src/hip_graph_internal.hpp
@@ -1037,6 +1037,9 @@ class hipGraphMemcpyNode : public hipGraphNode {
}
hipError_t CreateCommand(amd::HostQueue* queue) {
+ if (IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) {
+ return hipSuccess;
+ }
hipError_t status = hipGraphNode::CreateCommand(queue);
if (status != hipSuccess) {
return status;
@@ -1048,6 +1051,16 @@ class hipGraphMemcpyNode : public hipGraphNode {
return status;
}
+ void EnqueueCommands(hipStream_t stream) override {
+ if (isEnabled_ && IsHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr, pCopyParams_->kind)) {
+ ihipHtoHMemcpy(pCopyParams_->dstPtr.ptr, pCopyParams_->srcPtr.ptr,
+ pCopyParams_->extent.width * pCopyParams_->extent.height *
+ pCopyParams_->extent.depth, *hip::getQueue(stream));
+ return;
+ }
+ hipGraphNode::EnqueueCommands(stream);
+ }
+
void GetParams(hipMemcpy3DParms* params) {
std::memcpy(params, pCopyParams_, sizeof(hipMemcpy3DParms));
}
@@ -1170,6 +1183,9 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
}
virtual hipError_t CreateCommand(amd::HostQueue* queue) {
+ if (IsHtoHMemcpy(dst_, src_, kind_)) {
+ return hipSuccess;
+ }
hipError_t status = hipGraphNode::CreateCommand(queue);
if (status != hipSuccess) {
return status;
@@ -1182,10 +1198,18 @@ class hipGraphMemcpyNode1D : public hipGraphNode {
}
void EnqueueCommands(hipStream_t stream) {
- if (commands_.empty()) return;
- // commands_ should have just 1 item
- assert(commands_.size() == 1 && "Invalid command size in hipGraphMemcpyNode1D");
+ bool isH2H = IsHtoHMemcpy(dst_, src_, kind_);
+ if (!isH2H) {
+ if (commands_.empty()) return;
+ // commands_ should have just 1 item
+ assert(commands_.size() == 1 && "Invalid command size in hipGraphMemcpyNode1D");
+ }
if (isEnabled_) {
+ //HtoH
+ if (isH2H) {
+ ihipHtoHMemcpy(dst_, src_, count_, *hip::getQueue(stream));
+ return;
+ }
amd::Command* command = commands_[0];
amd::HostQueue* cmdQueue = command->queue();
amd::HostQueue* queue = hip::getQueue(stream);
diff --git a/src/hip_memory.cpp b/src/hip_memory.cpp
index e333dce8..37a53e26 100644
--- a/src/hip_memory.cpp
+++ b/src/hip_memory.cpp
@@ -321,7 +321,18 @@ hipError_t ihipMalloc(void** ptr, size_t sizeBytes, unsigned int flags)
memObj->getUserData().deviceId = hip::getCurrentDevice()->deviceId();
return hipSuccess;
}
-
+bool IsHtoHMemcpyValid(void* dst, const void* src, hipMemcpyKind kind) {
+ size_t sOffset = 0;
+ amd::Memory* srcMemory = getMemoryObject(src, sOffset);
+ size_t dOffset = 0;
+ amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
+ if (src && dst && srcMemory == nullptr && dstMemory == nullptr) {
+ if (kind != hipMemcpyHostToHost && kind != hipMemcpyDefault) {
+ return false;
+ }
+ }
+ return true;
+}
hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes,
hipMemcpyKind kind) {
if (dst == nullptr || src == nullptr) {
@@ -337,6 +348,10 @@ hipError_t ihipMemcpy_validate(void* dst, const void* src, size_t sizeBytes,
(srcMemory && sizeBytes > (srcMemory->getSize() - sOffset))) {
return hipErrorInvalidValue;
}
+ //If src and dst ptr are null then kind must be either h2h or def.
+ if (!IsHtoHMemcpyValid(dst, src, kind)) {
+ return hipErrorInvalidValue;
+ }
return hipSuccess;
}
@@ -431,7 +446,22 @@ hipError_t ihipMemcpyCommand(amd::Command*& command, void* dst, const void* src,
}
return hipSuccess;
}
-
+bool IsHtoHMemcpy(void* dst, const void* src, hipMemcpyKind kind) {
+ size_t sOffset = 0;
+ amd::Memory* srcMemory = getMemoryObject(src, sOffset);
+ size_t dOffset = 0;
+ amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
+ if (srcMemory == nullptr && dstMemory == nullptr) {
+ if (kind == hipMemcpyHostToHost || kind == hipMemcpyDefault) {
+ return true;
+ }
+ }
+ return false;
+}
+void ihipHtoHMemcpy(void* dst, const void* src, size_t sizeBytes, amd::HostQueue& queue) {
+ queue.finish();
+ memcpy(dst, src, sizeBytes);
+}
// ================================================================================================
hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKind kind,
amd::HostQueue& queue, bool isAsync = false) {
@@ -451,14 +481,9 @@ hipError_t ihipMemcpy(void* dst, const void* src, size_t sizeBytes, hipMemcpyKin
amd::Memory* srcMemory = getMemoryObject(src, sOffset);
size_t dOffset = 0;
amd::Memory* dstMemory = getMemoryObject(dst, dOffset);
- if ((srcMemory == nullptr) && (dstMemory == nullptr)) {
- if ((kind == hipMemcpyHostToHost) || (kind == hipMemcpyDefault)) {
- queue.finish();
- memcpy(dst, src, sizeBytes);
- return hipSuccess;
- } else {
- return hipErrorInvalidValue;
- }
+ if (srcMemory == nullptr && dstMemory == nullptr) {
+ ihipHtoHMemcpy(dst, src, sizeBytes, queue);
+ return hipSuccess;
} else if ((srcMemory == nullptr) && (dstMemory != nullptr)) {
isAsync = false;
} else if ((srcMemory != nullptr) && (dstMemory == nullptr)) {
@@ -2631,7 +2656,10 @@ hipError_t ihipMemcpy3D_validate(const hipMemcpy3DParms* p) {
if (p->kind < hipMemcpyHostToHost || p->kind > hipMemcpyDefault) {
return hipErrorInvalidMemcpyDirection;
}
-
+ //If src and dst ptr are null then kind must be either h2h or def.
+ if (!IsHtoHMemcpyValid(p->dstPtr.ptr, p->srcPtr.ptr, p->kind)) {
+ return hipErrorInvalidValue;
+ }
return hipSuccess;
}
https://github.com/ROCm-Developer-Tools/hipamd/commit/b566e2bdb3a6d088d6155393afe31c4b5b617dc8#diff-29a39aabc36bb48630574d8630dec58fc464c753abf755043766c620bcc71e5d
#include <iostream>
#include "hip/hip_runtime.h"
#define HIP_CHECK(error) \
{ \
hipError_t localError = error; \
if ((localError != hipSuccess) && (localError != hipErrorPeerAccessAlreadyEnabled)) { \
printf("error: '%s'(%d) from %s at %s:%d\n", hipGetErrorString(localError), \
localError, #error, __FUNCTION__, __LINE__); \
exit(0);\
} \
}
int main() {
size_t size = 1024;
int *A_h{nullptr}, *B_h{nullptr};
size_t numBytes{size * sizeof(int)};
A_h = reinterpret_cast<int*>(malloc(sizeof(int)*size));
B_h = reinterpret_cast<int*>(malloc(sizeof(int)*size));
// Initilize the host
for(size_t i = 0; i < size; i++) {
A_h[i] = i;
B_h[i] = 0;
}
hipGraph_t graph;
hipStream_t streamForGraph;
hipGraphExec_t graphExec;
hipGraphNode_t memcpyH2H;
HIP_CHECK(hipGraphCreate(&graph, 0));
HIP_CHECK(hipStreamCreate(&streamForGraph));
// Host to Host
HIP_CHECK(hipGraphAddMemcpyNode1D(&memcpyH2H, graph, nullptr, 0,
B_h, A_h, numBytes, hipMemcpyHostToHost));
// Instantiate and launch the graph
HIP_CHECK(hipGraphInstantiate(&graphExec, graph, nullptr, nullptr, 0));
HIP_CHECK(hipGraphLaunch(graphExec, streamForGraph));
HIP_CHECK(hipStreamSynchronize(streamForGraph));
HIP_CHECK(hipGraphExecDestroy(graphExec));
HIP_CHECK(hipGraphDestroy(graph));
HIP_CHECK(hipStreamDestroy(streamForGraph));
// Validation
for (size_t i = 0; i < size; i++) {
if( A_h[i] == B_h[i]) {
printf("Test Passed\n");
}
else {
printf("Test Failed\n");
}
}
free(A_h);
free(B_h);
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment