From 305f9aa2ab2805e5899baabc8fca5fb320c8406a Mon Sep 17 00:00:00 2001 From: Jianxin Xiong Date: Wed, 2 Oct 2024 14:30:53 -0700 Subject: [PATCH] core: Introduce new inject calls that take buffer descriptor Existing inject calls (for msg, tagged, rma, and atomic ops) don't provide a way to supply the memory descriptor of the data buffer. This presents difficulty when FI_HMEM is enabled. Providers either have to detect the HMEM interface the buffer is allocated with, or declare the supported inject_size be zero. Both may result in sub-optimal application performance. Here a set of new inject calls are introduced. The difference of the new calls vs the corresponding existing calls is the addition of a new parameter "desc", like what have already existed in the send calls. With the new calls, the provider is able to know the iface of the data buffer and handle it properly. Signed-off-by: Jianxin Xiong --- include/rdma/fi_atomic.h | 15 +++++++++++++++ include/rdma/fi_endpoint.h | 22 ++++++++++++++++++++++ include/rdma/fi_rma.h | 25 +++++++++++++++++++++++++ include/rdma/fi_tagged.h | 23 +++++++++++++++++++++++ man/fi_atomic.3.md | 10 ++++++++++ man/fi_msg.3.md | 20 ++++++++++++++++++++ man/fi_rma.3.md | 21 +++++++++++++++++++++ man/fi_tagged.3.md | 20 ++++++++++++++++++++ 8 files changed, 156 insertions(+) diff --git a/include/rdma/fi_atomic.h b/include/rdma/fi_atomic.h index cc8b1e52054..b69b5d92b89 100644 --- a/include/rdma/fi_atomic.h +++ b/include/rdma/fi_atomic.h @@ -138,6 +138,10 @@ struct fi_ops_atomic { enum fi_datatype datatype, enum fi_op op, size_t *count); int (*compwritevalid)(struct fid_ep *ep, enum fi_datatype datatype, enum fi_op op, size_t *count); + + ssize_t (*inject2)(struct fid_ep *ep, const void *buf, size_t count, + void *desc, fi_addr_t dest_addr, uint64_t addr, + uint64_t key, enum fi_datatype datatype, enum fi_op op); }; #ifdef FABRIC_DIRECT @@ -184,6 +188,17 @@ fi_inject_atomic(struct fid_ep *ep, const void *buf, size_t count, key, datatype, 
op); } +static inline ssize_t +fi_inject_atomic2(struct fid_ep *ep, const void *buf, size_t count, void *desc, + fi_addr_t dest_addr, uint64_t addr, uint64_t key, + enum fi_datatype datatype, enum fi_op op) +{ + return FI_CHECK_OP(ep->atomic, struct fi_ops_atomic, inject2) ? + ep->atomic->inject2(ep, buf, count, desc, dest_addr, addr, key, + datatype, op) : + -FI_ENOSYS; +} + static inline ssize_t fi_fetch_atomic(struct fid_ep *ep, const void *buf, size_t count, void *desc, diff --git a/include/rdma/fi_endpoint.h b/include/rdma/fi_endpoint.h index 825b0334c43..9889cd779a7 100644 --- a/include/rdma/fi_endpoint.h +++ b/include/rdma/fi_endpoint.h @@ -128,6 +128,10 @@ struct fi_ops_msg { uint64_t data, fi_addr_t dest_addr, void *context); ssize_t (*injectdata)(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr); + ssize_t (*inject2)(struct fid_ep *ep, const void *buf, size_t len, + void *desc, fi_addr_t dest_addr); + ssize_t (*injectdata2)(struct fid_ep *ep, const void *buf, size_t len, + void *desc, uint64_t data, fi_addr_t dest_addr); }; struct fi_ops_cm; @@ -359,6 +363,24 @@ fi_injectdata(struct fid_ep *ep, const void *buf, size_t len, return ep->msg->injectdata(ep, buf, len, data, dest_addr); } +static inline ssize_t +fi_inject2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + fi_addr_t dest_addr) +{ + return FI_CHECK_OP(ep->msg, struct fi_ops_msg, inject2) ? + ep->msg->inject2(ep, buf, len, desc, dest_addr) : + -FI_ENOSYS; +} + +static inline ssize_t +fi_injectdata2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + uint64_t data, fi_addr_t dest_addr) +{ + return FI_CHECK_OP(ep->msg, struct fi_ops_msg, injectdata2) ? 
+ ep->msg->injectdata2(ep, buf, len, desc, data, dest_addr) : + -FI_ENOSYS; +} + #endif #ifdef __cplusplus diff --git a/include/rdma/fi_rma.h b/include/rdma/fi_rma.h index 003c10fd9cf..b80b4ded3e5 100644 --- a/include/rdma/fi_rma.h +++ b/include/rdma/fi_rma.h @@ -86,6 +86,12 @@ struct fi_ops_rma { void *context); ssize_t (*injectdata)(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key); + ssize_t (*inject2)(struct fid_ep *ep, const void *buf, size_t len, + void *desc, fi_addr_t dest_addr, uint64_t addr, + uint64_t key); + ssize_t (*injectdata2)(struct fid_ep *ep, const void *buf, size_t len, + void *desc, uint64_t data, fi_addr_t dest_addr, + uint64_t addr, uint64_t key); }; #ifdef FABRIC_DIRECT @@ -159,6 +165,25 @@ fi_inject_writedata(struct fid_ep *ep, const void *buf, size_t len, return ep->rma->injectdata(ep, buf, len, data, dest_addr, addr, key); } +static inline ssize_t +fi_inject_write2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + fi_addr_t dest_addr, uint64_t addr, uint64_t key) +{ + return FI_CHECK_OP(ep->rma, struct fi_ops_rma, inject2) ? + ep->rma->inject2(ep, buf, len, desc, dest_addr, addr, key) : + -FI_ENOSYS; +} + +static inline ssize_t +fi_inject_writedata2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + uint64_t data, fi_addr_t dest_addr, uint64_t addr, + uint64_t key) +{ + return FI_CHECK_OP(ep->rma, struct fi_ops_rma, injectdata2) ? 
+ ep->rma->injectdata2(ep, buf, len, desc, data, dest_addr, addr, key) : + -FI_ENOSYS; +} + #endif #ifdef __cplusplus diff --git a/include/rdma/fi_tagged.h b/include/rdma/fi_tagged.h index 9230c34df64..838c48da3b8 100644 --- a/include/rdma/fi_tagged.h +++ b/include/rdma/fi_tagged.h @@ -85,6 +85,11 @@ struct fi_ops_tagged { uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context); ssize_t (*injectdata)(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr, uint64_t tag); + ssize_t (*inject2)(struct fid_ep *ep, const void *buf, size_t len, + void *desc, fi_addr_t dest_addr, uint64_t tag); + ssize_t (*injectdata2)(struct fid_ep *ep, const void *buf, size_t len, + void *desc, uint64_t data, fi_addr_t dest_addr, + uint64_t tag); }; @@ -159,6 +164,24 @@ fi_tinjectdata(struct fid_ep *ep, const void *buf, size_t len, return ep->tagged->injectdata(ep, buf, len, data, dest_addr, tag); } +static inline ssize_t +fi_tinject2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + fi_addr_t dest_addr, uint64_t tag) +{ + return FI_CHECK_OP(ep->tagged, struct fi_ops_tagged, inject2) ? + ep->tagged->inject2(ep, buf, len, desc, dest_addr, tag) : + -FI_ENOSYS; +} + +static inline ssize_t +fi_tinjectdata2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + uint64_t data, fi_addr_t dest_addr, uint64_t tag) +{ + return FI_CHECK_OP(ep->tagged, struct fi_ops_tagged, injectdata2) ? 
+ ep->tagged->injectdata2(ep, buf, len, desc, data, dest_addr, tag) : + -FI_ENOSYS; +} + #endif #ifdef __cplusplus diff --git a/man/fi_atomic.3.md b/man/fi_atomic.3.md index 0797f4d39fa..c7556939a6e 100644 --- a/man/fi_atomic.3.md +++ b/man/fi_atomic.3.md @@ -47,6 +47,11 @@ ssize_t fi_inject_atomic(struct fid_ep *ep, const void *buf, uint64_t addr, uint64_t key, enum fi_datatype datatype, enum fi_op op); +ssize_t fi_inject_atomic2(struct fid_ep *ep, const void *buf, + size_t count, void *desc, fi_addr_t dest_addr, + uint64_t addr, uint64_t key, + enum fi_datatype datatype, enum fi_op op); + ssize_t fi_fetch_atomic(struct fid_ep *ep, const void *buf, size_t count, void *desc, void *result, void *result_desc, fi_addr_t dest_addr, uint64_t addr, uint64_t key, @@ -429,6 +434,11 @@ has not been configured with FI_SELECTIVE_COMPLETION. See the flags discussion below for more details. The requested message size that can be used with fi_inject_atomic is limited by inject_size. +The fi_inject_atomic2 call is similar to fi_inject_atomic, but allows +passing a descriptor associated with the data buffer. This is especially +useful when FI_HMEM support is enabled and the buffer is registered with +FI_HMEM iface other than FI_HMEM_SYSTEM. + The fi_atomicmsg call supports atomic functions over both connected and connectionless endpoints, with the ability to control the atomic operation per call through the use of flags. 
The fi_atomicmsg diff --git a/man/fi_msg.3.md b/man/fi_msg.3.md index 4b6e67cf876..65f9a488139 100644 --- a/man/fi_msg.3.md +++ b/man/fi_msg.3.md @@ -42,11 +42,17 @@ ssize_t fi_sendmsg(struct fid_ep *ep, const struct fi_msg *msg, ssize_t fi_inject(struct fid_ep *ep, const void *buf, size_t len, fi_addr_t dest_addr); +ssize_t fi_inject2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + fi_addr_t dest_addr); + ssize_t fi_senddata(struct fid_ep *ep, const void *buf, size_t len, void *desc, uint64_t data, fi_addr_t dest_addr, void *context); ssize_t fi_injectdata(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr); + +ssize_t fi_injectdata2(struct fid_ep *ep, const void *buf, size_t len, void *desc, + uint64_t data, fi_addr_t dest_addr); ``` # ARGUMENTS @@ -173,6 +179,13 @@ to write CQ entries for all successful completions. See the flags discussion below for more details. The requested message size that can be used with fi_inject is limited by inject_size. +## fi_inject2 + +The fi_inject2 call is similar to fi_inject, but allows passing a descriptor +associated with the send buffer. This is especially useful when FI_HMEM +support is enabled and the buffer is registered with FI_HMEM iface other +than FI_HMEM_SYSTEM. + ## fi_senddata The send data call is similar to fi_send, but allows for the sending @@ -185,6 +198,13 @@ The inject data call is similar to fi_inject, but allows for the sending of remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the transfer. +## fi_injectdata2 + +The fi_injectdata2 call is similar to fi_injectdata, but allows passing a +descriptor associated with the send buffer. This is especially useful when +FI_HMEM support is enabled and the buffer is registered with FI_HMEM iface +other than FI_HMEM_SYSTEM. 
+ ## fi_recv The fi_recv call posts a data buffer to the receive queue of the diff --git a/man/fi_rma.3.md b/man/fi_rma.3.md index 6130c82029e..92e8f9eda2a 100644 --- a/man/fi_rma.3.md +++ b/man/fi_rma.3.md @@ -45,12 +45,19 @@ ssize_t fi_writemsg(struct fid_ep *ep, const struct fi_msg_rma *msg, ssize_t fi_inject_write(struct fid_ep *ep, const void *buf, size_t len, fi_addr_t dest_addr, uint64_t addr, uint64_t key); +ssize_t fi_inject_write2(struct fid_ep *ep, const void *buf, size_t len, + void *desc, fi_addr_t dest_addr, uint64_t addr, uint64_t key); + ssize_t fi_writedata(struct fid_ep *ep, const void *buf, size_t len, void *desc, uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key, void *context); ssize_t fi_inject_writedata(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr, uint64_t addr, uint64_t key); + +ssize_t fi_inject_writedata2(struct fid_ep *ep, const void *buf, size_t len, + void *desc, uint64_t data, fi_addr_t dest_addr, uint64_t addr, + uint64_t key); ``` # ARGUMENTS @@ -180,6 +187,13 @@ struct fi_rma_iov { The write inject call is an optimized version of fi_write. It provides similar completion semantics as fi_inject [`fi_msg`(3)](fi_msg.3.html). +## fi_inject_write2 + +The fi_inject_write2 call is similar to fi_inject_write, but allows passing a +descriptor associated with the source buffer. This is especially useful when +FI_HMEM support is enabled and the buffer is registered with FI_HMEM iface +other than FI_HMEM_SYSTEM. + ## fi_writedata The write data call is similar to fi_write, but allows for the sending @@ -192,6 +206,13 @@ The inject write data call is similar to fi_inject_write, but allows for the sen of remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the transfer. +## fi_inject_writedata2 + +The fi_inject_writedata2 call is similar to fi_inject_writedata, but allows +passing a descriptor associated with the source buffer.
This is especially +useful when FI_HMEM support is enabled and the buffer is registered with +FI_HMEM iface other than FI_HMEM_SYSTEM. + ## fi_read The fi_read call requests that the remote endpoint transfer data from diff --git a/man/fi_tagged.3.md b/man/fi_tagged.3.md index 901a2b648cc..9c5da849a2c 100644 --- a/man/fi_tagged.3.md +++ b/man/fi_tagged.3.md @@ -43,12 +43,18 @@ ssize_t fi_tsendmsg(struct fid_ep *ep, const struct fi_msg_tagged *msg, ssize_t fi_tinject(struct fid_ep *ep, const void *buf, size_t len, fi_addr_t dest_addr, uint64_t tag); +ssize_t fi_tinject2(struct fid_ep *ep, const void *buf, size_t len, + void *desc, fi_addr_t dest_addr, uint64_t tag); + ssize_t fi_tsenddata(struct fid_ep *ep, const void *buf, size_t len, void *desc, uint64_t data, fi_addr_t dest_addr, uint64_t tag, void *context); ssize_t fi_tinjectdata(struct fid_ep *ep, const void *buf, size_t len, uint64_t data, fi_addr_t dest_addr, uint64_t tag); + +ssize_t fi_tinjectdata2(struct fid_ep *ep, const void *buf, size_t len, + void *desc, uint64_t data, fi_addr_t dest_addr, uint64_t tag); ``` # ARGUMENTS @@ -196,6 +202,13 @@ struct fi_msg_tagged { The tagged inject call is an optimized version of fi_tsend. It provides similar completion semantics as fi_inject [`fi_msg`(3)](fi_msg.3.html). +## fi_tinject2 + +The fi_tinject2 call is similar to fi_tinject, but allows passing a descriptor +to be associated with the send buffer. This is especially useful when FI_HMEM +support is enabled and the buffer is registered with FI_HMEM iface other than +FI_HMEM_SYSTEM. + ## fi_tsenddata The tagged send data call is similar to fi_tsend, but allows for the @@ -208,6 +221,13 @@ The tagged inject data call is similar to fi_tinject, but allows for the sending of remote CQ data (see FI_REMOTE_CQ_DATA flag) as part of the transfer. +## fi_tinjectdata2 + +The fi_tinjectdata2 call is similar to fi_tinjectdata, but allows passing a +descriptor to be associated with the send buffer. 
This is especially useful +when FI_HMEM support is enabled and the buffer is registered with FI_HMEM +iface other than FI_HMEM_SYSTEM. + ## fi_trecv The fi_trecv call posts a data buffer to the receive queue of the