/*
 * Copyright 2021 NVIDIA Corporation.  All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee.  Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE.  IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users.  These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

#ifndef CAL_HEADER_
#define CAL_HEADER_

/*! \file cal.h
    \brief Communication Abstraction Library API header

    Header provides interfaces to communication functionality.
*/

#include "cuda_runtime.h"
#include "stdint.h"

/** Individual components of the CAL version number. */
#define CAL_VER_MAJOR 0
#define CAL_VER_MINOR 4
#define CAL_VER_PATCH 3
#define CAL_VER_BUILD 0
/**
 * Version folded into a single integer: major * 1000 + minor * 100 + patch.
 * CAL_VER_BUILD is not part of the encoding. Distinct versions map to distinct
 * values only while minor < 10 and patch < 100.
 */
#define CAL_VERSION (CAL_VER_MAJOR * 1000 + CAL_VER_MINOR * 100 + CAL_VER_PATCH)

#if defined(__cplusplus)
extern "C"
{
#endif

/**
 * Return codes of the CAL API
 */
typedef enum
{
    CAL_OK = 0,                      ///< Success
    CAL_ERROR_INPROGRESS = 1,        ///< Request is in progress
    CAL_ERROR = 2,                   ///< Generic error
    CAL_ERROR_INVALID_PARAMETER = 3, ///< Invalid parameter to the interface function.
    CAL_ERROR_INTERNAL = 4,          ///< Internal error
    CAL_ERROR_CUDA = 5,              ///< Error in CUDA runtime/driver API
    CAL_ERROR_UCC = 6,               ///< Error in UCC call
    CAL_ERROR_NOT_SUPPORTED = 7,     ///< Requested configuration or parameters are not supported
} calError_t;

/**
 * Communicator handle, stores device endpoint and resources related to communication.
 *
 * The handle is an opaque pointer: `struct cal_comm` is only named by this typedef
 * (which also serves as its forward declaration) and is not defined in this header.
 */
typedef struct cal_comm* cal_comm_t;

/**
 * Parameters for cal_comm_create(), passed to it by value.
 *
 * NOTE(review): this header does not spell out the contract of the callbacks. Member
 * notes below that go beyond the bare signatures are inferred from names and must be
 * confirmed against the implementation.
 */
typedef struct cal_comm_create_params
{
    /// Caller-provided callback. Presumably gathers `size` bytes from `src_buf` of every
    /// rank into `recv_buf`, receives the `data` member below as its `data` argument, and
    /// stores a handle usable with req_test/req_free in `*request` -- TODO confirm.
    calError_t (*allgather)(void* src_buf, void* recv_buf, size_t size, void* data, void** request);
    /// Caller-provided callback taking a request handle. Presumably polls the request for
    /// completion (cf. CAL_ERROR_INPROGRESS vs. CAL_OK) -- TODO confirm.
    calError_t (*req_test)(void* request);
    /// Caller-provided callback taking a request handle. Presumably releases the
    /// request -- TODO confirm.
    calError_t (*req_free)(void* request);
    /// Untyped caller pointer; presumably handed back as the `data` argument of
    /// `allgather` -- TODO confirm.
    void* data;
    /// Number of ranks in the communicator.
    int   nranks;
    /// Rank to assign to the communicator created from these parameters.
    int   rank;
    /// Local device id to assign to the communicator.
    int   local_device;
} cal_comm_create_params_t;

/**
 * Single communicator handle initialization, collective call (all
 * calls to this function for all ranks must progress concurrently).
 *
 * \param[in]   params          Creation parameters (see cal_comm_create_params_t), passed by value:
 *                              - `nranks`: total number of ranks in the communicator.
 *                              - `rank`: rank number to be assigned to the returned communicator
 *                                handle. This number should be unique for each call in this
 *                                collective initialization routine and should lie in the
 *                                range [0, nranks-1].
 *                              - `local_device`: local device id that will be assigned to the CAL
 *                                communicator. Should be the same as the device of the active context.
 *                              - `allgather`, `req_test`, `req_free`, `data`: caller-supplied
 *                                function pointers and a `void*` member; their contract is not
 *                                described in this header.
 * \param[out]  new_comm        Pointer where to store new CAL communicator handle
 * \returns                     CAL Return code
 *
 * NOTE(review): this comment previously listed `uid_buffer`, `num_ranks`, `rank` and
 * `local_device` as direct arguments. The signature takes them through `params`, and
 * cal_comm_create_params_t has no `uid_buffer` member -- confirm that no UID exchange
 * is expected from the caller.
 */
calError_t cal_comm_create(cal_comm_create_params_t params, cal_comm_t* new_comm);

/**
 * Blocks the calling thread until all outstanding device operations in `stream` have finished.
 * This includes outstanding CAL operations submitted to this stream.
 *
 * \param[in]   comm            CAL Communicator handle
 * \param[in]   stream          Stream to synchronize with
 * \returns                     CAL return code (see calError_t)
 */
calError_t cal_stream_sync(cal_comm_t comm, cudaStream_t stream);

/**
 * Synchronizes streams across all processing elements: work submitted to `stream` after
 * this call continues only once the stream of every other processing element has
 * arrived at this call (all-to-all synchronization).
 *
 * \param[in]   comm            CAL Communicator handle
 * \param[in]   stream          Stream in which the barrier will be placed.
 * \returns                     CAL return code (see calError_t)
 */
calError_t cal_comm_barrier(cal_comm_t comm, cudaStream_t stream);

/**
 * Releases the resources associated with the provided communicator handle.
 *
 * \param[in]   comm            CAL Communicator handle to release
 * \returns                     CAL return code (see calError_t)
 */
calError_t cal_comm_destroy(cal_comm_t comm);

/**
 * Retrieves the processing element rank assigned to the communicator. The rank is in the
 * range [0, n_pe-1], where n_pe is the number of processing elements (presumably the value
 * reported by cal_comm_get_size -- confirm).
 *
 * \param[in]   comm            CAL Communicator handle
 * \param[out]  rank            Pointer where to store the rank ID
 * \returns                     CAL return code (see calError_t)
 */
calError_t cal_comm_get_rank(cal_comm_t comm, int* rank);

/**
 * Retrieves the number of processing elements associated with `comm`.
 *
 * \param[in]   comm            CAL Communicator handle
 * \param[out]  size            Pointer where to store the number of processing elements
 * \returns                     CAL return code (see calError_t)
 */
calError_t cal_comm_get_size(cal_comm_t comm, int* size);

#if defined(__cplusplus)
}
#endif

#endif // CAL_HEADER_
