Accelerator

Defines

DO_PRAGMA_(x)
DO_PRAGMA(x)
thread_num(a)
thread_max(a)
set_threads(a)
in_thread_parallel_region(a)
thread_for(i, num, ...)
thread_for3d(i1, n1, i2, n2, i3, n3, ...)
thread_for2d(i1, n1, i2, n2, ...)
strong_inline
gen_lambda1(iter1, _num1, ...)
accelerator_for_gen(thrDim, blockDim, options, iter1, num1, ...)
gen_lambda2(iter1, _num1, iter2, _num2, ...)
accelerator_for_2d_gen(thrDim, blockDim, options, iter1, num1, iter2, num2, ...)
gen_lambda3(iter1, _num1, iter2, _num2, iter3, _num3, ...)
accelerator_for_3d_gen(thrDim, blockDim, options, iter1, num1, iter2, num2, iter3, num3, ...)
gen_lambda4(iter1, _num1, iter2, _num2, iter3, _num3, iter4, _num4, ...)
accelerator_for_4d_gen(thrDim, blockDim, options, iter1, num1, iter2, num2, iter3, num3, iter4, num4, ...)
gen_lambda5(iter1, _num1, iter2, _num2, iter3, _num3, iter4, _num4, iter5, _num5, ...)
accelerator_for_5d_gen(thrDim, blockDim, options, iter1, num1, iter2, num2, iter3, num3, iter4, num4, iter5, num5, ...)
accelerator_for3dNB(iter1, num1, iter2, num2, iter3, num3, block2, ...)
accelerator_for3dNB_shm(iter1, num1, iter2, num2, iter3, num3, block2, shm_size, ...)
accelerator_for_1_3_NB_shm(iter1, num1, iter2, num2, iter3, num3, iter4, num4, block2, shm_size, ...)
accelerator_for_1_3_shm(iter1, num1, iter2, num2, iter3, num3, iter4, num4, block2, shm_size, ...)
accelerator_for_1_3_NB(iter1, num1, iter2, num2, iter3, num3, iter4, num4, block2, ...)
accelerator_for_1_3(iter1, num1, iter2, num2, iter3, num3, iter4, num4, block2, ...)
accelerator_for_2_3_NB_shm(iter1, num1, iter2, num2, iter3, num3, iter4, num4, iter5, num5, shm_size, ...)
accelerator_for_2_3_shm(iter1, num1, iter2, num2, iter3, num3, iter4, num4, iter5, num5, shm_size, ...)
accelerator_for3d(iter1, num1, iter2, num2, iter3, num3, block2, ...)
accelerator_for2dNB(iter1, num1, iter2, num2, block2, ...)
accelerator_for2d(iter1, num1, iter2, num2, block2, ...)
accelerator_forNB(iter1, num1, ...)
accelerator_for(iter, num, ...)
accelerator_for3d_shm(iter1, num1, iter2, num2, iter3, num3, block2, shm_size, ...)
accelerator_for2dNB_shm(iter1, num1, iter2, num2, block2, shm_size, ...)
accelerator_for2d_shm(iter1, num1, iter2, num2, block2, shm_size, ...)
accelerator_forNB_shm(iter1, num1, shm_size, ...)
accelerator_for_shm(iter, num, shm_size, ...)
autoView(ViewName, ObjName, mode)
doHost(a, ...)
doHost2(a, b, ...)
doHost3(a, b, c, ...)
doHost4(a, b, c, d, ...)

Functions

void acceleratorInit(void)
void acceleratorReport()
template<typename decompCoordPolicy, int thrDims, int blockDims, int splitBlockSize>
struct decomp

Public Types

Values:

enumerator totalDims
typedef decompCoordPolicy::itemPosContainerType itemPosContainerType

Public Functions

inline decomp(int _dim_sizes[totalDims])
inline void print()

Public Members

int decomp_sizes[6]
int dim_sizes[totalDims]
size_t total_size

Public Static Functions

template<int w> static inline accelerator_inline int coord (itemPosContainerType pos)
template<int w, bool na>
struct helper<w, false, na>

Public Static Functions

static inline accelerator_inline int value (itemPosContainerType pos)
template<int w>
struct helper<w, true, false>

Public Static Functions

static inline accelerator_inline int value (itemPosContainerType pos)
template<int w>
struct helper<w, true, true>

Public Static Functions

static inline accelerator_inline int value (itemPosContainerType pos)
template<typename decompCoordPolicy, int thrDims, int blockDims>
struct decomp<decompCoordPolicy, thrDims, blockDims, 1>

Public Types

Values:

enumerator totalDims
typedef decompCoordPolicy::itemPosContainerType itemPosContainerType

Public Functions

inline decomp(int _dim_sizes[totalDims])
inline void print()

Public Members

int decomp_sizes[6]
int dim_sizes[totalDims]
size_t total_size

Public Static Functions

template<int w> static inline accelerator_inline int coord (itemPosContainerType pos)
template<>
template<int w>
struct helper<w, false>

Public Static Functions

static inline accelerator_inline int value (itemPosContainerType pos)
template<>
template<int w>
struct helper<w, true>

Public Static Functions

static inline accelerator_inline int value (itemPosContainerType pos)
template<>
template<int w>
struct helper<w, true>

Public Static Functions

static inline accelerator_inline int value (itemPosContainerType pos)
template<>
template<int w>
struct helper<w, false>

Public Static Functions

static inline accelerator_inline int value (itemPosContainerType pos)
template<int _splitBlockSize = 1>
struct loopOptions

Public Types

Values:

enumerator splitBlockSize

Public Functions

inline loopOptions()
template<int B>
inline loopOptions<B> splitBlock()
inline loopOptions<_splitBlockSize> shm(size_t shm)
inline loopOptions<_splitBlockSize> barrier(bool doit)
inline loopOptions<_splitBlockSize> normal()

Public Members

size_t shm_size
bool do_barrier
template<typename ViewType>
struct viewDeallocator

Public Functions

inline viewDeallocator(ViewType &v)
inline ~viewDeallocator()

Public Members

ViewType &v

Public Static Functions

static inline void free(ViewType &v)

ActivationFuncs

template<typename FloatType>
class ReLU

Public Functions

void operator()(Matrix<FloatType> &x, Matrix<FloatType> *deriv = nullptr) const
template<int Dim>
void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const
template<typename FloatType>
class noActivation

Public Functions

inline void operator()(Matrix<FloatType> &x, Matrix<FloatType> *deriv = nullptr) const
template<int Dim>
inline void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const
template<typename FloatType>
class GeLU

Public Functions

template<int Dim>
void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const

Comms

Functions

Communicators &communicators()
void initializeComms(int argc, char **argv)
inline int UniqueID()
template<typename FloatType>
inline MPI_Datatype getMPIdataType()
template<typename FloatType>
inline void commsReduce(FloatType *data, size_t data_len, const MPI_Comm &comm)
template<typename FloatType>
inline void commsReduce(Vector<FloatType> &v, const MPI_Comm &comm)
template<typename FloatType>
inline void commsBroadcast(FloatType *data, size_t data_len, int from_rank, const MPI_Comm &comm)
template<typename FloatType>
inline void commsBroadcast(Vector<FloatType> &v, int from_rank, const MPI_Comm &comm)
template<typename FloatType>
inline void commsBroadcast(Matrix<FloatType> &v, int from_rank, const MPI_Comm &comm)
void waitAll(std::vector<CommsRequest> &reqs)

Wait for all comms activity on the associated requests to complete.

class Communicators

Public Functions

Communicators(int argc, char **argv)
~Communicators()
inline int worldRank() const
inline int worldNrank() const
inline int nodeRank() const
inline int nodeNrank() const
inline int ddpRank() const
inline int ddpNrank() const
inline int pipelineRank() const
inline int pipelineNrank() const
inline bool isPipelineLeader() const
inline MPI_Comm &pipelineCommunicator()
inline MPI_Comm &ddpCommunicator()
void enableNodePipelining()
void enableColorPipelining(int rank_color)
void enableGlobalPipelining()
void disableParallelism()
void enableDDPnoPipelining()
void reportSetup()

Private Functions

void setupDDPcommunicator()
void freeCommunicators()
void enableDDPnoPipeliningInternal()

Private Members

MPI_Comm pipeline_comm
MPI_Comm ddp_comm
int world_rank
int world_nrank
int node_rank
int node_nrank
int pipeline_rank
int pipeline_nrank
bool is_pipeline_leader
int ddp_rank
int ddp_nrank

Private Static Functions

static void createCommJustThisRank(int world_rank, MPI_Comm &comm)
struct PostCommActionCallback
#include <Comms.hpp>

A generic callback function applied after comms have completed.

Subclassed by PostCommActionCallbackTensorInitialize< FloatType, Dim >, PostCommActionCallbackUnlock< T >

Public Functions

virtual void performAction() = 0
inline virtual ~PostCommActionCallback()
template<typename T>
struct PostCommActionCallbackUnlock : public PostCommActionCallback
#include <Comms.hpp>

A post-comms callback to unlock a managed object.

Public Functions

inline PostCommActionCallbackUnlock(T const *v)
inline virtual void performAction() override

Public Members

T const *v
template<typename FloatType, int Dim>
struct PostCommActionCallbackTensorInitialize : public PostCommActionCallback
#include <Comms.hpp>

A post-comms callback to initialize a tensor. The associated comms should populate the “size” field.

Public Functions

inline PostCommActionCallbackTensorInitialize(std::unique_ptr<Tensor<FloatType, Dim>> &tens)
inline virtual void performAction() override

Public Members

std::unique_ptr<Tensor<FloatType, Dim>> &tens
int size[Dim]
struct CommsRequest
#include <Comms.hpp>

A comms request with a callback hook.

Public Members

MPI_Request req
std::unique_ptr<PostCommActionCallback> post

Components

DDP

Functions

template<typename FloatType>
void ddpAverage(FloatType *data, size_t len, bool pipeline_bcast = false)
template<typename FloatType>
void ddpAverage(Vector<FloatType> &v, bool pipeline_bcast = false)

DynamicModel

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
LayerWrapper<CONFIGTYPE(U), INPUTTYPE(U), LAYEROUTPUTTYPE(U)> enwrap(U &&u)
template<typename Config, typename InputType, typename LayerOutputType>
class LayerWrapperInternalBase

Public Functions

virtual LayerOutputType value(const InputType &x, EnableDeriv enable_deriv) = 0
virtual int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const = 0
virtual int nparams() const = 0
virtual size_t FLOPS(int value_or_deriv) const = 0
virtual void resizeInputBuffer(size_t to) = 0
virtual int getParams(Vector<FloatType> &into, int off) const = 0
virtual int update(int off, const Vector<FloatType> &new_params) = 0
virtual int step(int off, const Vector<FloatType> &derivs, FloatType eps) = 0
inline virtual ~LayerWrapperInternalBase()

Public Members

EXTRACT_CONFIG_TYPES
template<typename Store, typename std::enable_if<ISSTORAGE(Store), int>::type = 0>
class LayerWrapperInternal : public LayerWrapperInternalBase<Store::type::ModelConfig, Store::type::InputType, LAYEROUTPUTTYPE(Store::type)>

Public Types

typedef Store::type::FloatType FloatType
typedef Store::type::InputType InputType

Public Functions

typedef LAYEROUTPUTTYPE (typename Store::type) LayerOutputType
inline LayerWrapperInternal(Store &&layer)
inline virtual LayerOutputType value(const InputType &x, EnableDeriv enable_deriv) override
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const override
inline virtual int nparams() const override
inline virtual size_t FLOPS(int value_or_deriv) const override
inline int getParams(Vector<FloatType> &into, int off) const override
inline int update(int off, const Vector<FloatType> &new_params) override
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps) override
inline virtual void resizeInputBuffer(size_t to) override

Public Members

Store layer
template<typename Config, typename _InputType, typename _LayerOutputType>
class LayerWrapper

Public Types

typedef _InputType InputType
typedef _LayerOutputType LayerOutputType
typedef LeafTag tag

Public Functions

LayerWrapper(LayerWrapper &&r) = default
LayerWrapper &operator=(LayerWrapper &&r) = default
template<typename Store, typename std::enable_if<ISSTORAGE(Store), int>::type = 0>
inline LayerWrapper(Store &&layer)
inline LayerOutputType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

std::unique_ptr<LayerWrapperInternalBase<Config, InputType, LayerOutputType>> layer

Embeddings

Functions

template<typename FloatType>
Tensor<FloatType, 3> embedPositionsSinusoidal(const Tensor<FloatType, 3> &in, FLOPScounter *flops = nullptr)
template<typename FloatType>
Tensor<FloatType, 2> embedPositionsSinusoidal(const Tensor<FloatType, 2> &in, FLOPScounter *flops = nullptr)

HPCortex

Init

Functions

void initialize(int argc, char **argv)

InstanceStorage

Defines

DDST(a)
ISSTORAGE(a)
INPUT_CON(BASENM, BASETYPE)

For an input forwarding reference BASETYPE&& ${BASENM}_ref, create a reference container ${BASENM}_con and a const reference to the object ${BASENM}.

Typedefs

template<typename RefType, typename OfType>
using enable_if_fwd_ref = typename std::enable_if<std::is_same<typename std::decay<RefType>::type, OfType>::value, int>::type
template<typename RefType1, typename RefType2, typename OfType>
using enable_if_fwd_ref2 = typename std::enable_if<std::is_same<typename std::decay<RefType1>::type, OfType>::value && std::is_same<typename std::decay<RefType2>::type, OfType>::value, int>::type
template<typename RefType1, typename RefType2, typename RefType3, typename OfType>
using enable_if_fwd_ref3 = typename std::enable_if<std::is_same<typename std::decay<RefType1>::type, OfType>::value && std::is_same<typename std::decay<RefType2>::type, OfType>::value && std::is_same<typename std::decay<RefType3>::type, OfType>::value, int>::type
struct StorageTag
template<typename T>
struct LeafStore

Public Types

typedef StorageTag tag
typedef T type

Public Functions

inline LeafStore(T &&v)
LeafStore(const LeafStore &r) = delete
inline LeafStore(LeafStore &&r)
inline T release()

Public Members

T v
template<typename T>
struct LeafRef

Public Types

typedef StorageTag tag
typedef T type

Public Functions

inline LeafRef(T &v)
LeafRef(const LeafRef &r) = delete
inline LeafRef(LeafRef &&r)
inline T &release()

Public Members

T &v
template<typename T>
struct deduceStorage
template<typename T>
struct deduceStorage<T&>

Public Types

typedef LeafRef<T> type
template<typename T>
struct deduceStorage<T&&>

Public Types

typedef LeafStore<T> type

Layers

Linalg

Functions

template<typename FloatType>
void thinMulMatMatTranspose_p(FloatType *out_p, const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> thinMulMatMatTranspose(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> mulMatTransposeThinMat(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> computeThinMatOuterProd(const Matrix<FloatType> &above_deriv, const Matrix<FloatType> &activation_deriv, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> axpyMatThinMat(const Matrix<FloatType> &a, const Matrix<FloatType> &b, const Vector<FloatType> &c, FLOPScounter *flops = nullptr)
template<typename FloatType>
Tensor<FloatType, 3> batch3tensorContract(const Tensor<FloatType, 3> &A, const Tensor<FloatType, 3> &B, int contract_dimA, int contract_dimB, FloatType nrm = 1.0, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
Tensor<FloatType, Dim> matrixBatchTensorAxpy(const Matrix<FloatType> &A, const Tensor<FloatType, Dim> &X, const Vector<FloatType> &Y, const int contract_dim, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
void batchTensorContractToMatrix_p(FloatType *out_p, const Tensor<FloatType, Dim> &A, const Tensor<FloatType, Dim> &B, const int preserve_dim, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
Tensor<FloatType, Dim> matrixBatchTensorContractRight(const Tensor<FloatType, Dim> &X, const Matrix<FloatType> &A, const int contract_dim, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
Tensor<FloatType, Dim> matrixBatchTensorContractLeft(const Matrix<FloatType> &A, const Tensor<FloatType, Dim> &X, const int contract_dim, FLOPScounter *flops = nullptr)

LossFunctions

Defines

CWRP

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto mse_cost(U &&u) -> CWRP
template<typename CostFunc, typename U, typename std::enable_if<ISLEAF(U) && std::is_default_constructible<CostFunc>::value, int>::type = 0>
auto cost_func_wrap(U &&u)
template<typename CostFunc, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto cost_func_wrap(U &&u, const CostFunc &cf)
template<typename Store, typename CostFunc>
class CostFuncWrapper

Public Types

typedef Store::type ModelType
typedef ModelType::FloatType FloatType
typedef ModelType::InputType InputType
typedef CostFunc::PredictionType PredictionType
typedef CostFunc::ComparisonType ComparisonType

Public Functions

inline CostFuncWrapper(Store &&leaf, const CostFunc &cost = CostFunc())
inline ModelType &getModel()

Access the underlying model.

inline FloatType loss(const InputType &x, const ComparisonType &y, EnableDeriv enable_deriv = DerivNo)
inline Vector<FloatType> deriv() const
inline PredictionType predict(const InputType &x)
template<typename _PredictionType = PredictionType, typename _InputType = InputType, int TensDimIn = _InputType::Dimension, int TensDimOut = _PredictionType::Dimension, typename std::enable_if<std::is_same<_PredictionType, Tensor<FloatType, TensDimOut>>::value && std::is_same<_InputType, Tensor<FloatType, TensDimIn>>::value && std::is_same<_PredictionType, ComparisonType>::value, int>::type = 0>
inline Tensor<FloatType, TensDimOut - 1> predict(const Tensor<FloatType, TensDimIn - 1> &x, int batch_size)
inline void update(const Vector<FloatType> &new_params)
inline void step(const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline Vector<FloatType> getParams() const

Private Members

Store leaf
PredictionType ypred
ComparisonType yval
CostFunc cost
int nparam
template<typename OutputType>
class MSEcostFunc
template<typename FloatType, int Dim>
class MSEcostFunc<Tensor<FloatType, Dim>>

Public Types

typedef Tensor<FloatType, Dim> DataType
typedef DataType ComparisonType
typedef DataType PredictionType

Public Static Functions

static FloatType loss(const ComparisonType &y, const PredictionType &ypred)
static PredictionType layer_deriv(const ComparisonType &y, const PredictionType &ypred)

ManagedArray

template<typename FloatType>
class ManagedArray

Public Functions

inline ManagedArray()
inline ManagedArray(size_t size, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)
inline ManagedArray(size_t size, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)
inline ManagedArray(const std::vector<FloatType> &init)
inline ManagedArray(ManagedArray &&r)
inline ManagedArray(const ManagedArray &r)
inline ManagedArray &operator=(ManagedArray &&r)
inline ManagedArray &operator=(const ManagedArray &r)
inline size_t size() const
inline View view(ViewMode mode) const
inline ~ManagedArray()
inline void fill(FloatType init, MemoryManager::Pool assign_pool = MemoryManager::Pool::DevicePool)
inline void lock() const
inline void unlock() const
inline bool deviceResident() const

Private Members

MemoryManager::HandleIterator handle
size_t _size
class View

Subclassed by Tensor< _FloatType, Dim >::View

Public Functions

inline accelerator_inline size_t size () const
inline accelerator_inline FloatType * data () const
inline accelerator_inline FloatType & operator[] (const size_t i) const
inline View(ViewMode mode, MemoryManager::HandleIterator handle, size_t _size)
inline View(ViewMode mode, const ManagedArray &parent)
inline void free()

Private Members

FloatType *v
size_t _size
MemoryManager::HandleIterator handle
template<typename T>
class ManagedTypeArray
#include <ManagedArray.hpp>

A container representing an array of managed objects. The associated view allows accessing views of the individual elements by index.

Public Functions

inline ManagedTypeArray()
inline ManagedTypeArray(int size)
inline T &operator[](const int i)
inline const T &operator[](const int i) const
inline View view(ViewMode mode) const
inline int size() const
template<typename ElemConstructFunc>
inline void resize(int size, const ElemConstructFunc &construct)

Resize the array, using a lambda function to construct each element in place. The lambda should take the element index and return the element.

inline void resize(int size)

Private Types

typedef ManagedArray<typename T::View> ElemViewArray

Private Members

std::vector<T> elems
mutable ElemViewArray tv
struct View : public ElemViewArray::View

Public Functions

inline View(ViewMode mode, ElemViewArray &parent)
inline void free()

Public Members

ElemViewArray *parent_p

MemoryManager

Enums

enum ViewMode

Values:

enumerator HostRead
enumerator HostWrite
enumerator DeviceRead
enumerator DeviceWrite
enumerator HostReadWrite
enumerator DeviceReadWrite
enum Locale

Used to select a locale for an operation where applicable.

Values:

enumerator Host
enumerator Device
enumerator Auto

Functions

inline std::string memPoolManagerReport(bool detailed = false)
class MemoryManager

Public Types

enum Pool

Values:

enumerator DevicePool
enumerator HostPool
typedef std::list<Entry>::iterator EntryIterator
typedef std::list<Handle>::iterator HandleIterator

Public Functions

inline MemoryManager()
inline MemoryManager(size_t max_size_device, size_t max_size_host)
~MemoryManager()
inline void setVerbose(bool to)
void enableIOlogging()
inline void setDiskRoot(const std::string &to)
inline const std::string &getDiskRoot() const
inline void enableDeletionOfLocalDiskDataOnRestore(bool val = true)
inline void setPoolMaxSize(size_t to, Pool pool)
inline size_t getAllocatedBytes(Pool pool) const
size_t getDiskCachedBytes() const
size_t getDiskUsedBytes() const
std::string report(bool detailed = false) const
void evictToDisk(HandleIterator h)
HandleIterator allocate(size_t bytes, Pool pool = DevicePool)
void *openView(ViewMode mode, HandleIterator h)
void closeView(HandleIterator h)
void free(HandleIterator h)
inline size_t nOpenHandles() const
inline void lock(HandleIterator h)
inline void unlock(HandleIterator h)

Public Static Functions

static inline MemoryManager &globalPool()

Protected Functions

inline std::list<Entry> &getLRUpool(Pool pool)
inline std::map<size_t, std::list<Entry>, std::greater<size_t>> &getFreePool(Pool pool)
inline std::string poolName(Pool pool)
EntryIterator allocEntry(size_t bytes, Pool pool)
void sanityCheck()
void moveEntryToFreePool(EntryIterator it, Pool pool)
void freeEntry(EntryIterator it, Pool pool)
void deallocateFreePool(Pool pool, size_t until_allocated_lte = 0)
EntryIterator getEntry(size_t bytes, Pool pool)
void attachEntry(Handle &handle, Pool pool)
void touchEntry(Handle &handle, Pool pool)
void syncDeviceToHost(Handle &handle)
void syncHostToDevice(Handle &handle)
void syncHostToDisk(Handle &handle)
void syncDiskToHost(Handle &handle)
void syncForRead(Handle &handle, Pool pool)
void markForWrite(Handle &handle, Pool pool)
void prepareEntryForView(Handle &handle, Pool pool)
EntryIterator evictEntry(EntryIterator entry, bool free_it, Pool pool)
void removeDiskData(Handle &handle, bool in_memory_check = true)

Protected Attributes

bool verbose
std::ofstream *io_logger
std::list<Handle> handles
std::list<Entry> device_in_use_pool
std::map<size_t, std::list<Entry>, std::greater<size_t>> device_free_pool
std::list<HandleIterator> device_queued_prefetches
std::list<Entry> host_in_use_pool
std::map<size_t, std::list<Entry>, std::greater<size_t>> host_free_pool
std::list<HandleIterator> host_queued_prefetches
size_t device_allocated
size_t host_allocated
size_t device_pool_max_size
size_t host_pool_max_size
size_t local_disk_allocated
size_t device_allocated_HWM
size_t host_allocated_HWM
size_t local_disk_allocated_HWM
std::string disk_root
bool delete_local_diskdata_on_restore

Protected Static Functions

static void summarizePoolStatus(std::ostream &os, const std::string &descr, const std::map<size_t, std::list<Entry>, std::greater<size_t>> &pool_stat)
static void summarizePoolStatus(std::ostream &os, const std::string &descr, const std::map<size_t, int, std::greater<size_t>> &pool_stat)
struct Entry

Public Members

size_t bytes
void *ptr
Handle *owned_by
struct Handle

Public Functions

inline Handle()

Public Members

size_t lock_entry
bool device_valid
EntryIterator device_entry
bool host_valid
EntryIterator host_entry
size_t bytes
bool device_in_sync
bool host_in_sync
bool disk_in_sync
std::string disk_file
bool disk_file_exists
bool device_prefetch_underway
bool initialized

Optimizers

Functions

template<typename DataLoader, typename LossWrappedModelType, typename Optimizer>
std::vector<typename LossWrappedModelType::FloatType> train(LossWrappedModelType &loss_func, const DataLoader &data, Optimizer &optimizer, int nepoch, int batch_size, bool suppress_logging = false)

Train a model using DDP, whereby batches of data are distributed over ranks of the DDP communicator and trained in parallel.

DataLoaders are expected to provide the following methods: (1) size_t size() const — returns the total amount of data; (2) BatchType batch(int const* indices, int batch_size) const — returns the batch with the batch size and indices as specified. BatchType must contain members ‘x’ and ‘y’, which are taken as the inputs to the model’s loss function.

Parameters:
  • loss_func – The model wrapped in a loss-function wrapper supporting calls to compute the loss and the loss derivative given an input/output data batch

  • data – The training data loader, the spec for which is given in the description above

  • optimizer – The optimizer

  • nepoch – The number of epochs

  • batch_size – The batch size

  • suppress_logging – Optionally suppress logging output

Returns:

The complete loss history for all batches / epochs

template<typename DataLoader, typename LossWrappedModelType, typename Optimizer>
std::pair<std::vector<typename LossWrappedModelType::FloatType>, std::vector<typename LossWrappedModelType::FloatType>> train(LossWrappedModelType &loss_func, const DataLoader &train_data, const DataLoader &valid_data, Optimizer &optimizer, int nepoch, int batch_size, bool suppress_logging = false)

Train and validate a model using DDP, whereby batches of data are distributed over ranks of the DDP communicator and trained in parallel.

Parameters:
  • loss_func – The model wrapped in a loss-function wrapper supporting calls to compute the loss and the loss derivative given an input/output data batch

  • train_data – The training data loader (cf. the DataLoader spec given for the training-only overload of train above)

  • valid_data – The validation data loader

  • optimizer – The optimizer

  • nepoch – The number of epochs

  • batch_size – The batch size

  • suppress_logging – Optionally suppress logging output

Returns:

The complete loss history for all batches / epochs for training (first) and validation (second)

template<typename FloatType, int DimX, int DimY>
inline XYpair<FloatType, DimX + 1, DimY + 1> batchData(int const *indices, int batch_size, const std::vector<XYpair<FloatType, DimX, DimY>> &data)
template<typename FloatType>
struct noScheduler

Public Functions

inline noScheduler(FloatType lr)
inline FloatType operator()(const int epoch) const

Public Members

FloatType lr
template<typename FloatType, typename LRscheduler = noScheduler<FloatType>>
class GradientDescentOptimizer

Public Functions

inline GradientDescentOptimizer(const LRscheduler &sched)
template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0>
inline GradientDescentOptimizer(FloatType lr)
inline void epochStart(int epoch, bool verbose = true)
inline Vector<FloatType> descentProfile(FloatType &step_size, const Vector<FloatType> &deriv) const

Private Members

LRscheduler sched
FloatType eps
template<typename FloatType>
struct AdamParams

Public Functions

inline AdamParams(FloatType beta1 = 0.99, FloatType beta2 = 0.999, FloatType eps = 1e-8)

Public Members

FloatType beta1
FloatType beta2
FloatType eps
template<typename FloatType>
struct AdamState

Public Members

FloatType alpha
size_t t
Vector<FloatType> m
Vector<FloatType> v
template<typename FloatType, typename LRscheduler = noScheduler<FloatType>>
class AdamOptimizer

Public Functions

inline AdamOptimizer(const AdamParams<FloatType> &ap, const LRscheduler &sched)
inline AdamOptimizer(const LRscheduler &sched)
template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0>
inline AdamOptimizer(const AdamParams<FloatType> &ap, FloatType lr)
template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0>
inline AdamOptimizer(FloatType lr)
inline AdamState<FloatType> getState() const
inline void setState(const AdamState<FloatType> &st)
inline void epochStart(int epoch, bool verbose = true)
inline Vector<FloatType> descentProfile(FloatType &step_size, const Vector<FloatType> &g)

Private Functions

inline void reset()
template<typename FloatType>
class DecayScheduler

Public Functions

inline DecayScheduler(FloatType eps, FloatType decay_rate)
inline FloatType operator()(const int epoch) const

Private Members

FloatType eps
FloatType decay_rate
template<typename FloatType, int DimX, int DimY>
struct XYpair

Public Members

Tensor<FloatType, DimX> x
Tensor<FloatType, DimY> y
template<typename FloatType, int DimX, int DimY>
class XYpairDataLoader

Public Functions

inline XYpairDataLoader(const std::vector<XYpair<FloatType, DimX, DimY>> &data)
inline size_t size() const
inline XYpair<FloatType, DimX + 1, DimY + 1> batch(int const *indices, int batch_size) const

Private Members

const std::vector<XYpair<FloatType, DimX, DimY>> &data

Padding

template<typename FloatType>
class NoPadding

Public Functions

template<int Dim>
inline Tensor<FloatType, Dim> padInput(const Tensor<FloatType, Dim> &in) const
template<int Dim>
inline Tensor<FloatType, 3> unpadDeriv(const Tensor<FloatType, Dim> &deriv_pad) const

Public Static Functions

static inline int layerOutputLength(int input_size, int kernel_size, int stride)
template<typename FloatType>
class SamePaddingZero1D

Public Functions

inline SamePaddingZero1D(int kernel_size, int stride = 1)
inline Tensor<FloatType, 3> padInput(const Tensor<FloatType, 3> &in) const
inline Tensor<FloatType, 3> unpadDeriv(const Tensor<FloatType, 3> &deriv_pad) const

Public Static Functions

static inline int layerOutputLength(int input_size, int kernel_size, int stride)

Private Members

int kernel_size
int stride

Performance

struct FLOPScounter

Public Functions

inline FLOPScounter()
inline size_t add(size_t v)
inline void lock()
inline bool locked() const
inline size_t value() const

Private Members

bool _locked
size_t _value

Pipelining

Warning

doxygenfile: Cannot find file “Pipelining.hpp”

Random

Typedefs

typedef std::mt19937 GlobalRNGtype

Functions

inline GlobalRNGtype &globalRNG()
inline void reseedGlobalRNG(size_t seed)
template<typename FloatType, int Dim, typename Dist, typename RNG>
void random(Tensor<FloatType, Dim> &m, Dist &dist, RNG &rng)
template<typename FloatType, int Dim, typename RNG>
void uniformRandom(Tensor<FloatType, Dim> &m, RNG &rng, FloatType min = FloatType(-1.0), FloatType max = FloatType(1.0))
template<typename FloatType, int Dim>
inline void uniformRandom(Tensor<FloatType, Dim> &m, FloatType min = FloatType(-1.0), FloatType max = FloatType(1.0))
template<typename FloatType, typename RNG>
void glorotUniformRandom(Matrix<FloatType> &m, RNG &rng, FloatType gain = FloatType(1.0))
template<typename FloatType>
inline void glorotUniformRandom(Matrix<FloatType> &m, FloatType gain = FloatType(1.0))
template<typename FloatType, typename RNG>
size_t drawWeightedRandomIndex(FloatType const *weights, int nweights, size_t stride, RNG &rng)
template<typename FloatType>
size_t drawWeightedRandomIndex(FloatType const *weights, int nweights, size_t stride)

Variables

constexpr size_t default_seed = 1234

RingBuffer

Warning

doxygenfile: Cannot find file “RingBuffer.hpp”

Serialization

Enums

enum class Endianness

Values:

enumerator Big
enumerator Little
enumerator System

Functions

std::string toString(const Endianness e)
Endianness endianness()
uint8_t BitReverseTable256(size_t i)
template<typename T>
inline T bitReverse(T in)
class BinaryWriter

Public Functions

BinaryWriter(const std::string &filename, const Endianness end = Endianness::System)
template<typename T, typename std::enable_if<!ISLEAF(T), int>::type = 0>
void write(const T &v)
template<typename T, typename U>
void write(const std::pair<T, U> &v)
template<typename T>
void write(const std::vector<T> &v)
template<typename T, int Dim>
void write(const Tensor<T, Dim> &t)
template<typename Model, typename std::enable_if<ISLEAF(Model), int>::type = 0>
void write(const Model &model)
template<typename Store, typename CostFunc>
void write(const CostFuncWrapper<Store, CostFunc> &model)
template<typename T>
inline void close()

Private Members

std::ofstream of
bool do_flip
class BinaryReader

Public Functions

BinaryReader(const std::string &filename)
template<typename T, typename std::enable_if<!ISLEAF(T), int>::type = 0>
void read(T &v)
template<typename T, typename U>
void read(std::pair<T, U> &v)
template<typename T>
void read(std::vector<T> &v)
template<typename T, int Dim>
void read(Tensor<T, Dim> &t)
template<typename Model, typename std::enable_if<ISLEAF(Model), int>::type = 0>
void read(Model &model)
template<typename Store, typename CostFunc>
void read(CostFuncWrapper<Store, CostFunc> &model)
inline void close()

Private Functions

template<typename T>
inline T readValue()

Private Members

std::ifstream of
bool do_flip

Tensors

Defines

_1D_TENSOR_ONLY
_2D_TENSOR_ONLY
_3D_TENSOR_ONLY
_4D_TENSOR_ONLY

Typedefs

template<typename FloatType>
using Vector = Tensor<FloatType, 1>

Alias vector to 1D tensor.

template<typename FloatType>
using Matrix = Tensor<FloatType, 2>

Alias matrix to 2D tensor.

Functions

template<size_t Dim> accelerator_inline size_t tensorSize (int const *dims)

Compute the linear size of a tensor of dimension “Dim” and the provided dimensions.

Parameters:

dims – The tensor dimension (array of size Dim)

template<size_t Dim> accelerator_inline size_t tensorOffset (int const *coord, int const *dims)

Compute the linear (pointer) offset of a specific coordinate within a tensor of dimension “Dim” and the provided dimensions.

Parameters:
  • coord – The coordinate (array of size Dim)

  • dims – The tensor dimension (array of size Dim)

template<size_t Dim> accelerator_inline void tensorOffsetUnmap (int *coord, int const *dims, size_t offset)

Compute the coordinate associated with a specific linear (pointer) offset for a tensor of dimension “Dim” and the provided dimensions.

Parameters:
  • coord[out] The tensor coordinate (array of size Dim)

  • dims[in] The tensor dimension (array of size Dim)

  • offset[in] The input linear offset

template<int Dim> accelerator_inline size_t tensorDimensionStride (int iter_dim, int const *size)

Compute the stride for iterating over a specific dimension for a tensor of dimension “Dim” with the provided dimensions.

Parameters:
  • iter_dim – The dimension that will be iterated over

  • size – The tensor dimension (array of size Dim)

template<int Dim> accelerator_inline size_t tensorDimensionBaseLin (int iter_dim, size_t other_dim_lin, int const *size)

Compute the linear (pointer) offset for the base element for iterating over a specific dimension of a tensor of dimension “Dim”.

Parameters:
  • iter_dim – The dimension that will be iterated over

  • other_dim_lin – The coordinates in dimensions apart from iter_dim expressed as a lexicographic linear index in descending order, e.g. z + size_z * (y + size_y * x)

  • size – The tensor dimension (array of size Dim)

template<int Dim> accelerator_inline size_t tensorDimensionBase (int iter_dim, int const *other_coord, int const *size)

Compute the linear (pointer) offset for the base element for iterating over a specific dimension of a tensor of dimension “Dim”.

Parameters:
  • iter_dim – The dimension that will be iterated over

  • other_coord – The coordinates for the other dimensions (array of size Dim-1)

  • size – The tensor dimension (array of size Dim)

template<int Dim> accelerator_inline size_t batchTensorDimensionBaseLin (int iter_dim, int batch_idx, size_t other_dim_lin, int const *size)

Compute the linear (pointer) offset for the base element for iterating over a specific dimension for a batch-tensor (last dim is the batch dimension) of dimension “Dim”.

Parameters:
  • iter_dim – The dimension that will be iterated over

  • batch_idx – The batch index (coordinate in last dimension)

  • other_dim_lin – The coordinates in dimensions apart from iter_dim and Dim-1 expressed as a lexicographic linear index in descending order, e.g. z + size_z * (y + size_y * x)

  • size – The tensor dimension (array of size Dim)

template<typename FloatType>
void pokeColumn(Matrix<FloatType> &into, int col, const Vector<FloatType> &data)

Insert a vector as particular column of a matrix, i.e. into(:,col) = data(:)

Parameters:
  • into – The target matrix

  • col – The column index

  • data – The input column

template<typename FloatType>
void pokeRow(Matrix<FloatType> &into, int row, const Vector<FloatType> &data)

Insert a vector as particular row of a matrix, i.e. into(row,:) = data(:)

Parameters:
  • into – The target matrix

  • row – The row index

  • data – The input row

template<typename FloatType>
Vector<FloatType> peekColumn(const Matrix<FloatType> &m, int col)

Retrieve a specific column of a matrix m, i.e. return m(:,col)

Parameters:
  • m – The matrix

  • col – The column index

template<typename FloatType>
Matrix<FloatType> peekColumns(const Matrix<FloatType> &m, int col_start, int col_end)

Retrieve multiple consecutive columns of a matrix m, i.e. return m(:,col_start:col_end+1)

Parameters:
  • m – The matrix

  • col_start – The first column index

  • col_end – The last column index

template<typename FloatType>
void pokeColumns(Matrix<FloatType> &into, int col_start, int col_end, const Matrix<FloatType> &cols)

Insert multiple consecutive columns into a matrix, i.e. into(:,col_start:col_end+1) = cols(:,:)

Parameters:
  • into – The matrix in which to insert the columns

  • col_start – The first column index

  • col_end – The last column index

  • cols – The matrix containing the columns (#cols = col_end-col_start+1)

template<int Dim, typename FloatType>
Tensor<FloatType, Dim> dimensionSlice(const Tensor<FloatType, Dim> &from, const std::vector<int> &indices, const int dimension, Locale loc = Auto)

Extract a slice/subset of a tensor based on indices in a given dimension, e.g. for a 3-tensor X and dimension=1, return X[:,indices,:].

Parameters:
  • from – The tensor to slice

  • indices – The indices along the slice dimension to retain

  • dimension – The dimension along which to slice

  • loc – The locale in which the operation is performed. If set to Auto (default) it will be performed on the device if from is device-resident, else on the host

template<int Dim, typename FloatType>
Tensor<FloatType, Dim> dimensionSlice(const Tensor<FloatType, Dim> &from, int const *indices, int nidx, const int dimension, Locale loc = Auto)

Extract a slice/subset of a tensor based on indices in a given dimension, e.g. for a 3-tensor X and dimension=1, return X[:,indices,:].

Parameters:
  • from – The tensor to slice

  • indices – A host pointer to the array of indices along the slice dimension to retain

  • nidx – The number of indices / size of the sliced output dimension

  • dimension – The dimension along which to slice

  • loc – The locale in which the operation is performed. If set to Auto (default) it will be performed on the device if from is device-resident, else on the host

template<int Dim, typename FloatType>
normalization<FloatType, Dim - 1> normalize(Tensor<FloatType, Dim> &tens, const int dimension, Locale loc = Auto, FloatType epsilon = FloatType(0.))

Normalize a tensor along a specific dimension. The normalization factors (mean, std) will be returned for each orthogonal dimension.

Parameters:
  • tens – The tensor to normalize

  • dimension – The dimension along which to normalize

  • loc – The locale on which the operation is performed. If set to Auto (default), it will be performed on the device if the tensor is device-resident, else on the host

  • epsilon – A small offset for numerical stability

template<int Dim, typename FloatType>
void unnormalize(Tensor<FloatType, Dim> &tens, const int dimension, const normalization<FloatType, Dim - 1> &nrm, Locale loc = Auto)

Unnormalize a tensor along a specific dimension using the pre-computed normalization factors (mean, std)

Parameters:
  • tens – The tensor to unnormalize

  • dimension – The dimension along which to unnormalize

  • nrm – The precomputed normalization factors

  • loc – The locale on which the operation is performed. If set to Auto (default), it will be performed on the device if the tensor is device-resident, else on the host

template<typename FloatType>
Matrix<FloatType> transpose(const Matrix<FloatType> &m, Locale loc = Auto)

Transpose a matrix. If loc = Auto (default), the operation will be performed on the device, else on the host.

template<typename FloatType>
std::ostream &operator<<(std::ostream &os, const Vector<FloatType> &v)

Output a vector to a stream.

template<typename FloatType>
std::ostream &operator<<(std::ostream &os, const Matrix<FloatType> &v)

Output a matrix to a stream.

template<typename FloatType>
Vector<FloatType> operator*(const Matrix<FloatType> &A, const Vector<FloatType> &x)

Perform the matrix-vector product of A and x.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> &operator+=(Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Addition-assignment operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> operator+(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Addition operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> &operator-=(Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Subtraction-assignment operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> operator-(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Subtraction operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> &operator*=(Tensor<FloatType, Dim> &a, FloatType eps)

Scalar multiplication-assignment operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> operator*(FloatType eps, const Tensor<FloatType, Dim> &b)

Scalar left-multiplication operator for tensors.

template<typename FloatType, int Dim>
inline Tensor<FloatType, Dim> operator*(const Tensor<FloatType, Dim> &b, FloatType eps)

Scalar right-multiplication operator for tensors.

template<int Dim, typename FloatType>
Vector<FloatType> flatten(const Tensor<FloatType, Dim> &t)

“Flatten” a tensor into a vector. The output mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
FloatType *flatten(FloatType *host_ptr, const Tensor<FloatType, Dim> &in)

“Flatten” a tensor into a pre-allocated host array and return the pointer to the element of the array one past the flattened tensor.

Parameters:
  • host_ptr – The host array destination. The output mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

  • in – The input tensor

Returns:

A pointer to the element of the array one past the flattened tensor. Note: the copy is performed on the host side.

template<int Dim, typename FloatType>
void unflatten(Tensor<FloatType, Dim> &out, const Vector<FloatType> &t)

“Unflatten” vector into tensor

Parameters:
  • out – The output tensor. Its dimensions should be set correctly prior to calling this function

  • t – The input vector. The input mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
FloatType const *unflatten(Tensor<FloatType, Dim> &out, FloatType const *host_ptr)

“Unflatten” a tensor from a pre-allocated host array and return the pointer to the element of the array one past the flattened tensor.

Parameters:
  • out – The output tensor. Its dimensions should be set correctly prior to calling this function

  • host_ptr – The input array pointer. The input mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

Returns:

A pointer to the element of the array one past the flattened tensor. Note: the copy is performed on the host side.

template<int Dim1, int Dim2, typename FloatType>
Vector<FloatType> flatten2(const Tensor<FloatType, Dim1> &t1, const Tensor<FloatType, Dim2> &t2)

Flatten two tensors into a single contiguous array.

Parameters:
  • t1 – The first tensor

  • t2 – The second tensor

Returns:

An output vector of length t1.data_len() + t2.data_len(), where the elements within the sub-arrays are obtained from their corresponding tensor via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim1, int Dim2, typename FloatType>
void unflatten2(Tensor<FloatType, Dim1> &t1, Tensor<FloatType, Dim2> &t2, const Vector<FloatType> &v)

Unflatten two tensors from a single contiguous array.

The output tensor dimensions should be set appropriately prior to calling this function

Parameters:
  • t1[out] The first tensor

  • t2[out] The second tensor

  • v[in] An input vector of length t1.data_len() + t2.data_len(), where the elements within the sub-arrays map to their corresponding tensor coordinates via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
Vector<FloatType> flattenNsameDim(Tensor<FloatType, Dim> const *const *tens, int N)

Flatten N tensors of the same dimension into a single contiguous array.

Parameters:
  • tens – An array of pointers to input tensors

  • N – The number of tensors

Returns:

An output vector of length \sum_i tens[i].data_len(), where the elements within the sub-arrays are obtained from their corresponding tensor via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
void unflattenNsameDim(Tensor<FloatType, Dim> *const *tens, int N, const Vector<FloatType> &v)

Unflatten N tensors of the same dimension from a single contiguous array.

Parameters:
  • tens – The output tensor array. The tensor dimensions should be set appropriately prior to calling this function.

  • N – The number of tensors

  • v – The input vector. This must have length \sum_i tens[i].data_len(), where the elements within the sub-arrays map to their corresponding tensor coordinates via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
Tensor<FloatType, Dim> batchTensorConcatenate(Tensor<FloatType, Dim> const *const *in, int Ntens, int concat_dim)

Concatenate (stack) Ntens tensors along a dimension concat_dim < Dim-1 (last dim is assumed to be the batch index).

Dimensions other than concat_dim must all have the same size.

Parameters:
  • in – The input tensor array

  • Ntens – The number of tensors

  • concat_dim – The dimension along which the concatenation is performed

template<int Dim, typename FloatType>
void batchTensorSplit(Tensor<FloatType, Dim> *const *out, int Ntens, const Tensor<FloatType, Dim> &in, int split_dim)

Split a tensor along a dimension split_dim < Dim-1 (last dim is the batch index) into multiple tensors.

Dimensions other than split_dim must all have the same size.

Parameters:
  • out – The output tensors. These should be pre-initialized to the appropriate sizes.

  • Ntens – The number of output tensors

  • in – The input tensor

  • split_dim – The dimension along which to split

template<int Dim, typename FloatType>
double norm2(const Tensor<FloatType, Dim> &T)

Return the tensor norm^2, i.e. \sum_{i,j,k,…} T[i,j,k,…]^2.

template<typename FloatType, int Dim>
Vector<FloatType> transformBatchMatrix(int rowdim, int coldim, const Tensor<FloatType, Dim> &tens)

Interpret a batched-tensor (last dim is the batch index) as an array of matrices with the provided row and column dimensions. The output data are rearranged such that these matrices are contiguous in row-major, suitable for BLAS libraries.

Parameters:
  • rowdim – The dimension of the input tensor that is interpreted as the output matrix row dimension

  • coldim – The dimension of the input tensor that is interpreted as the output matrix column dimension

  • tens – The input tensor

Returns:

An array of contiguous matrices in row-major format

template<typename FloatType, int Dim>
void untransformBatchMatrix(int rowdim, int coldim, Tensor<FloatType, Dim> &tens, Vector<FloatType> &from)

Perform the inverse operation of transformBatchMatrix, taking an array of matrices with the provided row and column dimensions and interpreting them as a batched-tensor (last dim is the batch index)

Parameters:
  • rowdim – The dimension of the output tensor that is interpreted as the output matrix row dimension

  • coldim – The dimension of the output tensor that is interpreted as the output matrix column dimension

  • tens – The output tensor, setup prior to the call to the appropriate dimension

  • from – An array of contiguous matrices in row-major format

template<typename FloatType, int Dim>
Vector<FloatType> transformBatchVector(int vecdim, const Tensor<FloatType, Dim> &tens)

Interpret a batched-tensor (last dim is the batch index) as an array of vectors with the provided vector dimension. The output data are rearranged such that these vectors are contiguous, suitable for BLAS libraries.

Parameters:
  • vecdim – The dimension of the input tensor that is interpreted as the output vector dimension

  • tens – The input tensor

Returns:

An array of contiguous vectors

template<typename FloatType, int Dim>
void untransformBatchVector(int vecdim, Tensor<FloatType, Dim> &tens, const Vector<FloatType> &from)

Perform the inverse operation of transformBatchVector, taking an array of vectors with the provided vector dimension and interpreting them as a batched-tensor (last dim is the batch index)

Parameters:
  • vecdim – The dimension of the output tensor that is interpreted as the vector dimension

  • tens – The output tensor, setup prior to the call to the appropriate dimension

  • from – An array of contiguous vectors

template<typename _FloatType, int Dim>
struct Tensor
#include <Tensors.hpp>

A class for tensors of arbitrary dimension and floating point type.

Public Types

Values:

enumerator Dimension
typedef _FloatType FloatType

The floating point type

typedef const int *Dims

Array type for tensor dimensions

typedef const int *Coord

Array type for tensor coordinates

Public Functions

inline Tensor()

Default constructor for a zero-size tensor.

inline Tensor(Dims dims, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • dims – The tensor dimensions

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline Tensor(Dims dims, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a tensor with the provided dimensions uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • dims – The tensor dimensions

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline Tensor(Dims dims, const std::vector<FloatType> &init_vals)

Construct a tensor with the provided dimensions initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • dims – The tensor dimensions

  • init_vals – The initial values with lexicographic mapping in descending order, e.g. z + size_z * (y + size_y * x)

inline Tensor(Dims dims, FloatType const *init_vals)

Construct a tensor with the provided dimensions initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • dims – The tensor dimensions

  • init_vals – The initial values with lexicographic mapping in descending order, e.g. z + size_z * (y + size_y * x)

inline _1D_TENSOR_ONLY Tensor(int len, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 1D tensor (vector) with the provided length with the initial memory allocation in the provided pool.

Parameters:
  • len – The vector length

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _1D_TENSOR_ONLY Tensor(int len, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 1D tensor (vector) with the provided length uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • len – The vector length

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _1D_TENSOR_ONLY Tensor(const std::vector<FloatType> &init_vals)

Construct a 1D tensor (vector) initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:

init_vals – The initial values

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 2D tensor (matrix) with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension (number of rows)

  • size1 – The size of the 2nd dimension (number of columns)

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 2D tensor (matrix) with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension (number of rows)

  • size1 – The size of the 2nd dimension (number of columns)

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, const std::vector<FloatType> &init_vals)

Construct a 2D tensor (matrix) with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • size0 – The size of the 1st dimension (number of rows)

  • size1 – The size of the 2nd dimension (number of columns)

  • init_vals – The initial values with lexicographic mapping y + size1*x for coord (x,y)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 3D tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 3D tensor with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, const std::vector<FloatType> &init_vals)

Construct a 3D tensor with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • init_vals – The initial values with lexicographic mapping z + size2*(y + size1*x) for coord (x,y,z)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 4D tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • size3 – The size of the 4th dimension

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 4D tensor with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • size3 – The size of the 4th dimension

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, const std::vector<FloatType> &init_vals)

Construct a 4D tensor with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • size3 – The size of the 4th dimension

  • init_vals – The initial values with lexicographic mapping t + size3*(z + size2*(y + size1*x) ) for coord (x,y,z,t)

inline int const *sizeArray() const

Return the tensor dimensions as an array pointer.

inline int size(int i) const

Return the tensor size along a specific dimension.

Parameters:

i – The dimension

inline std::string sizeArrayString() const

Return the tensor dimensions as a string.

inline size_t data_len() const

Return the linear dimension (flattened size) of the tensor, or equivalently, the total number of elements.

inline View view(ViewMode mode) const

Return a view to this tensor opened with a specific view mode.

inline void lock() const

“Lock” the memory region associated with this object such that it cannot be auto-evicted to free space in a memory pool. A possible use case is to ensure a memory region remains valid while performing an asynchronous background copy.

inline void unlock() const

“Unlock” the memory region, allowing it to be evicted. This is the default state.

Tensor sliceLastDimension(int idx_start, int idx_end) const

Return a tensor where the last dimension contains the slice between idx_start and idx_end (inclusive). E.g., for a 3D tensor T, return T(:,:,idx_start:idx_end+1)

void insertSliceLastDimension(const Tensor &ins, int idx_start, int idx_end) const

Insert a tensor for which the last dimension contains a slice inserted between idx_start and idx_end (inclusive). E.g., for a 3D tensor T, T(:,:,idx_start:idx_end+1) = ins(:,:,:)

void pokeLastDimension(const Tensor<FloatType, Dim - 1> &ins, const int idx)

Insert a tensor of Dim-1 such that (*this)(i,j,k,…, idx) = ins(i,j,k,…). E.g., for a 3D tensor T and 2D input I, set T[:,:,idx] = I[:,:].

Parameters:
  • ins – The Dim-1 dimensional tensor to insert

  • idx – The index in the last dimension on which to insert the tensor

Tensor<FloatType, Dim - 1> peekLastDimension(const int idx) const

Return a tensor of dimension Dim-1 such that out(i,j,k,…) = (*this)(i,j,k,…, idx). E.g., for a 3D tensor T, return T[:,:,idx].

Parameters:

idx – The index in the last dimension from which to extract the tensor

inline bool deviceResident() const

Return true if the data is resident and up-to-date on the device.

template<typename FloatTypeOut>
Tensor<FloatTypeOut, Dim> convertFloatType(Locale loc = Auto) const

Up/down-cast the floating point type. If loc == Auto and this tensor is device-resident, the copy will be made on the device, else on the host.

Public Static Functions

static inline constexpr int dimension()

Return the tensor dimension.

Private Members

ManagedArray<FloatType> vals

Memory-contiguous container for tensor data

int _size[Dim]

Tensor dimensions

class View : private ManagedArray<FloatType>::View
#include <Tensors.hpp>

The tensor View accessor class

Public Functions

inline View(ViewMode mode, const Tensor<FloatType, Dim> &parent)

Construct a view with a specific view mode and parent object.

Parameters:
  • mode – The view mode

  • parent – The parent object

inline void free()

Free the view. This must be called explicitly once the view is no longer needed.

inline accelerator_inline FloatType & operator() (const Coord coord) const

Access the tensor at the provided coordinate.

inline _1D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i) const

Access the 1D tensor at the index (i)

inline _2D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j) const

Access the 2D tensor at the coordinate (i,j)

inline _3D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j, int k) const

Access the 3D tensor at the coordinate (i,j,k)

inline _4D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j, int k, int l) const

Access the 4D tensor at the coordinate (i,j,k,l)

inline accelerator_inline FloatType * data () const

Return a pointer to the underlying array.

inline accelerator_inline size_t data_len () const

Return the linear dimension (flattened size) of the tensor, or equivalently, the total number of elements.

inline accelerator_inline size_t size (int i) const

Return the tensor size along a specific dimension.

Parameters:

i – The dimension

inline accelerator_inline int const * sizeArray () const

Return the tensor dimensions as an array pointer.

inline accelerator_inline FloatType & compact3 (int i, int j, int k) const

Access a tensor element at a coordinate expressed such that the first Dim-2 dimensions are expressed lexicographically.

Parameters:
  • i – The first Dim-2 dimensions expressed lexicographically in descending order (e.g. z+sizez*(y+sizey*x))

  • j – The index of dimension Dim-2

  • k – The index of dimension Dim-1

Private Types

typedef ManagedArray<FloatType>::View Base

Private Members

int _size[Dim]
template<typename FloatType, int Dim>
struct normalization
#include <Tensors.hpp>

A struct to contain normalization factors, allowing for unnormalization.

Public Functions

inline normalization(int const *tens_sz, FloatType epsilon, MemoryManager::Pool pool)

Public Members

Tensor<FloatType, Dim> mean
Tensor<FloatType, Dim> std
FloatType epsilon

Testing

Functions

template<typename FloatType>
bool near(FloatType a, FloatType b, FloatType rel_tol, FloatType *reldiff_p = nullptr)
template<typename FloatType>
bool near(const Vector<FloatType> &a, const Vector<FloatType> &b, FloatType rel_tol, bool verbose = false)
template<typename FloatType>
bool near(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FloatType rel_tol, bool verbose = false)
template<typename FloatType>
bool abs_near(FloatType a, FloatType b, FloatType abs_tol, FloatType *absdiff_p = nullptr)
template<typename FloatType>
bool abs_near(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FloatType abs_tol, bool verbose = false)
template<typename FloatType, int Dim>
bool abs_near(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b, FloatType abs_tol, bool verbose = false)
template<typename FloatType, int Dim>
bool equal(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b, bool verbose = false)
template<typename Op, typename PreOp>
void benchmark(double &mean, double &std, int nrpt, int nwarmup, const Op &op, const PreOp &preop)
template<typename TensType>
TensType::FloatType testCost(const Vector<typename TensType::FloatType> &c, const TensType &v)
template<typename ModelType>
void testDeriv(ModelType &model, int const *in_sizes, int const *out_sizes, typename ModelType::FloatType delta = typename ModelType::FloatType(1e-4))
template<typename ComponentWrapper>
void testComponentDeriv(ComponentWrapper &cpt, typename ComponentWrapper::FloatType delta = typename ComponentWrapper::FloatType(1e-4), bool _2nd_order = false)
template<typename FloatType>
std::vector<FloatType> softMaxVector(const std::vector<FloatType> &v, FloatType beta = 1.0)

Timing

Defines

TIME(into, ...)

Functions

inline std::chrono::system_clock::time_point now()
inline size_t usSinceEpoch()
inline size_t usCountSince(const std::chrono::system_clock::time_point &when)
inline double since(const std::chrono::system_clock::time_point &when)
class Timer
#include <Timing.hpp>

A simple timer class.

Public Functions

inline void restart(bool start = false)

Reset the accumulated time to zero and (optionally) begin timing from now.

inline void resume()

Resume a timer when paused.

inline void pause()

Pause a timer, adding the time since start/resume to the accumulated time.

inline double time() const

Get the accumulated time.

inline Timer(bool start_on_create = false)

Construct and (optionally) start the timer.

Private Members

double ttot
std::chrono::system_clock::time_point tp

Batch3tensorPairContractComponent

template<typename Config>
class Batch3tensorPairContractComponent

Public Functions

inline Batch3tensorPairContractComponent(int contract_dim_A, int contract_dim_B, FloatType nrm = 1.0)
Batch3tensorPairContractComponent(const Batch3tensorPairContractComponent &r) = delete
Batch3tensorPairContractComponent(Batch3tensorPairContractComponent &&r) = default
template<typename InTensorType1, typename InTensorType2, enable_if_fwd_ref2<InTensorType1, InTensorType2, Tensor<FloatType, 3>> = 0>
inline Tensor<FloatType, 3> value(InTensorType1 &&A, InTensorType2 &&B, EnableDeriv enable_deriv = DerivNo)
inline void deriv(Tensor<FloatType, 3> &&_dcost_by_dC, Tensor<FloatType, 3> &dcost_by_dA, Tensor<FloatType, 3> &dcost_by_dB) const
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

FloatType nrm
int contract_dim_A
int contract_dim_B
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
mutable BufferType<Tensor<FloatType, 3>> A_buf
mutable BufferType<Tensor<FloatType, 3>> B_buf

BatchedMatrixRowSoftMaxComponent

template<typename Config>
class BatchedMatrixRowSoftMaxComponent

Public Functions

inline BatchedMatrixRowSoftMaxComponent(bool use_mask = false, FloatType beta = 1.0)
BatchedMatrixRowSoftMaxComponent(const BatchedMatrixRowSoftMaxComponent &r) = delete
BatchedMatrixRowSoftMaxComponent(BatchedMatrixRowSoftMaxComponent &&r) = default
Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &in, EnableDeriv enable_deriv = DerivNo) const
void deriv(Tensor<FloatType, 3> &&dcost_by_dOut, Tensor<FloatType, 3> &dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

FloatType beta
mutable BufferType<Tensor<FloatType, 3>> out_buf
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool use_mask

BatchTensorConcatenateComponent

template<typename Config, int TensDim>
class BatchTensorConcatenateComponent

Public Functions

inline BatchTensorConcatenateComponent(int concat_dim, int Ntens)
BatchTensorConcatenateComponent(const BatchTensorConcatenateComponent &r) = delete
BatchTensorConcatenateComponent(BatchTensorConcatenateComponent &&r) = default
inline Tensor<FloatType, TensDim> value(Tensor<FloatType, TensDim> const *const *in)
inline void deriv(Tensor<FloatType, TensDim> &&_dcost_by_dOut, Tensor<FloatType, TensDim> *const *dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const

Public Members

EXTRACT_CONFIG_TYPES

Private Members

int concat_dim
int Ntens
std::vector<std::array<int, TensDim>> tens_dims
bool setup

BatchTensorDimensionSliceComponent

template<typename Config, int TensDim>
class BatchTensorDimensionSliceComponent

Public Functions

inline BatchTensorDimensionSliceComponent(int slice_dim, int slice_idx)
BatchTensorDimensionSliceComponent(const BatchTensorDimensionSliceComponent &r) = delete
BatchTensorDimensionSliceComponent(BatchTensorDimensionSliceComponent &&r) = default
Tensor<FloatType, TensDim - 1> value(const Tensor<FloatType, TensDim> &in)
void deriv(Tensor<FloatType, TensDim - 1> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const

Public Members

EXTRACT_CONFIG_TYPES

Private Members

int slice_dim
int slice_idx
int in_size[TensDim]
int out_size[TensDim - 1]
size_t other_dim_vol
size_t offset_in
bool setup

BatchTensorDNNcomponent

template<typename Config, int TensDim, typename ActivationFunc>
class BatchTensorDNNcomponent

Public Functions

inline BatchTensorDNNcomponent(const Matrix<FloatType> &weights, const Vector<FloatType> &bias, int contract_dim, const ActivationFunc &activation)
inline BatchTensorDNNcomponent(const Matrix<FloatType> &_weights, int contract_dim, const ActivationFunc &activation)
BatchTensorDNNcomponent(const BatchTensorDNNcomponent &r) = delete
BatchTensorDNNcomponent(BatchTensorDNNcomponent &&r) = default
template<typename InTensorType, enable_if_fwd_ref<InTensorType, Tensor<FloatType, TensDim>> = 0>
Tensor<FloatType, TensDim> value(InTensorType &&x, EnableDeriv enable_deriv = DerivNo)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dCost_by_dOut, Tensor<FloatType, TensDim> &dCost_by_dIn) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
void getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

Matrix<FloatType> weights
Vector<FloatType> bias
int batch_size
int contract_dim
bool use_bias
int in_dims[TensDim]
int out_dims[TensDim]
size_t other_size
size_t stride
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
ActivationFunc activation_func
mutable BufferType<Tensor<FloatType, TensDim>> in_buf
mutable BufferType<Tensor<FloatType, TensDim>> activation_deriv_buf

MatrixTensorContractComponent

template<typename Config, int TensDim>
class MatrixTensorContractComponent

Public Functions

inline MatrixTensorContractComponent(const Matrix<FloatType> &weights)
MatrixTensorContractComponent(const MatrixTensorContractComponent &r) = delete
MatrixTensorContractComponent(MatrixTensorContractComponent &&r) = default
template<typename InTensorType, enable_if_fwd_ref<InTensorType, Tensor<FloatType, TensDim>> = 0>
Tensor<FloatType, TensDim> value(InTensorType &&x, EnableDeriv enable_deriv = DerivNo)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dCost_by_dOut, Tensor<FloatType, TensDim> &dCost_by_dIn) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
void getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

Matrix<FloatType> weights
int size0
int size1
int batch_size
int in_dims[TensDim]
int out_dims[TensDim]
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
mutable BufferType<Tensor<FloatType, TensDim>> in_buf

MultiHeadAttentionComponent

template<typename Config>
class MultiHeadAttentionComponent

Public Functions

MultiHeadAttentionComponent(int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadAttentionComponent(int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadAttentionComponent(const MultiHeadAttentionComponent &r) = delete
MultiHeadAttentionComponent(MultiHeadAttentionComponent &&r) = default
TensorType value(const TensorType &Q, const TensorType &K, const TensorType &V, EnableDeriv enable_deriv = DerivNo)
void deriv(Vector<FloatType> &cost_deriv, int off, TensorType &&dCost_by_dOut, TensorType &dCost_by_dQ, TensorType &dCost_by_dK, TensorType &dCost_by_dV) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
size_t FLOPS(int value_or_deriv) const
void getParams(Vector<FloatType> &into, int off) const
void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Tensor<FloatType, 3> TensorType

Private Members

int C
int E
int B
int Nparams_layer
bool setup
std::vector<std::unique_ptr<ScaledDotProductAttentionHeadComponent<Config>>> heads
BatchTensorConcatenateComponent<Config, 3> concatY
MatrixTensorContractComponent<Config, 3> multW_O

NormComponent

template<typename Config, int TensDim>
class NormComponent

Public Functions

inline NormComponent(int norm_dim, FloatType epsilon = 1e-5)
NormComponent(const NormComponent &r) = delete
NormComponent(NormComponent &&r) = default
Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in, EnableDeriv enable_deriv = DerivNo)
void deriv(Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

int norm_dim
FloatType epsilon
int in_size[TensDim]
size_t other_dim_vol
size_t stride
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
mutable BufferType<Tensor<FloatType, TensDim>> out_buf
mutable BufferType<Matrix<FloatType>> std_buf

ScaleComponent

template<typename Config, int TensDim>
class ScaleComponent

Public Functions

inline ScaleComponent(int scale_dim, int dimension_size, bool use_affine, bool use_bias, const Vector<FloatType> &affine_init, const Vector<FloatType> &bias_init)
ScaleComponent(const ScaleComponent &r) = delete
ScaleComponent(ScaleComponent &&r) = default
template<typename InTensorType, enable_if_fwd_ref<InTensorType, Tensor<FloatType, TensDim>> = 0>
Tensor<FloatType, TensDim> value(InTensorType &&in, EnableDeriv enable_deriv = DerivNo)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
void getParams(Vector<FloatType> &into, int off) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

int scale_dim
bool use_affine
bool use_bias
int nparams_val
int in_size[TensDim]
size_t other_dim_vol
size_t stride
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
Vector<FloatType> gamma
Vector<FloatType> beta
mutable BufferType<Tensor<FloatType, TensDim>> in_buf

ScaledDotProductAttentionComponent

template<typename Config>
class ScaledDotProductAttentionComponent

Public Functions

inline ScaledDotProductAttentionComponent(int d_k, int d_v, int use_mask = false)
ScaledDotProductAttentionComponent(const ScaledDotProductAttentionComponent &r) = delete
ScaledDotProductAttentionComponent(ScaledDotProductAttentionComponent &&r) = default
template<typename InTensorType1, typename InTensorType2, typename InTensorType3, enable_if_fwd_ref3<InTensorType1, InTensorType2, InTensorType3, Tensor<FloatType, 3>> = 0>
Tensor<FloatType, 3> value(InTensorType1 &&Q, InTensorType2 &&K, InTensorType3 &&V, EnableDeriv enable_deriv = DerivNo)
void deriv(Tensor<FloatType, 3> &&dCost_by_dOut, Tensor<FloatType, 3> &dCost_by_dQ, Tensor<FloatType, 3> &dCost_by_dK, Tensor<FloatType, 3> &dCost_by_dV) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

int C
int B
int d_k
int d_v
bool setup
Batch3tensorPairContractComponent<Config> mulQKtoGetS
BatchedMatrixRowSoftMaxComponent<Config> softmaxS_to_SS
Batch3tensorPairContractComponent<Config> mulSSVtoGetOut

ScaledDotProductAttentionHeadComponent

template<typename Config>
class ScaledDotProductAttentionHeadComponent

Public Functions

inline ScaledDotProductAttentionHeadComponent(const Matrix<FloatType> &W_Q, const Matrix<FloatType> &W_K, const Matrix<FloatType> &W_V, bool use_mask = false)
ScaledDotProductAttentionHeadComponent(const ScaledDotProductAttentionHeadComponent &r) = delete
ScaledDotProductAttentionHeadComponent(ScaledDotProductAttentionHeadComponent &&r) = default
template<typename InTensorType1, typename InTensorType2, typename InTensorType3, enable_if_fwd_ref3<InTensorType1, InTensorType2, InTensorType3, Tensor<FloatType, 3>> = 0>
Tensor<FloatType, 3> value(InTensorType1 &&Q, InTensorType2 &&K, InTensorType3 &&V, EnableDeriv enable_deriv = DerivNo)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&dCost_by_dOut, Tensor<FloatType, 3> &dCost_by_dQ, Tensor<FloatType, 3> &dCost_by_dK, Tensor<FloatType, 3> &dCost_by_dV) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
void getParams(Vector<FloatType> &into, int off) const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef MatrixTensorContractComponent<Config, 3> MatTensMulCptType

Private Members

int C
int B
int E
int d_k
int d_v
bool setup
MatTensMulCptType multWQ
MatTensMulCptType multWK
MatTensMulCptType multWV
ScaledDotProductAttentionComponent<Config> attention

SoftMaxComponent

template<typename Config, int TensDim>
class SoftMaxComponent

Public Functions

inline SoftMaxComponent(int softmax_dim, FloatType beta = 1.0)
SoftMaxComponent(const SoftMaxComponent &r) = delete
SoftMaxComponent(SoftMaxComponent &&r) = default
Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in, EnableDeriv enable_deriv = DerivNo) const
void deriv(Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)
inline void setBeta(FloatType _beta)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

int softmax_dim
FloatType beta
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
mutable BufferType<Tensor<FloatType, TensDim>> out_buf

BatchTensorDNNlayer

Defines

LAYER_TYPE
LAYER_TYPE

Functions

template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, int contract_dim, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_dnn_layer(const Matrix<FLOATTYPE(U)> &weights, int contract_dim, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_dnn_layer(int contract_dim, int fan_out, int fan_in, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_unbiased_dnn_layer(int contract_dim, int fan_out, int fan_in, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, const ActivationFunc &activation, U &&u)
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, U &&u)
template<typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(int fan_out, int fan_in, const ActivationFunc &activation, U &&u)
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(int fan_out, int fan_in, U &&u)
template<typename Config, int TensDim, typename _InputType, typename Store, typename ActivationFunc>
class BatchTensorDNNlayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline BatchTensorDNNlayer(Store &&leaf, const Matrix<FloatType> &weights, const Vector<FloatType> &bias, int contract_dim, const ActivationFunc &activation)
inline BatchTensorDNNlayer(Store &&leaf, const Matrix<FloatType> &weights, int contract_dim, const ActivationFunc &activation)
BatchTensorDNNlayer(const BatchTensorDNNlayer &r) = delete
BatchTensorDNNlayer(BatchTensorDNNlayer &&r) = default
Tensor<FloatType, TensDim> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

ConvolutionLayer1D

Defines

LAYER_TYPE

Functions

template<typename U, typename ActivationFunc, typename PaddingFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto conv1d_layer(const Tensor<FLOATTYPE(U), 3> &filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, int stride, U &&u) -> LAYER_TYPE
template<typename U, typename ActivationFunc, typename PaddingFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto conv1d_layer(const Tensor<FLOATTYPE(U), 3> &filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, U &&u) -> LAYER_TYPE
template<typename Config, typename _InputType, typename Store, typename ActivationFunc, typename PaddingFunc>
class ConvolutionLayer1D

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE (typename Store::type) LayerInputTensorType
inline ConvolutionLayer1D(Store &&leaf, const Tensor<FloatType, 3> &_filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, int stride = 1)
ConvolutionLayer1D(const ConvolutionLayer1D &r) = delete
ConvolutionLayer1D(ConvolutionLayer1D &&r) = default
Tensor<FloatType, 3> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

Store leaf
int _input_tens_size[LayerInputTensorType::dimension()]
Tensor<FloatType, 3> filter
ActivationFunc activation_func
PaddingFunc padding_func
int depth
int channels
int kernel_size
int stride
bool init
int padded_data_len
int batch_size
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
mutable BufferType<Tensor<FloatType, 3>> leaf_buf
mutable BufferType<Tensor<FloatType, 3>> activation_deriv_buf

EmbedPositionsSinusoidalLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto embed_positions_sinusoidal_layer(U &&u)
template<typename Config, typename _InputType, typename Store>
class EmbedPositionsSinusoidalLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline EmbedPositionsSinusoidalLayer(Store &&leaf)
EmbedPositionsSinusoidalLayer(const EmbedPositionsSinusoidalLayer &r) = delete
EmbedPositionsSinusoidalLayer(EmbedPositionsSinusoidalLayer &&r) = default
inline Tensor<FloatType, 3> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

Store leaf
mutable FLOPScounter value_FLOPS

FlattenLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto flatten_layer(U &&u) -> FlattenLayer<CONFIGTYPE(U), INPUTTYPE(U), DDST(u)>
template<typename Config, typename _InputType, typename Store>
class FlattenLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE (typename Store::type) LayerInputTensorType
inline FlattenLayer(Store &&leaf)
FlattenLayer(const FlattenLayer &r) = delete
FlattenLayer(FlattenLayer &&r) = default
Matrix<FloatType> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Matrix<FloatType> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

Store leaf
int _input_tens_size[LayerInputTensorType::dimension()]
bool init

InputLayer

Functions

template<typename Config, typename InputType = Matrix<typename Config::FloatType>>
inline InputLayer<Config, InputType> input_layer()
template<typename Config, typename _InputType = Matrix<typename Config::FloatType>>
class InputLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline InputLayer()
inline InputLayer(InputLayer &&r) = default
inline InputLayer(const InputLayer &r) = delete
inline const InputType &value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, InputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

LayerCommon

Defines

ISLEAF(a)
FLOATTYPE(a)
INPUTTYPE(a)
LAYEROUTPUTTYPE(a)
LAYERTYPEOUTPUTTYPE(a)
CONFIGTYPE(a)

Typedefs

template<class...>
using void_type = void

Variables

template<class T>
constexpr bool is_leaf_v = is_leaf<T>::value
struct LeafTag
template<class T, class = void>
struct is_leaf : public std::false_type
template<class T>
struct is_leaf<T, void_type<typename std::decay_t<T>::tag>> : public std::is_same<std::decay_t<T>::tag, LeafTag>

MatrixTensorContractLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto matrix_tensor_contract_layer(const Matrix<FLOATTYPE(U)> &weights, U &&u) -> LAYER_TYPE
template<typename Config, int TensDim, typename _InputType, typename Store>
class MatrixTensorContractLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline MatrixTensorContractLayer(Store &&leaf, const Matrix<FloatType> &weights)
MatrixTensorContractLayer(const MatrixTensorContractLayer &r) = delete
MatrixTensorContractLayer(MatrixTensorContractLayer &&r) = default
Tensor<FloatType, TensDim> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

MultiHeadCrossAttentionLayer

Defines

LAYER_TYPE
TEMPL

Functions

TEMPL auto multihead_cross_attention_layer(int Nheads, Matrix<FLOATTYPE(ChainKV)> const *const *W_Q, Matrix<FLOATTYPE(ChainKV)> const *const *W_K, Matrix<FLOATTYPE(ChainKV)> const *const *W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, Matrix<FLOATTYPE(ChainKV)> const *const *W_Q, Matrix<FLOATTYPE(ChainKV)> const *const *W_K, Matrix<FLOATTYPE(ChainKV)> const *const *W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_Q, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_K, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_Q, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_K, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, int E, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, int E, ChainKV &&chain_KV, ChainQ &&chain_Q)
template<typename Config, typename _InputType, typename StoreKV, typename StoreQ>
class MultiHeadCrossAttentionLayer

Public Types

typedef _InputType InputType
typedef Tensor<FloatType, 3> TensorType
typedef LeafTag tag

Public Functions

inline MultiHeadCrossAttentionLayer(StoreKV &&leaf_KV, StoreQ &&leaf_Q, int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
inline MultiHeadCrossAttentionLayer(StoreKV &&leaf_KV, StoreQ &&leaf_Q, int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadCrossAttentionLayer(const MultiHeadCrossAttentionLayer &r) = delete
MultiHeadCrossAttentionLayer(MultiHeadCrossAttentionLayer &&r) = default
inline TensorType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, TensorType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

StoreKV leaf_KV
StoreQ leaf_Q
MultiHeadAttentionComponent<Config> attention

MultiHeadSelfAttentionLayer

Defines

LAYER_TYPE

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, Matrix<FLOATTYPE(U)> const *const *W_Q, Matrix<FLOATTYPE(U)> const *const *W_K, Matrix<FLOATTYPE(U)> const *const *W_V, const Matrix<FLOATTYPE(U)> &W_O, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, Matrix<FLOATTYPE(U)> const *const *W_Q, Matrix<FLOATTYPE(U)> const *const *W_K, Matrix<FLOATTYPE(U)> const *const *W_V, const Matrix<FLOATTYPE(U)> &W_O, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(U)>> &W_Q, const std::vector<Matrix<FLOATTYPE(U)>> &W_K, const std::vector<Matrix<FLOATTYPE(U)>> &W_V, const Matrix<FLOATTYPE(U)> &W_O, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(U)>> &W_Q, const std::vector<Matrix<FLOATTYPE(U)>> &W_K, const std::vector<Matrix<FLOATTYPE(U)>> &W_V, const Matrix<FLOATTYPE(U)> &W_O, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, int E, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, int E, U &&u) -> LAYER_TYPE
template<typename Config, typename _InputType, typename Store>
class MultiHeadSelfAttentionLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

MultiHeadSelfAttentionLayer(Store &&leaf, int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadSelfAttentionLayer(Store &&leaf, int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadSelfAttentionLayer(const MultiHeadSelfAttentionLayer &r) = delete
MultiHeadSelfAttentionLayer(MultiHeadSelfAttentionLayer &&r) = default
Tensor<FloatType, 3> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Tensor<FloatType, 3> LayerInputType

Private Members

MultiHeadAttentionComponent<Config> mha
Store leaf

NormLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FLOATTYPE(U)> &affine_init, const Vector<FLOATTYPE(U)> &bias_init, FLOATTYPE(U) epsilon, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FLOATTYPE(U)> &affine_init, const Vector<FLOATTYPE(U)> &bias_init, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, FLOATTYPE(U) epsilon, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, U &&u) -> LAYER_TYPE
template<typename Config, int TensDim, typename _InputType, typename Store>
class NormLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline NormLayer(Store &&leaf, int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FloatType> &affine_init, const Vector<FloatType> &bias_init, FloatType epsilon)
NormLayer(const NormLayer &r) = delete
NormLayer(NormLayer &&r) = default
inline Tensor<FloatType, TensDim> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Tensor<FloatType, TensDim> LayerInputType

PairJoinLayer

Functions

template<typename U, typename V, typename std::enable_if<ISLEAF(U) && ISLEAF(V) && std::is_same<INPUTTYPE(U), INPUTTYPE(V)>::value, int>::type = 0>
auto pair_join_layer(U &&u, V &&v)
template<typename Config, typename _InputType, typename Store1, typename Store2>
class PairJoinLayer

Public Types

typedef _InputType InputType
typedef std::pair<LayerInputType1, LayerInputType2> LayerOutputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType1) LayerInputType1
typedef LAYERTYPEOUTPUTTYPE(StoredType2) LayerInputType2
inline PairJoinLayer(Store1 &&leaf1, Store2 &&leaf2)
PairJoinLayer(const PairJoinLayer &r) = delete
PairJoinLayer(PairJoinLayer &&r) = default
inline LayerOutputType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Store1::type StoredType1
typedef Store2::type StoredType2

Private Members

Store1 leaf1
Store2 leaf2

PairSplitLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto pair_split_layer(U &&u)
template<typename Config, typename _InputType, typename Store>
class PairSplitLayerLeader

Public Types

typedef _InputType InputType
typedef Store::type StoredType
typedef LayerInputType::first_type LayerOutputType1
typedef LayerInputType::second_type LayerOutputType2

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType
inline PairSplitLayerLeader(Store &&leaf)
inline void cinc(int &i)
inline LayerOutputType1 first(const InputType &x, EnableDeriv enable_deriv)
inline LayerOutputType2 second(const InputType &x, EnableDeriv enable_deriv)
inline int deriv_complete(Vector<FloatType> &cost_deriv, int off, InputType *input_above_deriv_return)
inline int deriv_first(Vector<FloatType> &cost_deriv, int off, LayerOutputType1 &&_above_deriv, InputType *input_above_deriv_return)
inline int deriv_second(Vector<FloatType> &cost_deriv, int off, LayerOutputType2 &&_above_deriv, InputType *input_above_deriv_return)
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int getParams(Vector<FloatType> &into, int off)

Public Members

EXTRACT_CONFIG_TYPES
LayerInputType in_buf
LayerOutputType1 above_deriv1
LayerOutputType2 above_deriv2
Store leaf
int val_count
int deriv_count
int update_count
int step_count
int getparams_count
template<typename Config, typename _InputType, typename Store>
class PairSplitLayer1

Public Types

typedef LayerInputType::first_type LayerOutputType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType
inline PairSplitLayer1(PairSplitLayerLeader<Config, InputType, Store> *leader)
PairSplitLayer1(const PairSplitLayer1 &r) = delete
inline PairSplitLayer1(PairSplitLayer1 &&r)
inline ~PairSplitLayer1()
inline LayerOutputType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Store::type StoredType

Private Members

PairSplitLayerLeader<Config, InputType, Store> *leader
template<typename Config, typename _InputType, typename Store>
class PairSplitLayer2

Public Types

typedef LayerInputType::second_type LayerOutputType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType
inline PairSplitLayer2(PairSplitLayerLeader<Config, InputType, Store> *leader)
PairSplitLayer2(const PairSplitLayer2 &r) = delete
PairSplitLayer2(PairSplitLayer2 &&r) = default
inline LayerOutputType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Store::type StoredType

Private Members

PairSplitLayerLeader<Config, InputType, Store> *leader

ReplicateLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto replicate_layer(int N, U &&u)
template<typename Config, typename _InputType, typename Store>
class ReplicateLayerLeader

Public Types

typedef _InputType InputType
typedef Store::type StoredType

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputOutputType
inline ReplicateLayerLeader(Store &&leaf, int N)
inline void cinc(int &i)
inline LayerInputOutputType value(const InputType &x, EnableDeriv enable_deriv)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return)
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int getParams(Vector<FloatType> &into, int off)

Public Members

EXTRACT_CONFIG_TYPES
LayerInputOutputType in_buf
LayerInputOutputType above_deriv
Store leaf
int N
int val_count
int deriv_count
int update_count
int step_count
int getparams_count
template<typename Config, typename _InputType, typename Store>
class ReplicateLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputOutputType
inline ReplicateLayer(ReplicateLayerLeader<Config, InputType, Store> *leader, int instance, int N)
ReplicateLayer(const ReplicateLayer &r) = delete
inline ReplicateLayer(ReplicateLayer &&r)
inline ~ReplicateLayer()
inline LayerInputOutputType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Store::type StoredType

Private Members

int instance
int N
ReplicateLayerLeader<Config, InputType, Store> *leader

ScaledDotProductSelfAttentionLayer

Defines

LAYER_TYPE

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto scaled_dotproduct_self_attention_layer(const Matrix<FLOATTYPE(U)> &W_Q, const Matrix<FLOATTYPE(U)> &W_K, const Matrix<FLOATTYPE(U)> &W_V, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto scaled_dotproduct_self_attention_layer(const Matrix<FLOATTYPE(U)> &W_Q, const Matrix<FLOATTYPE(U)> &W_K, const Matrix<FLOATTYPE(U)> &W_V, U &&u) -> LAYER_TYPE
template<typename Config, typename _InputType, typename Store>
class ScaledDotProductSelfAttentionLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline ScaledDotProductSelfAttentionLayer(Store &&leaf, const Matrix<FloatType> &W_Q, const Matrix<FloatType> &W_K, const Matrix<FloatType> &W_V, bool use_mask = false)
ScaledDotProductSelfAttentionLayer(const ScaledDotProductSelfAttentionLayer &r) = delete
ScaledDotProductSelfAttentionLayer(ScaledDotProductSelfAttentionLayer &&r) = default
Tensor<FloatType, 3> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
int getParams(Vector<FloatType> &into, int off) const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Tensor<FloatType, 3> LayerInputType

Private Members

int C
int E
int B
int d_k
int d_v
bool setup
ScaledDotProductAttentionHeadComponent<Config> attentionQKV
Store leaf

SkipConnection

Defines

LAYER_TYPE

Functions

template<typename Internal, typename Below, typename std::enable_if<ISLEAF(Internal) && ISLEAF(Below), int>::type = 0>
auto skip_connection(Internal &&internal, Below &&below) -> LAYER_TYPE
template<typename Config, typename _InputType, typename ChainInternal, typename ChainBelow>
class SkipConnection

Public Types

typedef _InputType InputType
typedef ChainBelow::type ChainBelowInternalType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(ChainBelowInternalType) LayerInputOutputType
inline SkipConnection(ChainInternal &&leaf_internal, ChainBelow &&leaf_below)
SkipConnection(const SkipConnection &r) = delete
SkipConnection(SkipConnection &&r) = default
LayerInputOutputType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

ChainBelow leaf_below
ChainInternal leaf_internal

SoftMaxLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto softmax_layer(int softmax_dim, FLOATTYPE(U) beta, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto softmax_layer(int softmax_dim, U &&u) -> LAYER_TYPE
template<typename Config, int TensDim, typename _InputType, typename Store>
class SoftMaxLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline SoftMaxLayer(Store &&leaf, int softmax_dim, FloatType beta = 1.0)
inline SoftMaxLayer(SoftMaxLayer &&r) = default
inline SoftMaxLayer(const SoftMaxLayer &r) = delete
Tensor<FloatType, TensDim> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)
inline void setBeta(FloatType beta)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

Store leaf
SoftMaxComponent<Config, TensDim> cpt

SumJoinLayer

Functions

template<typename U, typename V, typename std::enable_if<ISLEAF(U) && ISLEAF(V) && std::is_same<CONFIGTYPE(U), CONFIGTYPE(V)>::value && std::is_same<INPUTTYPE(U), INPUTTYPE(V)>::value, int>::type = 0>
auto sum_join_layer(U &&u, V &&v)
template<typename Config, typename _InputType, typename Store1, typename Store2>
class SumJoinLayer

Public Types

typedef _InputType InputType
typedef LayerInputType1 LayerInputOutputType
typedef LeafTag tag

Public Functions

inline SumJoinLayer(Store1 &&leaf1, Store2 &&leaf2)
SumJoinLayer(const SumJoinLayer &r) = delete
SumJoinLayer(SumJoinLayer &&r) = default
inline LayerInputOutputType value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Types

typedef Store1::type StoredType1
typedef Store2::type StoredType2

Private Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType1) LayerInputType1
typedef LAYERTYPEOUTPUTTYPE(StoredType2) LayerInputType2

Private Members

Store1 leaf1
Store2 leaf2

TransformerEncoderDecoderBlock

Functions

template<typename Below, typename ActivationFunc>
auto transformer_decoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, Below &&below)
template<typename Below, typename ActivationFunc>
auto transformer_encoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, Below &&below)
template<typename EncoderInput, typename DecoderInput, typename ActivationFunc>
auto transformer_cross_decoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, EncoderInput &&encoder_in, DecoderInput &&decoder_in)

UnflattenLayer

Functions

template<int OutDimension, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto unflatten_layer(int const *output_tens_dim, U &&u) -> UnflattenLayer<CONFIGTYPE(U), OutDimension, INPUTTYPE(U), DDST(u)>
template<typename Config, int OutDimension, typename _InputType, typename Store>
class UnflattenLayer

Public Types

typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE(typename Store::type) LayerInputTensorType
inline UnflattenLayer(Store &&leaf, int const *output_tens_size)
UnflattenLayer(const UnflattenLayer &r) = delete
UnflattenLayer(UnflattenLayer &&r) = default
Tensor<FloatType, OutDimension> value(const InputType &x, EnableDeriv enable_deriv = DerivNo)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, OutDimension> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Public Members

EXTRACT_CONFIG_TYPES

Private Members

Store leaf
int _output_tens_size[OutDimension]