Accelerator

Defines

strong_inline
DO_PRAGMA_(x)
DO_PRAGMA(x)
thread_num(a)
thread_max(a)
set_threads(a)
in_thread_parallel_region(a)
thread_for(i, num, ...)
thread_for3d(i1, n1, i2, n2, i3, n3, ...)
thread_for2d(i1, n1, i2, n2, ...)
accelerator
accelerator_inline
accelerator_barrier(dummy)
accelerator_for3dNB(iter1, num1, iter2, num2, iter3, num3, block2, ...)
accelerator_for3d(iter1, num1, iter2, num2, iter3, num3, block2, ...)
accelerator_for2dNB(iter1, num1, iter2, num2, block2, ...)
accelerator_for2d(iter1, num1, iter2, num2, block2, ...)
accelerator_forNB(iter1, num1, ...)
accelerator_for(iter, num, ...)
accelerator_for3d_shm(iter1, num1, iter2, num2, iter3, num3, block2, shm_size, ...)
accelerator_for2dNB_shm(iter1, num1, iter2, num2, block2, shm_size, ...)
accelerator_for2d_shm(iter1, num1, iter2, num2, block2, shm_size, ...)
accelerator_forNB_shm(iter1, num1, shm_size, ...)
accelerator_for_shm(iter, num, shm_size, ...)
autoView(ViewName, ObjName, mode)
doHost(a, ...)
doHost2(a, b, ...)
doHost3(a, b, c, ...)

Functions

void acceleratorInit(void)
void acceleratorReport()
inline void acceleratorCopyToDevice(void *to, void const *from, size_t bytes)
inline void acceleratorCopyFromDevice(void *to, void const *from, size_t bytes)
inline void acceleratorCopyDeviceToDevice(void *to, void const *from, size_t bytes)
inline void acceleratorCopyDeviceToDeviceAsynch(void *to, void const *from, size_t bytes)
inline void acceleratorCopySynchronize(void)
inline void acceleratorMemSet(void *base, int value, size_t bytes)
inline void *acceleratorAllocHost(size_t bytes)
inline void *acceleratorAllocShared(size_t bytes)
inline void *acceleratorAllocDevice(size_t bytes)
inline void acceleratorFreeHost(void *ptr)
inline void acceleratorFreeShared(void *ptr)
inline void acceleratorFreeDevice(void *ptr)
inline void profileStart()
inline void profileStop()
inline void labelRegionBegin(char const *label)
inline void labelRegionEnd()
template<typename FloatType>
inline void atomicAdd(FloatType *p, const FloatType v)
template<typename ViewType>
struct viewDeallocator

Public Functions

inline viewDeallocator(ViewType &v)
inline ~viewDeallocator()

Public Members

ViewType &v

Public Static Functions

static inline void free(ViewType &v)

ActivationFuncs

template<typename FloatType>
class ReLU

Public Functions

void operator()(Matrix<FloatType> &x, Matrix<FloatType> *deriv = nullptr) const
template<int Dim>
void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const
template<typename FloatType>
class noActivation

Public Functions

inline void operator()(Matrix<FloatType> &x, Matrix<FloatType> *deriv = nullptr) const
template<int Dim>
inline void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const
template<typename FloatType>
class GeLU

Public Functions

template<int Dim>
void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const

Comms

Functions

Communicators &communicators()
void initializeComms(int argc, char **argv)
inline int UniqueID()
template<typename FloatType>
inline MPI_Datatype getMPIdataType()
template<typename FloatType>
inline void commsReduce(FloatType *data, size_t data_len, const MPI_Comm &comm)
template<typename FloatType>
inline void commsReduce(Vector<FloatType> &v, const MPI_Comm &comm)
template<typename FloatType>
inline void commsBroadcast(FloatType *data, size_t data_len, int from_rank, const MPI_Comm &comm)
template<typename FloatType>
inline void commsBroadcast(Vector<FloatType> &v, int from_rank, const MPI_Comm &comm)
template<typename FloatType>
inline void commsBroadcast(Matrix<FloatType> &v, int from_rank, const MPI_Comm &comm)
class Communicators

Public Functions

Communicators(int argc, char **argv)
~Communicators()
inline int worldRank() const
inline int worldNrank() const
inline int nodeRank() const
inline int nodeNrank() const
inline int ddpRank() const
inline int ddpNrank() const
inline int pipelineRank() const
inline int pipelineNrank() const
inline bool isPipelineLeader() const
inline MPI_Comm &pipelineCommunicator()
inline MPI_Comm &ddpCommunicator()
void enableNodePipelining()
void enableColorPipelining(int rank_color)
void enableGlobalPipelining()
void disableParallelism()
void enableDDPnoPipelining()
void reportSetup()

Private Functions

void setupDDPcommunicator()
void freeCommunicators()
void enableDDPnoPipeliningInternal()

Private Members

MPI_Comm pipeline_comm
MPI_Comm ddp_comm
int world_rank
int world_nrank
int node_rank
int node_nrank
int pipeline_rank
int pipeline_nrank
bool is_pipeline_leader
int ddp_rank
int ddp_nrank

Private Static Functions

static void createCommJustThisRank(int world_rank, MPI_Comm &comm)

Components

DDP

Functions

template<typename FloatType>
void ddpAverage(FloatType *data, size_t len, bool pipeline_bcast = false)
template<typename FloatType>
void ddpAverage(Vector<FloatType> &v, bool pipeline_bcast = false)

DynamicModel

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
LayerWrapper<FLOATTYPE(U), INPUTTYPE(U), LAYEROUTPUTTYPE(U)> enwrap(U &&u)
template<typename FloatType, typename InputType, typename LayerOutputType>
class LayerWrapperInternalBase

Public Functions

virtual LayerOutputType value(const InputType &x) = 0
virtual int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const = 0
virtual int nparams() const = 0
virtual size_t FLOPS(int value_or_deriv) const = 0
virtual void resizeInputBuffer(size_t to) = 0
virtual int getParams(Vector<FloatType> &into, int off) const = 0
virtual int step(int off, const Vector<FloatType> &derivs, FloatType eps) = 0
inline virtual ~LayerWrapperInternalBase()
template<typename Store, typename std::enable_if<ISSTORAGE(Store), int>::type = 0>
class LayerWrapperInternal : public LayerWrapperInternalBase<Store::type::FloatType, Store::type::InputType, LAYEROUTPUTTYPE(Store::type)>

Public Types

typedef Store::type::FloatType FloatType
typedef Store::type::InputType InputType

Public Functions

typedef LAYEROUTPUTTYPE (typename Store::type) LayerOutputType
inline LayerWrapperInternal(Store &&layer)
inline virtual LayerOutputType value(const InputType &x) override
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const override
inline virtual int nparams() const override
inline virtual size_t FLOPS(int value_or_deriv) const
inline virtual int getParams(Vector<FloatType> &into, int off) const override
inline virtual int step(int off, const Vector<FloatType> &derivs, FloatType eps) override
inline virtual void resizeInputBuffer(size_t to) override

Public Members

Store layer
template<typename _FloatType, typename _InputType, typename _LayerOutputType>
class LayerWrapper

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef _LayerOutputType LayerOutputType
typedef LeafTag tag

Public Functions

LayerWrapper(LayerWrapper &&r) = default
LayerWrapper &operator=(LayerWrapper &&r) = default
template<typename Store, typename std::enable_if<ISSTORAGE(Store), int>::type = 0>
inline LayerWrapper(Store &&layer)
inline LayerOutputType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline void resizeInputBuffer(size_t to)

Private Members

std::unique_ptr<LayerWrapperInternalBase<FloatType, InputType, LayerOutputType>> layer

Embeddings

Functions

template<typename FloatType>
Tensor<FloatType, 3> embedPositionsSinusoidal(const Tensor<FloatType, 3> &in, FLOPScounter *flops = nullptr)
template<typename FloatType>
Tensor<FloatType, 2> embedPositionsSinusoidal(const Tensor<FloatType, 2> &in, FLOPScounter *flops = nullptr)

HPCortex

Init

Functions

void initialize(int argc, char **argv)

InstanceStorage

Defines

DDST(a)
ISSTORAGE(a)
struct StorageTag
template<typename T>
struct LeafStore

Public Types

typedef StorageTag tag
typedef T type

Public Functions

inline LeafStore(T &&v)
LeafStore(const LeafStore &r) = delete
inline LeafStore(LeafStore &&r)

Public Members

T v
template<typename T>
struct LeafRef

Public Types

typedef StorageTag tag
typedef T type

Public Functions

inline LeafRef(T &v)
LeafRef(const LeafRef &r) = delete
inline LeafRef(LeafRef &&r)

Public Members

T &v
template<typename T>
struct deduceStorage
template<typename T>
struct deduceStorage<T&>

Public Types

typedef LeafRef<T> type
template<typename T>
struct deduceStorage<T&&>

Public Types

typedef LeafStore<T> type

Layers

Linalg

Functions

template<typename FloatType>
void thinMulMatMatTranspose_p(FloatType *out_p, const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> thinMulMatMatTranspose(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> mulMatTransposeThinMat(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> computeThinMatOuterProd(const Matrix<FloatType> &above_deriv, const Matrix<FloatType> &activation_deriv, FLOPScounter *flops = nullptr)
template<typename FloatType>
Matrix<FloatType> axpyMatThinMat(const Matrix<FloatType> &a, const Matrix<FloatType> &b, const Vector<FloatType> &c, FLOPScounter *flops = nullptr)
template<typename FloatType>
Tensor<FloatType, 3> batch3tensorContract(const Tensor<FloatType, 3> &A, const Tensor<FloatType, 3> &B, int contract_dimA, int contract_dimB, FloatType nrm = 1.0, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
Tensor<FloatType, Dim> matrixBatchTensorAxpy(const Matrix<FloatType> &A, const Tensor<FloatType, Dim> &X, const Vector<FloatType> &Y, const int contract_dim, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
void batchTensorContractToMatrix_p(FloatType *out_p, const Tensor<FloatType, Dim> &A, const Tensor<FloatType, Dim> &B, const int preserve_dim, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
Tensor<FloatType, Dim> matrixBatchTensorContractRight(const Tensor<FloatType, Dim> &X, const Matrix<FloatType> &A, const int contract_dim, FLOPScounter *flops = nullptr)
template<typename FloatType, int Dim>
Tensor<FloatType, Dim> matrixBatchTensorContractLeft(const Matrix<FloatType> &A, const Tensor<FloatType, Dim> &X, const int contract_dim, FLOPScounter *flops = nullptr)

LossFunctions

Defines

CWRP

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto mse_cost(U &&u) -> CWRP
template<typename CostFunc, typename U, typename std::enable_if<ISLEAF(U) && std::is_default_constructible<CostFunc>::value, int>::type = 0>
auto cost_func_wrap(U &&u)
template<typename CostFunc, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto cost_func_wrap(U &&u, const CostFunc &cf)
template<typename Store, typename CostFunc>
class CostFuncWrapper

Public Types

typedef Store::type::FloatType FloatType
typedef Store::type::InputType InputType
typedef CostFunc::PredictionType PredictionType
typedef CostFunc::ComparisonType ComparisonType

Public Functions

inline CostFuncWrapper(Store &&leaf, const CostFunc &cost = CostFunc())
inline FloatType loss(const InputType &x, const ComparisonType &y)
inline Vector<FloatType> deriv() const
inline PredictionType predict(const InputType &x)
template<typename _PredictionType = PredictionType, typename _InputType = InputType, int TensDimIn = _InputType::Dimension, int TensDimOut = _PredictionType::Dimension, typename std::enable_if<std::is_same<_PredictionType, Tensor<FloatType, TensDimOut>>::value && std::is_same<_InputType, Tensor<FloatType, TensDimIn>>::value && std::is_same<_PredictionType, ComparisonType>::value, int>::type = 0>
inline Tensor<FloatType, TensDimOut - 1> predict(const Tensor<FloatType, TensDimIn - 1> &x, int batch_size)
inline void update(const Vector<FloatType> &new_params)
inline void step(const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline Vector<FloatType> getParams() const

Private Members

Store leaf
PredictionType ypred
ComparisonType yval
CostFunc cost
int nparam
template<typename OutputType>
class MSEcostFunc
template<typename FloatType, int Dim>
class MSEcostFunc<Tensor<FloatType, Dim>>

Public Types

typedef Tensor<FloatType, Dim> DataType
typedef DataType ComparisonType
typedef DataType PredictionType

Public Static Functions

static FloatType loss(const ComparisonType &y, const PredictionType &ypred)
static PredictionType layer_deriv(const ComparisonType &y, const PredictionType &ypred)

ManagedArray

template<typename FloatType>
class ManagedArray

Public Functions

inline ManagedArray()
inline ManagedArray(size_t size, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)
inline ManagedArray(size_t size, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)
inline ManagedArray(const std::vector<FloatType> &init)
inline ManagedArray(ManagedArray &&r)
inline ManagedArray(const ManagedArray &r)
inline ManagedArray &operator=(ManagedArray &&r)
inline ManagedArray &operator=(const ManagedArray &r)
inline size_t size() const
inline View view(ViewMode mode) const
inline ~ManagedArray()
inline void fill(FloatType init, MemoryManager::Pool assign_pool = MemoryManager::Pool::DevicePool)
inline void lock() const
inline void unlock() const

Private Members

MemoryManager::HandleIterator handle
size_t _size
class View

Subclassed by Tensor< _FloatType, Dim >::View

Public Functions

inline accelerator_inline size_t size () const
inline accelerator_inline FloatType * data ()
inline accelerator_inline FloatType const * data () const
inline accelerator_inline FloatType & operator[] (const size_t i)
inline accelerator_inline FloatType operator[] (const size_t i) const
inline View(ViewMode mode, MemoryManager::HandleIterator handle, size_t _size)
inline View(ViewMode mode, const ManagedArray &parent)
inline void free()

Private Members

FloatType *v
size_t _size
MemoryManager::HandleIterator handle

MemoryManager

Enums

enum ViewMode

Values:

enumerator HostRead
enumerator HostWrite
enumerator DeviceRead
enumerator DeviceWrite
enumerator HostReadWrite
enumerator DeviceReadWrite

Functions

inline std::string memPoolManagerReport(bool detailed = false)
class MemoryManager

Public Types

enum Pool

Values:

enumerator DevicePool
enumerator HostPool
typedef std::list<Entry>::iterator EntryIterator
typedef std::list<Handle>::iterator HandleIterator

Public Functions

inline MemoryManager()
inline MemoryManager(size_t max_size_device, size_t max_size_host)
~MemoryManager()
inline void setVerbose(bool to)
void enableIOlogging()
inline void setDiskRoot(const std::string &to)
inline const std::string &getDiskRoot() const
inline void enableDeletionOfLocalDiskDataOnRestore(bool val = true)
inline void setPoolMaxSize(size_t to, Pool pool)
inline size_t getAllocatedBytes(Pool pool) const
size_t getDiskCachedBytes() const
size_t getDiskUsedBytes() const
std::string report(bool detailed = false) const
void evictToDisk(HandleIterator h)
HandleIterator allocate(size_t bytes, Pool pool = DevicePool)
void *openView(ViewMode mode, HandleIterator h)
void closeView(HandleIterator h)
void free(HandleIterator h)
inline size_t nOpenHandles() const
inline void lock(HandleIterator h)
inline void unlock(HandleIterator h)

Public Static Functions

static inline MemoryManager &globalPool()

Protected Functions

inline std::list<Entry> &getLRUpool(Pool pool)
inline std::map<size_t, std::list<Entry>, std::greater<size_t>> &getFreePool(Pool pool)
inline std::string poolName(Pool pool)
EntryIterator allocEntry(size_t bytes, Pool pool)
void sanityCheck()
void moveEntryToFreePool(EntryIterator it, Pool pool)
void freeEntry(EntryIterator it, Pool pool)
void deallocateFreePool(Pool pool, size_t until_allocated_lte = 0)
EntryIterator getEntry(size_t bytes, Pool pool)
void attachEntry(Handle &handle, Pool pool)
void touchEntry(Handle &handle, Pool pool)
void syncDeviceToHost(Handle &handle)
void syncHostToDevice(Handle &handle)
void syncHostToDisk(Handle &handle)
void syncDiskToHost(Handle &handle)
void syncForRead(Handle &handle, Pool pool)
void markForWrite(Handle &handle, Pool pool)
void prepareEntryForView(Handle &handle, Pool pool)
EntryIterator evictEntry(EntryIterator entry, bool free_it, Pool pool)
void removeDiskData(Handle &handle, bool in_memory_check = true)

Protected Attributes

bool verbose
std::ofstream *io_logger
std::list<Handle> handles
std::list<Entry> device_in_use_pool
std::map<size_t, std::list<Entry>, std::greater<size_t>> device_free_pool
std::list<HandleIterator> device_queued_prefetches
std::list<Entry> host_in_use_pool
std::map<size_t, std::list<Entry>, std::greater<size_t>> host_free_pool
std::list<HandleIterator> host_queued_prefetches
size_t device_allocated
size_t host_allocated
size_t device_pool_max_size
size_t host_pool_max_size
size_t local_disk_allocated
size_t device_allocated_HWM
size_t host_allocated_HWM
size_t local_disk_allocated_HWM
std::string disk_root
bool delete_local_diskdata_on_restore

Protected Static Functions

static void summarizePoolStatus(std::ostream &os, const std::string &descr, const std::map<size_t, std::list<Entry>, std::greater<size_t>> &pool_stat)
static void summarizePoolStatus(std::ostream &os, const std::string &descr, const std::map<size_t, int, std::greater<size_t>> &pool_stat)
struct Entry

Public Members

size_t bytes
void *ptr
Handle *owned_by
struct Handle

Public Functions

inline Handle()

Public Members

size_t lock_entry
bool device_valid
EntryIterator device_entry
bool host_valid
EntryIterator host_entry
size_t bytes
bool device_in_sync
bool host_in_sync
bool disk_in_sync
std::string disk_file
bool disk_file_exists
bool device_prefetch_underway
bool initialized

Optimizers

Functions

template<typename DataLoader, typename ModelType, typename Optimizer>
std::vector<typename ModelType::FloatType> train(ModelType &model, const DataLoader &data, Optimizer &optimizer, int nepoch, int batch_size, bool suppress_logging = false)
template<typename FloatType, int DimX, int DimY>
inline XYpair<FloatType, DimX + 1, DimY + 1> batchData(int const *indices, int batch_size, const std::vector<XYpair<FloatType, DimX, DimY>> &data)
template<typename FloatType, int DimX, int DimY, typename ModelType, typename Optimizer>
std::vector<FloatType> train(ModelType &model, const std::vector<XYpair<FloatType, DimX, DimY>> &data, Optimizer &optimizer, int nepoch, int batch_size, bool suppress_logging = false)
template<typename FloatType>
struct noScheduler

Public Functions

inline noScheduler(FloatType lr)
inline FloatType operator()(const int epoch) const

Public Members

FloatType lr
template<typename FloatType, typename LRscheduler = noScheduler<FloatType>>
class GradientDescentOptimizer

Public Functions

inline GradientDescentOptimizer(const LRscheduler &sched)
template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0>
inline GradientDescentOptimizer(FloatType lr)
inline void epochStart(int epoch, bool verbose = true)
inline Vector<FloatType> descentProfile(FloatType &step_size, const Vector<FloatType> &deriv) const

Private Members

LRscheduler sched
FloatType eps
template<typename FloatType>
struct AdamParams

Public Functions

inline AdamParams(FloatType beta1 = 0.99, FloatType beta2 = 0.999, FloatType eps = 1e-8)

Public Members

FloatType beta1
FloatType beta2
FloatType eps
template<typename FloatType, typename LRscheduler = noScheduler<FloatType>>
class AdamOptimizer

Public Functions

inline AdamOptimizer(const AdamParams<FloatType> &ap, const LRscheduler &sched)
inline AdamOptimizer(const LRscheduler &sched)
template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0>
inline AdamOptimizer(const AdamParams<FloatType> &ap, FloatType lr)
template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0>
inline AdamOptimizer(FloatType lr)
inline void epochStart(int epoch, bool verbose = true)
inline Vector<FloatType> descentProfile(FloatType &step_size, const Vector<FloatType> &g)

Private Functions

inline void reset()
template<typename FloatType>
class DecayScheduler

Public Functions

inline DecayScheduler(FloatType eps, FloatType decay_rate)
inline FloatType operator()(const int epoch) const

Private Members

FloatType eps
FloatType decay_rate
template<typename FloatType, int DimX, int DimY>
struct XYpair

Public Members

Tensor<FloatType, DimX> x
Tensor<FloatType, DimY> y
template<typename FloatType, int DimX, int DimY>
class XYpairDataLoader

Public Functions

inline XYpairDataLoader(const std::vector<XYpair<FloatType, DimX, DimY>> &data)
inline size_t size() const
inline XYpair<FloatType, DimX + 1, DimY + 1> batch(int const *indices, int batch_size) const

Private Members

const std::vector<XYpair<FloatType, DimX, DimY>> &data

Padding

template<typename FloatType>
class NoPadding

Public Functions

template<int Dim>
inline Tensor<FloatType, Dim> padInput(const Tensor<FloatType, Dim> &in) const
template<int Dim>
inline Tensor<FloatType, 3> unpadDeriv(const Tensor<FloatType, Dim> &deriv_pad) const

Public Static Functions

static inline int layerOutputLength(int input_size, int kernel_size, int stride)
template<typename FloatType>
class SamePaddingZero1D

Public Functions

inline SamePaddingZero1D(int kernel_size, int stride = 1)
inline Tensor<FloatType, 3> padInput(const Tensor<FloatType, 3> &in) const
inline Tensor<FloatType, 3> unpadDeriv(const Tensor<FloatType, 3> &deriv_pad) const

Public Static Functions

static inline int layerOutputLength(int input_size, int kernel_size, int stride)

Private Members

int kernel_size
int stride

Performance

struct FLOPScounter

Public Functions

inline FLOPScounter()
inline size_t add(size_t v)
inline void lock()
inline bool locked() const
inline size_t value() const

Private Members

bool _locked
size_t _value

Pipelining

Defines

CWRP

Functions

template<typename PipelineBlockType>
auto pipeline_mse_cost(PipelineBlockType &u) -> CWRP
template<typename InputType, typename OutputType, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto pipeline_block(U &&u, int const *block_output_dims, int const *block_input_dims) -> PipelineBlock<DDST(u), InputType, OutputType>
template<typename PipelineBlockType, typename CostFunc>
class BatchPipelineCostFuncWrapper

Public Types

Values:

enumerator InputDimension
enumerator OutputDimension
typedef PipelineBlockType::FloatType FloatType
typedef PipelineBlockType::InputType InputType
typedef PipelineBlockType::OutputType OutputType
typedef PipelineBlockType::BlockInputType BlockInputType

Public Functions

typedef LAYEROUTPUTTYPE (PipelineBlockType) BlockOutputType
inline BatchPipelineCostFuncWrapper(PipelineBlockType &block, int call_batch_size, const CostFunc &cost = CostFunc())
inline FloatType loss(const InputType &x, const OutputType &y)
inline Vector<FloatType> deriv() const
inline void update(const Vector<FloatType> &new_params)
inline void step(const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline Vector<FloatType> getParams() const
inline Matrix<FloatType> predict(const Matrix<FloatType> &x)
inline Vector<FloatType> predict(const Vector<FloatType> &x)

Private Members

PipelineBlockType &block
CostFunc cost
int nparam
int value_lag
int deriv_lag
int call_batch_size
int rank
int nrank
Vector<FloatType> deriv_store
template<typename PipelineBlockType, typename CostFunc>
class PipelineCostFuncWrapper

Public Functions

inline PipelineCostFuncWrapper(PipelineBlockType &block, const CostFunc &cost = CostFunc())
inline std::pair<FloatType, bool> loss(const OutputType &x, const OutputType &y)
inline std::pair<Vector<FloatType>, bool> deriv() const
inline void update(const Vector<FloatType> &new_params)
inline void step(const Vector<FloatType> &derivs, FloatType eps)
inline int nparams()
inline Vector<FloatType> getParams() const
inline int valueLag() const
inline int derivLag() const

Private Types

Values:

enumerator InputDimension
enumerator OutputDimension
typedef PipelineBlockType::FloatType FloatType
typedef PipelineBlockType::InputType InputType
typedef PipelineBlockType::OutputType OutputType
typedef PipelineBlockType::BlockInputType BlockInputType

Private Functions

typedef LAYEROUTPUTTYPE (PipelineBlockType) BlockOutputType

Private Members

PipelineBlockType &block
RingBuffer<OutputType> yval_buf_v
size_t calls
OutputType ypred
OutputType yval
CostFunc cost
int nparam
int value_lag
int deriv_lag
int rank
struct LockControlWrapper

Subclassed by LockControlWrapperTensor< FloatType, Dim >

Public Functions

virtual void lock() = 0
virtual void unlock() = 0
template<typename FloatType, int Dim>
struct LockControlWrapperTensor : public LockControlWrapper

Public Functions

inline LockControlWrapperTensor(Tensor<FloatType, Dim> const *v)
inline virtual void lock() override
inline virtual void unlock() override

Public Members

Tensor<FloatType, Dim> const *v
class PipelineCommunicator

Subclassed by PipelineBlock< BlockStore, InputType_, OutputType_ >

Public Functions

inline PipelineCommunicator()
inline int pipelineDepth() const
inline void waitAll(const std::vector<CommsRequest> &reqs)
template<typename T, typename U>
inline void passLeft(std::vector<CommsRequest> &reqs, T const *send_bulk, T const *send_last, U *recv_first, U *recv_bulk) const
template<typename T, typename U>
inline void passRight(std::vector<CommsRequest> &reqs, T const *send_first, T const *send_bulk, U *recv_bulk, U *recv_last) const
template<typename T>
inline void passLeftLastToFirst(std::vector<CommsRequest> &reqs, T const *send_last, T *recv_first)

Public Static Functions

template<typename T>
static inline CommsRequest send(const T &mat, int to)
template<typename T>
static inline CommsRequest recv(T &mat, int from)

Protected Attributes

int rank
int next_rank
int prev_rank
int pipeline_depth
bool is_first
bool is_last
struct CommsRequest

Public Functions

template<typename FloatType, int Dim>
inline CommsRequest(MPI_Request r, const Tensor<FloatType, Dim> &vv)

Public Members

std::unique_ptr<LockControlWrapper> v
MPI_Request req
template<typename BlockStore, typename InputType_, typename OutputType_>
class PipelineBlock : public PipelineCommunicator

Public Types

Values:

enumerator BlockInputDimension
enumerator BlockOutputDimension
typedef BlockStore::type::FloatType FloatType
typedef BlockStore::type::InputType BlockInputType
typedef InputType_ InputType
typedef OutputType_ OutputType

Public Functions

typedef LAYEROUTPUTTYPE (typename BlockStore::type) BlockOutputType
inline PipelineBlock(BlockStore &&_block, int const *block_output_dims_, int const *block_input_dims_)
PipelineBlock(const PipelineBlock &r) = delete
PipelineBlock(PipelineBlock &&r) = default
inline int nparams() const
inline int valueLag() const
inline int derivLag() const
inline OutputType value(const InputType &in)
inline void deriv(Vector<FloatType> &cost_deriv, OutputType &&above_deriv)
inline void update(const Vector<FloatType> &new_params)
inline void step(const Vector<FloatType> &derivs, FloatType eps)
inline void getParams(Vector<FloatType> &into) const

Private Functions

template<typename OutType, typename B, typename std::enable_if<!std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0>
inline OutType get_as(B &&v)
template<typename OutType, typename B, typename std::enable_if<std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0>
inline OutType get_as(B &&v)
template<typename OutType, typename B, typename std::enable_if<!std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0>
inline const OutType &get_as(const B &v)
template<typename OutType, typename B, typename std::enable_if<std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0>
inline const OutType &get_as(const B &v)

Private Members

BlockStore block
int block_output_dims[BlockOutputDimension]
int block_input_dims[BlockInputDimension]
int nparam
int stage_off
BlockInputType prev_block_in
BlockOutputType prev_above_deriv
Vector<FloatType> prev_cost_deriv_passright
int dcalls

Random

Typedefs

typedef std::mt19937 GlobalRNGtype

Functions

GlobalRNGtype &globalRNG()
inline void reseedGlobalRNG(size_t seed)
template<typename FloatType, int Dim, typename Dist, typename RNG>
void random(Tensor<FloatType, Dim> &m, Dist &dist, RNG &rng)
template<typename FloatType, int Dim, typename RNG>
void uniformRandom(Tensor<FloatType, Dim> &m, RNG &rng, FloatType min = FloatType(-1.0), FloatType max = FloatType(1.0))
template<typename FloatType, int Dim>
inline void uniformRandom(Tensor<FloatType, Dim> &m, FloatType min = FloatType(-1.0), FloatType max = FloatType(1.0))
template<typename FloatType, typename RNG>
void glorotUniformRandom(Matrix<FloatType> &m, RNG &rng, FloatType gain = FloatType(1.0))
template<typename FloatType>
inline void glorotUniformRandom(Matrix<FloatType> &m, FloatType gain = FloatType(1.0))
template<typename FloatType, typename RNG>
size_t drawWeightedRandomIndex(FloatType const *weights, int nweights, size_t stride, RNG &rng)
template<typename FloatType>
size_t drawWeightedRandomIndex(FloatType const *weights, int nweights, size_t stride)

Variables

constexpr size_t default_seed = 1234

RingBuffer

template<typename T>
class RingBuffer

Public Functions

inline RingBuffer(size_t size)
inline RingBuffer()
inline void resize(size_t size)
inline void push(T &&v)
inline T pop()
inline bool isFilled() const
inline size_t size() const
inline const T &latest() const

Private Members

std::vector<T> ring
size_t off
bool filled

Serialization

Enums

enum class Endianness

Values:

enumerator Big
enumerator Little
enumerator System

Functions

std::string toString(const Endianness e)
Endianness endianness()
uint8_t BitReverseTable256(size_t i)
template<typename T>
inline T bitReverse(T in)
class BinaryWriter

Public Functions

BinaryWriter(const std::string &filename, const Endianness end = Endianness::System)
template<typename T, int Dim>
void write(const Tensor<T, Dim> &t)
template<typename Model, typename std::enable_if<ISLEAF(Model), int>::type = 0>
void write(const Model &model)
template<typename Store, typename CostFunc>
void write(const CostFuncWrapper<Store, CostFunc> &model)
inline void close()

Private Functions

template<typename T>
inline void writeValue(T v)

Private Members

std::ofstream of
bool do_flip
class BinaryReader

Public Functions

BinaryReader(const std::string &filename)
template<typename T, int Dim>
void read(Tensor<T, Dim> &t)
template<typename Model, typename std::enable_if<ISLEAF(Model), int>::type = 0>
void read(Model &model)
template<typename Store, typename CostFunc>
void read(CostFuncWrapper<Store, CostFunc> &model)
inline void close()

Private Functions

template<typename T>
inline T readValue()

Private Members

std::ifstream of
bool do_flip

Tensors

Defines

_1D_TENSOR_ONLY
_2D_TENSOR_ONLY
_3D_TENSOR_ONLY
_4D_TENSOR_ONLY

Typedefs

template<typename FloatType>
using Vector = Tensor<FloatType, 1>

Alias vector to 1D tensor.

template<typename FloatType>
using Matrix = Tensor<FloatType, 2>

Alias matrix to 2D tensor.

Functions

template<size_t Dim> accelerator_inline size_t tensorSize (int const *dims)

Compute the linear size of a tensor of dimension “Dim” and the provided dimensions.

Parameters:

dims – The tensor dimension (array of size Dim)

template<size_t Dim> accelerator_inline size_t tensorOffset (int const *coord, int const *dims)

Compute the linear (pointer) offset of a specific coordinate within a tensor of dimension “Dim” and the provided dimensions.

Parameters:
  • coord – The coordinate (array of size Dim)

  • dims – The tensor dimension (array of size Dim)

template<size_t Dim> accelerator_inline void tensorOffsetUnmap (int *coord, int const *dims, size_t offset)

Compute the coordinate associated with a specific linear (pointer) offset for a tensor of dimension “Dim” and the provided dimensions.

Parameters:
  • coord[out] The tensor coordinate (array of size Dim)

  • dims[in] The tensor dimension (array of size Dim)

  • offset[in] The input linear offset

template<int Dim> accelerator_inline size_t tensorDimensionStride (int iter_dim, int const *size)

Compute the stride for iterating over a specific dimension for a tensor of dimension “Dim” with the provided dimensions.

Parameters:
  • iter_dim – The dimension that will be iterated over

  • size – The tensor dimension (array of size Dim)

template<int Dim> accelerator_inline size_t tensorDimensionBase (int iter_dim, int const *other_coord, int const *size)

Compute the linear (pointer) offset for the base element for iterating over a specific dimension of a tensor of dimension “Dim”.

Parameters:
  • iter_dim – The dimension that will be iterated over

  • other_coord – The coordinates for the other dimensions (array of size Dim-1)

  • size – The tensor dimension (array of size Dim)

template<int Dim> accelerator_inline size_t batchTensorDimensionBaseLin (int iter_dim, int batch_idx, size_t other_dim_lin, int const *size)

Compute the linear (pointer) offset for the base element for iterating over a specific dimension for a batch-tensor (last dim is the batch dimension) of dimension “Dim”.

Parameters:
  • iter_dim – The dimension that will be iterated over

  • batch_idx – The batch index (coordinate in last dimension)

  • other_dim_lin – The coordinates in dimensions apart from iter_dim and Dim-1 expressed as a lexicographic linear index in descending order, e.g. z + size_z * (y + size_y * x)

  • size – The tensor dimension (array of size Dim)

template<typename FloatType>
void pokeColumn(Matrix<FloatType> &into, int col, const Vector<FloatType> &data)

Insert a vector as particular column of a matrix, i.e. into(:,col) = data(:)

Parameters:
  • into – The target matrix

  • col – The column index

  • data – The input column

template<typename FloatType>
void pokeRow(Matrix<FloatType> &into, int row, const Vector<FloatType> &data)

Insert a vector as particular row of a matrix, i.e. into(row,:) = data(:)

Parameters:
  • into – The target matrix

  • row – The row index

  • data – The input row

template<typename FloatType>
Vector<FloatType> peekColumn(const Matrix<FloatType> &m, int col)

Retrieve a specific column of a matrix m, i.e. return m(:,col)

Parameters:
  • m – The matrix

  • col – The column index

template<typename FloatType>
Matrix<FloatType> peekColumns(const Matrix<FloatType> &m, int col_start, int col_end)

Retrieve multiple consecutive columns of a matrix m, i.e. return m(:,col_start:col_end+1)

Parameters:
  • m – The matrix

  • col_start – The first column index

  • col_end – The last column index

template<typename FloatType>
void pokeColumns(Matrix<FloatType> &into, int col_start, int col_end, const Matrix<FloatType> &cols)

Insert multiple consecutive columns into a matrix, i.e. into(:,col_start:col_end+1) = cols(:,:)

Parameters:
  • into – The matrix in which to insert the columns

  • col_start – The first column index

  • col_end – The last column index

  • cols – The matrix containing the columns (number of columns = col_end-col_start+1)

template<typename FloatType>
std::ostream &operator<<(std::ostream &os, const Vector<FloatType> &v)

Output a vector to a stream.

template<typename FloatType>
std::ostream &operator<<(std::ostream &os, const Matrix<FloatType> &v)

Output a matrix to a stream.

template<typename FloatType>
Vector<FloatType> operator*(const Matrix<FloatType> &A, const Vector<FloatType> &x)

Perform the matrix-vector product of A and x.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> &operator+=(Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Addition-assignment operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> operator+(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Addition operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> &operator-=(Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Subtraction-assignment operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> operator-(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b)

Subtraction operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> &operator*=(Tensor<FloatType, Dim> &a, FloatType eps)

Scalar multiplication-assignment operator for tensors.

template<typename FloatType, int Dim>
Tensor<FloatType, Dim> operator*(FloatType eps, const Tensor<FloatType, Dim> &b)

Scalar left-multiplication operator for tensors.

template<typename FloatType, int Dim>
inline Tensor<FloatType, Dim> operator*(const Tensor<FloatType, Dim> &b, FloatType eps)

Scalar right-multiplication operator for tensors.

template<int Dim, typename FloatType>
Vector<FloatType> flatten(const Tensor<FloatType, Dim> &t)

“Flatten” a tensor into a vector. The output mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
FloatType *flatten(FloatType *host_ptr, const Tensor<FloatType, Dim> &in)

“Flatten” a tensor into a pre-allocated host array and return the pointer to the element of the array one past the flattened tensor.

Parameters:
  • host_ptr – The host array destination. The output mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

  • in – The input tensor

Returns:

A pointer to the element of the array one past the flattened tensor. Note: the copy is performed on the host side.

template<int Dim, typename FloatType>
void unflatten(Tensor<FloatType, Dim> &out, const Vector<FloatType> &t)

“Unflatten” vector into tensor

Parameters:
  • out – The output tensor. Its dimensions should be set correctly prior to calling this function

  • t – The input vector. The input mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
FloatType const *unflatten(Tensor<FloatType, Dim> &out, FloatType const *host_ptr)

“Unflatten” a tensor from a pre-allocated host array and return the pointer to the element of the array one past the flattened tensor.

Parameters:
  • out – The output tensor. Its dimensions should be set correctly prior to calling this function

  • host_ptr – The input array pointer. The input mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

Returns:

A pointer to the element of the array one past the flattened tensor. Note: the copy is performed on the host side.

template<int Dim1, int Dim2, typename FloatType>
Vector<FloatType> flatten2(const Tensor<FloatType, Dim1> &t1, const Tensor<FloatType, Dim2> &t2)

Flatten two tensors into a single contiguous array.

Parameters:
  • t1 – The first tensor

  • t2 – The second tensor

Returns:

An output vector of length t1.data_len() + t2.data_len(), where the elements within the sub-arrays are obtained from their corresponding tensor via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim1, int Dim2, typename FloatType>
void unflatten2(Tensor<FloatType, Dim1> &t1, Tensor<FloatType, Dim2> &t2, const Vector<FloatType> &v)

Unflatten two tensors from a single contiguous array.

The output tensor dimensions should be set appropriately prior to calling this function

Parameters:
  • t1[out] The first tensor

  • t2[out] The second tensor

  • v[in] An input vector of length t1.data_len() + t2.data_len(), where the elements within the sub-arrays map to their corresponding tensor coordinates via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
Vector<FloatType> flattenNsameDim(Tensor<FloatType, Dim> const *const *tens, int N)

Flatten N tensors of the same dimension into a single contiguous array.

Parameters:
  • tens – An array of pointers to input tensors

  • N – The number of tensors

Returns:

An output vector of length \sum_i tens[i].data_len(), where the elements within the sub-arrays are obtained from their corresponding tensor via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
void unflattenNsameDim(Tensor<FloatType, Dim> *const *tens, int N, const Vector<FloatType> &v)

Unflatten N tensors of the same dimension from a single contiguous array.

Parameters:
  • tens – The output tensor array. The tensor dimensions should be set appropriately prior to calling this function.

  • N – The number of tensors

  • v – The input vector. This must have length \sum_i tens[i].data_len(), where the elements within the sub-arrays map to their corresponding tensor coordinates via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType>
Tensor<FloatType, Dim> batchTensorConcatenate(Tensor<FloatType, Dim> const *const *in, int Ntens, int concat_dim)

Concatenate (stack) Ntens tensors along a dimension concat_dim < Dim-1 (last dim is assumed to be the batch index).

Dimensions other than concat_dim must all have the same size.

Parameters:
  • in – The input tensor array

  • Ntens – The number of tensors

  • concat_dim – The dimension along which the concatenation is performed

template<int Dim, typename FloatType>
void batchTensorSplit(Tensor<FloatType, Dim> *const *out, int Ntens, const Tensor<FloatType, Dim> &in, int split_dim)

Split a tensor along a dimension split_dim < Dim-1 (last dim is the batch index) into multiple tensors.

Dimensions other than split_dim must all have the same size.

Parameters:
  • out – The output tensors. These should be pre-initialized to the appropriate sizes.

  • Ntens – The number of output tensors

  • in – The input tensor

  • split_dim – The dimension along which to split

template<int Dim, typename FloatType>
double norm2(const Tensor<FloatType, Dim> &T)

Return the tensor norm^2, i.e. \sum_{i,j,k,…} T[i,j,k,…]^2.

template<typename _FloatType, int Dim>
struct Tensor
#include <Tensors.hpp>

A class for tensors of arbitrary dimension and floating point type.

Public Types

Values:

enumerator Dimension
typedef _FloatType FloatType

The floating point type

typedef const int *Dims

Array type for tensor dimensions

typedef const int *Coord

Array type for tensor coordinates

Public Functions

inline Tensor()

Default constructor for a zero-size tensor.

inline Tensor(Dims dims, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • dims – The tensor dimensions

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline Tensor(Dims dims, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a tensor with the provided dimensions uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • dims – The tensor dimensions

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline Tensor(Dims dims, const std::vector<FloatType> &init_vals)

Construct a tensor with the provided dimensions initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • dims – The tensor dimensions

  • init_vals – The initial values with lexicographic mapping in descending order, e.g. z + size_z * (y + size_y * x)

inline Tensor(Dims dims, FloatType const *init_vals)

Construct a tensor with the provided dimensions initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • dims – The tensor dimensions

  • init_vals – The initial values with lexicographic mapping in descending order, e.g. z + size_z * (y + size_y * x)

inline _1D_TENSOR_ONLY Tensor(int len, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 1D tensor (vector) with the provided length with the initial memory allocation in the provided pool.

Parameters:
  • len – The vector length

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _1D_TENSOR_ONLY Tensor(int len, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 1D tensor (vector) with the provided length uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • len – The vector length

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _1D_TENSOR_ONLY Tensor(const std::vector<FloatType> &init_vals)

Construct a 1D tensor (vector) initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:

init_vals – The initial values

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 2D tensor (matrix) with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension (number of rows)

  • size1 – The size of the 2nd dimension (number of columns)

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 2D tensor (matrix) with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension (number of rows)

  • size1 – The size of the 2nd dimension (number of columns)

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, const std::vector<FloatType> &init_vals)

Construct a 2D tensor (matrix) with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • size0 – The size of the 1st dimension (number of rows)

  • size1 – The size of the 2nd dimension (number of columns)

  • init_vals – The initial values with lexicographic mapping y + size1*x for coord (x,y)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 3D tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 3D tensor with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, const std::vector<FloatType> &init_vals)

Construct a 3D tensor with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • init_vals – The initial values with lexicographic mapping z + size2*(y + size1*x) for coord (x,y,z)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 4D tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • size3 – The size of the 4th dimension

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 4D tensor with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • size3 – The size of the 4th dimension

  • init – The initial value for all elements

  • alloc_pool – The memory pool for the initial allocation (default: device)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, const std::vector<FloatType> &init_vals)

Construct a 4D tensor with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:
  • size0 – The size of the 1st dimension

  • size1 – The size of the 2nd dimension

  • size2 – The size of the 3rd dimension

  • size3 – The size of the 4th dimension

  • init_vals – The initial values with lexicographic mapping t + size3*(z + size2*(y + size1*x) ) for coord (x,y,z,t)

inline int const *sizeArray() const

Return the tensor dimensions as an array pointer.

inline int size(int i) const

Return the tensor size along a specific dimension.

Parameters:

i – The dimension

inline std::string sizeArrayString() const

Return the tensor dimensions as a string.

inline size_t data_len() const

Return the linear dimension (flattened size) of the tensor, or equivalently, the total number of elements.

inline View view(ViewMode mode) const

Return a view to this tensor opened with a specific view mode.

inline void lock() const

“Lock” the memory region associated with this object such that it cannot be auto-evicted to free space in a memory pool. A possible use case is to ensure a memory region remains valid while performing an asynchronous background copy.

inline void unlock() const

“Unlock” the memory region, allowing it to be evicted. This is the default state.

Tensor sliceLastDimension(int idx_start, int idx_end) const

Return a tensor where the last dimension contains the slice between idx_start and idx_end (inclusive). E.g., for a 3D tensor T, return T(:,:,idx_start:idx_end+1)

void pokeLastDimension(const Tensor<FloatType, Dim - 1> &ins, const int idx)

Insert a tensor of Dim-1 such that (*this)(i,j,k,…, idx) = ins(i,j,k,…). E.g., for a 3D tensor T and 2D input I, set T[:,:,idx] = I[:,:].

Parameters:
  • ins – The Dim-1 dimensional tensor to insert

  • idx – The index in the last dimension on which to insert the tensor

Tensor<FloatType, Dim - 1> peekLastDimension(const int idx) const

Return a tensor of dimension Dim-1 such that out(i,j,k,…) = (*this)(i,j,k,…, idx). E.g., for a 3D tensor T, return T[:,:,idx].

Parameters:

idx – The index in the last dimension from which to extract the tensor

Public Static Functions

static inline constexpr int dimension()

Return the tensor dimension.

Private Members

ManagedArray<FloatType> vals

Memory-contiguous container for tensor data

int _size[Dim]

Tensor dimensions

class View : private ManagedArray<FloatType>::View
#include <Tensors.hpp>

The tensor View accessor class

Public Functions

inline View(ViewMode mode, const Tensor<FloatType, Dim> &parent)

Construct a view with a specific view mode and parent object.

Parameters:
  • mode – The view mode

  • parent – The parent object

inline void free()

Free the view. This must be called explicitly once the view is no longer needed.

inline accelerator_inline FloatType & operator() (const Coord coord)

Access the tensor at the provided coordinate.

inline accelerator_inline FloatType operator() (const Coord coord) const

Access the tensor at the provided coordinate.

inline _1D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i)

Access the 1D tensor at the index (i)

inline _1D_TENSOR_ONLY accelerator_inline FloatType operator() (int i) const

Access the 1D tensor at the index (i)

inline _2D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j)

Access the 2D tensor at the coordinate (i,j)

inline _2D_TENSOR_ONLY accelerator_inline FloatType operator() (int i, int j) const

Access the 2D tensor at the coordinate (i,j)

inline _3D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j, int k)

Access the 3D tensor at the coordinate (i,j,k)

inline _3D_TENSOR_ONLY accelerator_inline FloatType operator() (int i, int j, int k) const

Access the 3D tensor at the coordinate (i,j,k)

inline _4D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j, int k, int l)

Access the 4D tensor at the coordinate (i,j,k,l)

inline _4D_TENSOR_ONLY accelerator_inline FloatType operator() (int i, int j, int k, int l) const

Access the 4D tensor at the coordinate (i,j,k,l)

inline accelerator_inline FloatType const * data () const

Return a pointer to the underlying array.

inline accelerator_inline FloatType * data ()

Return a pointer to the underlying array.

inline accelerator_inline size_t data_len () const

Return the linear dimension (flattened size) of the tensor, or equivalently, the total number of elements.

inline accelerator_inline size_t size (int i) const

Return the tensor size along a specific dimension.

Parameters:

i – The dimension

inline accelerator_inline int const * sizeArray () const

Return the tensor dimensions as an array pointer.

inline accelerator_inline FloatType & compact3 (int i, int j, int k)

Access a tensor element at a coordinate expressed such that the first Dim-2 dimensions are expressed lexicographically.

Parameters:
  • i – The first Dim-2 dimensions expressed lexicographically in descending order (e.g. z+sizez*(y+sizey*x))

  • j – The index of dimension Dim-2

  • k – The index of dimension Dim-1

inline accelerator_inline FloatType compact3 (int i, int j, int k) const

Access a tensor element at a coordinate expressed such that the first Dim-2 dimensions are expressed lexicographically.

Parameters:
  • i – The first Dim-2 dimensions expressed lexicographically in descending order (e.g. z+sizez*(y+sizey*x))

  • j – The index of dimension Dim-2

  • k – The index of dimension Dim-1

Private Types

typedef ManagedArray<FloatType>::View Base

Private Members

int *_size

Tensor dimensions

bool is_device_ptr

Track whether the tensor dimensions array is allocated on the device or host

Testing

Functions

template<typename FloatType>
bool near(FloatType a, FloatType b, FloatType rel_tol, FloatType *reldiff_p = nullptr)
template<typename FloatType>
bool near(const Vector<FloatType> &a, const Vector<FloatType> &b, FloatType rel_tol, bool verbose = false)
template<typename FloatType>
bool near(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FloatType rel_tol, bool verbose = false)
template<typename FloatType>
bool abs_near(FloatType a, FloatType b, FloatType abs_tol, FloatType *absdiff_p = nullptr)
template<typename FloatType>
bool abs_near(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FloatType abs_tol, bool verbose = false)
template<typename FloatType, int Dim>
bool abs_near(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b, FloatType abs_tol, bool verbose = false)
template<typename FloatType, int Dim>
bool equal(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b, bool verbose = false)
template<typename Op, typename PreOp>
void benchmark(double &mean, double &std, int nrpt, int nwarmup, const Op &op, const PreOp &preop)
template<typename TensType>
TensType::FloatType testCost(const Vector<typename TensType::FloatType> &c, const TensType &v)
template<typename ModelType>
void testDeriv(ModelType &model, int const *in_sizes, int const *out_sizes, typename ModelType::FloatType delta = typename ModelType::FloatType(1e-4))
template<typename ComponentWrapper>
void testComponentDeriv(ComponentWrapper &cpt, typename ComponentWrapper::FloatType delta = typename ComponentWrapper::FloatType(1e-4), bool _2nd_order = false)
template<typename FloatType>
std::vector<FloatType> softMaxVector(const std::vector<FloatType> &v, FloatType beta = 1.0)

Timing

Functions

inline std::chrono::system_clock::time_point now()
inline size_t usSinceEpoch()
inline size_t usCountSince(const std::chrono::system_clock::time_point &when)
inline double since(const std::chrono::system_clock::time_point &when)

Batch3tensorPairContractComponent

template<typename _FloatType>
class Batch3tensorPairContractComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline Batch3tensorPairContractComponent(int contract_dim_A, int contract_dim_B, FloatType nrm = 1.0)
Batch3tensorPairContractComponent(const Batch3tensorPairContractComponent &r) = delete
Batch3tensorPairContractComponent(Batch3tensorPairContractComponent &&r) = default
inline Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &A, const Tensor<FloatType, 3> &B)
inline void deriv(Tensor<FloatType, 3> &&_dcost_by_dC, Tensor<FloatType, 3> &dcost_by_dA, Tensor<FloatType, 3> &dcost_by_dB) const
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Private Members

FloatType nrm
int contract_dim_A
int contract_dim_B
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
mutable RingBuffer<Tensor<FloatType, 3>> A_buf
mutable RingBuffer<Tensor<FloatType, 3>> B_buf

BatchedMatrixRowSoftMaxComponent

template<typename _FloatType>
class BatchedMatrixRowSoftMaxComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchedMatrixRowSoftMaxComponent(bool use_mask = false, FloatType beta = 1.0)
BatchedMatrixRowSoftMaxComponent(const BatchedMatrixRowSoftMaxComponent &r) = delete
BatchedMatrixRowSoftMaxComponent(BatchedMatrixRowSoftMaxComponent &&r) = default
Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &in) const
void deriv(Tensor<FloatType, 3> &&dcost_by_dOut, Tensor<FloatType, 3> &dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)

Private Members

FloatType beta
mutable RingBuffer<Tensor<FloatType, 3>> out_buf
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool use_mask

BatchTensorConcatenateComponent

template<typename _FloatType, int TensDim>
class BatchTensorConcatenateComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchTensorConcatenateComponent(int concat_dim, int Ntens)
BatchTensorConcatenateComponent(const BatchTensorConcatenateComponent &r) = delete
BatchTensorConcatenateComponent(BatchTensorConcatenateComponent &&r) = default
inline Tensor<FloatType, TensDim> value(Tensor<FloatType, TensDim> const *const *in)
inline void deriv(Tensor<FloatType, TensDim> &&_dcost_by_dOut, Tensor<FloatType, TensDim> *const *dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const

Private Members

int concat_dim
int Ntens
std::vector<std::array<int, TensDim>> tens_dims
bool setup

BatchTensorDimensionSliceComponent

template<typename _FloatType, int TensDim>
class BatchTensorDimensionSliceComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchTensorDimensionSliceComponent(int slice_dim, int slice_idx)
BatchTensorDimensionSliceComponent(const BatchTensorDimensionSliceComponent &r) = delete
BatchTensorDimensionSliceComponent(BatchTensorDimensionSliceComponent &&r) = default
Tensor<FloatType, TensDim - 1> value(const Tensor<FloatType, TensDim> &in)
void deriv(Tensor<FloatType, TensDim - 1> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const

Private Members

int slice_dim
int slice_idx
int in_size[TensDim]
int out_size[TensDim - 1]
size_t other_dim_vol
size_t offset_in
bool setup

BatchTensorDNNcomponent

template<typename _FloatType, int TensDim, typename ActivationFunc>
class BatchTensorDNNcomponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchTensorDNNcomponent(const Matrix<FloatType> &weights, const Vector<FloatType> &bias, int contract_dim, const ActivationFunc &activation)
inline BatchTensorDNNcomponent(const Matrix<FloatType> &_weights, int contract_dim, const ActivationFunc &activation)
BatchTensorDNNcomponent(const BatchTensorDNNcomponent &r) = delete
BatchTensorDNNcomponent(BatchTensorDNNcomponent &&r) = default
Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &x)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dCost_by_dOut, Tensor<FloatType, TensDim> &dCost_by_dIn) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
void getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

Matrix<FloatType> weights
Vector<FloatType> bias
int batch_size
int contract_dim
bool use_bias
int in_dims[TensDim]
int out_dims[TensDim]
size_t other_size
size_t stride
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
ActivationFunc activation_func
mutable RingBuffer<Tensor<FloatType, TensDim>> in_buf
mutable RingBuffer<Tensor<FloatType, TensDim>> activation_deriv_buf

MatrixTensorContractComponent

template<typename _FloatType, int TensDim>
class MatrixTensorContractComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline MatrixTensorContractComponent(const Matrix<FloatType> &weights)
MatrixTensorContractComponent(const MatrixTensorContractComponent &r) = delete
MatrixTensorContractComponent(MatrixTensorContractComponent &&r) = default
Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &x)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dCost_by_dOut, Tensor<FloatType, TensDim> &dCost_by_dIn) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
void getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

Matrix<FloatType> weights
int size0
int size1
int batch_size
int in_dims[TensDim]
int out_dims[TensDim]
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
mutable RingBuffer<Tensor<FloatType, TensDim>> in_buf

MultiHeadAttentionComponent

template<typename _FloatType>
class MultiHeadAttentionComponent

Public Types

typedef _FloatType FloatType

Public Functions

MultiHeadAttentionComponent(int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadAttentionComponent(int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadAttentionComponent(const MultiHeadAttentionComponent &r) = delete
MultiHeadAttentionComponent(MultiHeadAttentionComponent &&r) = default
TensorType value(const TensorType &Q, const TensorType &K, const TensorType &V)
void deriv(Vector<FloatType> &cost_deriv, int off, TensorType &&dCost_by_dOut, TensorType &dCost_by_dQ, TensorType &dCost_by_dK, TensorType &dCost_by_dV) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
size_t FLOPS(int value_or_deriv) const
void getParams(Vector<FloatType> &into, int off) const
void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, 3> TensorType

Private Members

int C
int E
int B
int Nparams_layer
bool setup
std::vector<std::unique_ptr<ScaledDotProductAttentionHeadComponent<FloatType>>> heads
BatchTensorConcatenateComponent<FloatType, 3> concatY
MatrixTensorContractComponent<FloatType, 3> multW_O

NormComponent

template<typename _FloatType, int TensDim>
class NormComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline NormComponent(int norm_dim, FloatType epsilon = 1e-5)
NormComponent(const NormComponent &r) = delete
NormComponent(NormComponent &&r) = default
Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in)
void deriv(Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Private Members

int norm_dim
FloatType epsilon
int in_size[TensDim]
size_t other_dim_vol
size_t stride
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
mutable RingBuffer<Tensor<FloatType, TensDim>> out_buf
mutable RingBuffer<Matrix<FloatType>> std_buf

ScaleComponent

template<typename _FloatType, int TensDim>
class ScaleComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline ScaleComponent(int scale_dim, int dimension_size, bool use_affine, bool use_bias, const Vector<FloatType> &affine_init, const Vector<FloatType> &bias_init)
ScaleComponent(const ScaleComponent &r) = delete
ScaleComponent(ScaleComponent &&r) = default
Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
void getParams(Vector<FloatType> &into, int off) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)

Private Members

int scale_dim
bool use_affine
bool use_bias
int nparams_val
int in_size[TensDim]
size_t other_dim_vol
size_t stride
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
bool setup
Vector<FloatType> gamma
Vector<FloatType> beta
mutable RingBuffer<Tensor<FloatType, TensDim>> in_buf

ScaledDotProductAttentionComponent

template<typename _FloatType>
class ScaledDotProductAttentionComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline ScaledDotProductAttentionComponent(int d_k, int d_v, int use_mask = false)
ScaledDotProductAttentionComponent(const ScaledDotProductAttentionComponent &r) = delete
ScaledDotProductAttentionComponent(ScaledDotProductAttentionComponent &&r) = default
Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &Q, const Tensor<FloatType, 3> &K, Tensor<FloatType, 3> &V)
void deriv(Tensor<FloatType, 3> &&dCost_by_dOut, Tensor<FloatType, 3> &dCost_by_dQ, Tensor<FloatType, 3> &dCost_by_dK, Tensor<FloatType, 3> &dCost_by_dV) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)

Private Members

int C
int B
int d_k
int d_v
bool setup
Batch3tensorPairContractComponent<FloatType> mulQKtoGetS
BatchedMatrixRowSoftMaxComponent<FloatType> softmaxS_to_SS
Batch3tensorPairContractComponent<FloatType> mulSSVtoGetOut

ScaledDotProductAttentionHeadComponent

template<typename _FloatType>
class ScaledDotProductAttentionHeadComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline ScaledDotProductAttentionHeadComponent(const Matrix<FloatType> &W_Q, const Matrix<FloatType> &W_K, const Matrix<FloatType> &W_V, bool use_mask = false)
ScaledDotProductAttentionHeadComponent(const ScaledDotProductAttentionHeadComponent &r) = delete
ScaledDotProductAttentionHeadComponent(ScaledDotProductAttentionHeadComponent &&r) = default
Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &Q, const Tensor<FloatType, 3> &K, const Tensor<FloatType, 3> &V)
void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&dCost_by_dOut, Tensor<FloatType, 3> &dCost_by_dQ, Tensor<FloatType, 3> &dCost_by_dK, Tensor<FloatType, 3> &dCost_by_dV) const
void update(int off, const Vector<FloatType> &new_params)
void step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
void getParams(Vector<FloatType> &into, int off) const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef MatrixTensorContractComponent<FloatType, 3> MatTensMulCptType

Private Members

int C
int B
int E
int d_k
int d_v
bool setup
MatTensMulCptType multWQ
MatTensMulCptType multWK
MatTensMulCptType multWV
ScaledDotProductAttentionComponent<FloatType> attention

SoftMaxComponent

template<typename _FloatType, int TensDim>
class SoftMaxComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline SoftMaxComponent(int softmax_dim, FloatType beta = 1.0)
SoftMaxComponent(const SoftMaxComponent &r) = delete
SoftMaxComponent(SoftMaxComponent &&r) = default
Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in) const
void deriv(Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
inline void resizeInputBuffer(size_t to)
inline void setBeta(FloatType _beta)

Private Members

int softmax_dim
FloatType beta
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
mutable RingBuffer<Tensor<FloatType, TensDim>> out_buf

BatchTensorDNNlayer

Defines

LAYER_TYPE
LAYER_TYPE

Functions

template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, int contract_dim, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_dnn_layer(const Matrix<FLOATTYPE(U)> &weights, int contract_dim, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_dnn_layer(int contract_dim, int fan_out, int fan_in, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto batch_tensor_unbiased_dnn_layer(int contract_dim, int fan_out, int fan_in, const ActivationFunc &activation, U &&u) -> LAYER_TYPE
template<typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, const ActivationFunc &activation, U &&u)
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, U &&u)
template<typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(int fan_out, int fan_in, const ActivationFunc &activation, U &&u)
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto dnn_layer(int fan_out, int fan_in, U &&u)
template<typename _FloatType, int TensDim, typename _InputType, typename Store, typename ActivationFunc>
class BatchTensorDNNlayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline BatchTensorDNNlayer(Store &&leaf, const Matrix<FloatType> &weights, const Vector<FloatType> &bias, int contract_dim, const ActivationFunc &activation)
inline BatchTensorDNNlayer(Store &&leaf, const Matrix<FloatType> &weights, int contract_dim, const ActivationFunc &activation)
BatchTensorDNNlayer(const BatchTensorDNNlayer &r) = delete
BatchTensorDNNlayer(BatchTensorDNNlayer &&r) = default
Tensor<FloatType, TensDim> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline size_t FLOPS(int value_or_deriv) const
inline int nparams() const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

ConvolutionLayer1D

Functions

template<typename U, typename ActivationFunc, typename PaddingFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto conv1d_layer(const Tensor<FLOATTYPE(U), 3> &filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, int stride, U &&u) -> ConvolutionLayer1D<FLOATTYPE(U), INPUTTYPE(U), DDST(u), ActivationFunc, PaddingFunc>
template<typename U, typename ActivationFunc, typename PaddingFunc, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto conv1d_layer(const Tensor<FLOATTYPE(U), 3> &filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, U &&u) -> ConvolutionLayer1D<FLOATTYPE(U), INPUTTYPE(U), DDST(u), ActivationFunc, PaddingFunc>
template<typename _FloatType, typename _InputType, typename Store, typename ActivationFunc, typename PaddingFunc>
class ConvolutionLayer1D

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE(typename Store::type) LayerInputTensorType
inline ConvolutionLayer1D(Store &&leaf, const Tensor<FloatType, 3> &_filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, int stride = 1)
ConvolutionLayer1D(const ConvolutionLayer1D &r) = delete
ConvolutionLayer1D(ConvolutionLayer1D &&r) = default
Tensor<FloatType, 3> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

Store leaf
int _input_tens_size[LayerInputTensorType::dimension()]
Tensor<FloatType, 3> filter
ActivationFunc activation_func
PaddingFunc padding_func
int depth
int channels
int kernel_size
int stride
bool init
int padded_data_len
int batch_size
mutable FLOPScounter value_FLOPS
mutable FLOPScounter deriv_FLOPS
mutable RingBuffer<Tensor<FloatType, 3>> leaf_buf
mutable RingBuffer<Tensor<FloatType, 3>> activation_deriv_buf

EmbedPositionsSinusoidalLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto embed_positions_sinusoidal_layer(U &&u)
template<typename _FloatType, typename _InputType, typename Store>
class EmbedPositionsSinusoidalLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline EmbedPositionsSinusoidalLayer(Store &&leaf)
EmbedPositionsSinusoidalLayer(const EmbedPositionsSinusoidalLayer &r) = delete
EmbedPositionsSinusoidalLayer(EmbedPositionsSinusoidalLayer &&r) = default
inline Tensor<FloatType, 3> value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

Store leaf
mutable FLOPScounter value_FLOPS

FlattenLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto flatten_layer(U &&u) -> FlattenLayer<FLOATTYPE(U), INPUTTYPE(U), DDST(u)>
template<typename _FloatType, typename _InputType, typename Store>
class FlattenLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE(typename Store::type) LayerInputTensorType
inline FlattenLayer(Store &&leaf)
FlattenLayer(const FlattenLayer &r) = delete
FlattenLayer(FlattenLayer &&r) = default
Matrix<FloatType> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Matrix<FloatType> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

Store leaf
int _input_tens_size[LayerInputTensorType::dimension()]
bool init

InputLayer

Functions

template<typename FloatType, typename InputType = Matrix<FloatType>>
inline InputLayer<FloatType, InputType> input_layer()
template<typename _FloatType, typename _InputType = Matrix<_FloatType>>
class InputLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline InputLayer()
inline InputLayer(InputLayer &&r) = default
inline InputLayer(const InputLayer &r) = delete
inline const InputType &value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, InputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

LayerCommon

Defines

ISLEAF(a)
FLOATTYPE(a)
INPUTTYPE(a)
LAYEROUTPUTTYPE(a)
LAYERTYPEOUTPUTTYPE(a)
struct LeafTag

MatrixTensorContractLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto matrix_tensor_contract_layer(const Matrix<FLOATTYPE(U)> &weights, U &&u) -> LAYER_TYPE
template<typename _FloatType, int TensDim, typename _InputType, typename Store>
class MatrixTensorContractLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline MatrixTensorContractLayer(Store &&leaf, const Matrix<FloatType> &weights)
MatrixTensorContractLayer(const MatrixTensorContractLayer &r) = delete
MatrixTensorContractLayer(MatrixTensorContractLayer &&r) = default
Tensor<FloatType, TensDim> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

MultiHeadCrossAttentionLayer

Defines

LAYER_TYPE
TEMPL

Functions

TEMPL auto multihead_cross_attention_layer(int Nheads, Matrix<FLOATTYPE(ChainKV)> const *const *W_Q, Matrix<FLOATTYPE(ChainKV)> const *const *W_K, Matrix<FLOATTYPE(ChainKV)> const *const *W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, Matrix<FLOATTYPE(ChainKV)> const *const *W_Q, Matrix<FLOATTYPE(ChainKV)> const *const *W_K, Matrix<FLOATTYPE(ChainKV)> const *const *W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_Q, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_K, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_Q, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_K, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, int E, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE
TEMPL auto multihead_cross_attention_layer(int Nheads, int E, ChainKV &&chain_KV, ChainQ &&chain_Q)
template<typename _FloatType, typename _InputType, typename StoreKV, typename StoreQ>
class MultiHeadCrossAttentionLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef Tensor<FloatType, 3> TensorType
typedef LeafTag tag

Public Functions

inline MultiHeadCrossAttentionLayer(StoreKV &&leaf_KV, StoreQ &&leaf_Q, int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
inline MultiHeadCrossAttentionLayer(StoreKV &&leaf_KV, StoreQ &&leaf_Q, int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadCrossAttentionLayer(const MultiHeadCrossAttentionLayer &r) = delete
MultiHeadCrossAttentionLayer(MultiHeadCrossAttentionLayer &&r) = default
inline TensorType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, TensorType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

StoreKV leaf_KV
StoreQ leaf_Q
MultiHeadAttentionComponent<FloatType> attention

MultiHeadSelfAttentionLayer

Defines

LAYER_TYPE

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, Matrix<FLOATTYPE(U)> const *const *W_Q, Matrix<FLOATTYPE(U)> const *const *W_K, Matrix<FLOATTYPE(U)> const *const *W_V, const Matrix<FLOATTYPE(U)> &W_O, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, Matrix<FLOATTYPE(U)> const *const *W_Q, Matrix<FLOATTYPE(U)> const *const *W_K, Matrix<FLOATTYPE(U)> const *const *W_V, const Matrix<FLOATTYPE(U)> &W_O, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(U)>> &W_Q, const std::vector<Matrix<FLOATTYPE(U)>> &W_K, const std::vector<Matrix<FLOATTYPE(U)>> &W_V, const Matrix<FLOATTYPE(U)> &W_O, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(U)>> &W_Q, const std::vector<Matrix<FLOATTYPE(U)>> &W_K, const std::vector<Matrix<FLOATTYPE(U)>> &W_V, const Matrix<FLOATTYPE(U)> &W_O, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, int E, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto multihead_self_attention_layer(int Nheads, int E, U &&u) -> LAYER_TYPE
template<typename _FloatType, typename _InputType, typename Store>
class MultiHeadSelfAttentionLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

MultiHeadSelfAttentionLayer(Store &&leaf, int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadSelfAttentionLayer(Store &&leaf, int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)
MultiHeadSelfAttentionLayer(const MultiHeadSelfAttentionLayer &r) = delete
MultiHeadSelfAttentionLayer(MultiHeadSelfAttentionLayer &&r) = default
Tensor<FloatType, 3> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, 3> LayerInputType

NormLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FLOATTYPE(U)> &affine_init, const Vector<FLOATTYPE(U)> &bias_init, FLOATTYPE(U) epsilon, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FLOATTYPE(U)> &affine_init, const Vector<FLOATTYPE(U)> &bias_init, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, FLOATTYPE(U) epsilon, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, U &&u) -> LAYER_TYPE
template<typename _FloatType, int TensDim, typename _InputType, typename Store>
class NormLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline NormLayer(Store &&leaf, int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FloatType> &affine_init, const Vector<FloatType> &bias_init, FloatType epsilon)
NormLayer(const NormLayer &r) = delete
NormLayer(NormLayer &&r) = default
inline Tensor<FloatType, TensDim> value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, TensDim> LayerInputType

PairJoinLayer

Functions

template<typename U, typename V, typename std::enable_if<ISLEAF(U) && ISLEAF(V) && std::is_same<INPUTTYPE(U), INPUTTYPE(V)>::value, int>::type = 0>
auto pair_join_layer(U &&u, V &&v)
template<typename _FloatType, typename _InputType, typename Store1, typename Store2>
class PairJoinLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef std::pair<LayerInputType1, LayerInputType2> LayerOutputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType1) LayerInputType1
typedef LAYERTYPEOUTPUTTYPE(StoredType2) LayerInputType2
inline PairJoinLayer(Store1 &&leaf1, Store2 &&leaf2)
PairJoinLayer(const PairJoinLayer &r) = delete
PairJoinLayer(PairJoinLayer &&r) = default
inline LayerOutputType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef Store1::type StoredType1
typedef Store2::type StoredType2

Private Members

Store1 leaf1
Store2 leaf2

PairSplitLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto pair_split_layer(U &&u)
template<typename _FloatType, typename _InputType, typename Store>
class PairSplitLayerLeader

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef Store::type StoredType
typedef LayerInputType::first_type LayerOutputType1
typedef LayerInputType::second_type LayerOutputType2

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType
inline PairSplitLayerLeader(Store &&leaf)
inline void cinc(int &i)
inline LayerOutputType1 first(const InputType &x)
inline LayerOutputType2 second(const InputType &x)
inline int deriv_complete(Vector<FloatType> &cost_deriv, int off, InputType *input_above_deriv_return)
inline int deriv_first(Vector<FloatType> &cost_deriv, int off, LayerOutputType1 &&_above_deriv, InputType *input_above_deriv_return)
inline int deriv_second(Vector<FloatType> &cost_deriv, int off, LayerOutputType2 &&_above_deriv, InputType *input_above_deriv_return)
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int getParams(Vector<FloatType> &into, int off)

Public Members

LayerInputType in_buf
LayerOutputType1 above_deriv1
LayerOutputType2 above_deriv2
Store leaf
int val_count
int deriv_count
int update_count
int step_count
int getparams_count
template<typename _FloatType, typename _InputType, typename Store>
class PairSplitLayer1

Public Types

typedef LayerInputType::first_type LayerOutputType
typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType
inline PairSplitLayer1(PairSplitLayerLeader<FloatType, InputType, Store> *leader)
PairSplitLayer1(const PairSplitLayer1 &r) = delete
inline PairSplitLayer1(PairSplitLayer1 &&r)
inline ~PairSplitLayer1()
inline LayerOutputType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef Store::type StoredType

Private Members

PairSplitLayerLeader<FloatType, InputType, Store> *leader
template<typename _FloatType, typename _InputType, typename Store>
class PairSplitLayer2

Public Types

typedef LayerInputType::second_type LayerOutputType
typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType
inline PairSplitLayer2(PairSplitLayerLeader<FloatType, InputType, Store> *leader)
PairSplitLayer2(const PairSplitLayer2 &r) = delete
PairSplitLayer2(PairSplitLayer2 &&r) = default
inline LayerOutputType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef Store::type StoredType

Private Members

PairSplitLayerLeader<FloatType, InputType, Store> *leader

ReplicateLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto replicate_layer(int N, U &&u)
template<typename _FloatType, typename _InputType, typename Store>
class ReplicateLayerLeader

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef Store::type StoredType

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputOutputType
inline ReplicateLayerLeader(Store &&leaf, int N)
inline void cinc(int &i)
inline LayerInputOutputType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return)
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int getParams(Vector<FloatType> &into, int off)

Public Members

LayerInputOutputType in_buf
std::vector<LayerInputOutputType> above_deriv
Store leaf
int N
int val_count
int deriv_count
int update_count
int step_count
int getparams_count
template<typename _FloatType, typename _InputType, typename Store>
class ReplicateLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputOutputType
inline ReplicateLayer(ReplicateLayerLeader<FloatType, InputType, Store> *leader, int instance, int N)
ReplicateLayer(const ReplicateLayer &r) = delete
inline ReplicateLayer(ReplicateLayer &&r)
inline ~ReplicateLayer()
inline LayerInputOutputType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef Store::type StoredType

Private Members

int instance
int N
ReplicateLayerLeader<FloatType, InputType, Store> *leader

ScaledDotProductSelfAttentionLayer

Defines

LAYER_TYPE

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto scaled_dotproduct_self_attention_layer(const Matrix<FLOATTYPE(U)> &W_Q, const Matrix<FLOATTYPE(U)> &W_K, const Matrix<FLOATTYPE(U)> &W_V, bool use_mask, U &&u) -> LAYER_TYPE
template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto scaled_dotproduct_self_attention_layer(const Matrix<FLOATTYPE(U)> &W_Q, const Matrix<FLOATTYPE(U)> &W_K, const Matrix<FLOATTYPE(U)> &W_V, U &&u) -> LAYER_TYPE
template<typename _FloatType, typename _InputType, typename Store>
class ScaledDotProductSelfAttentionLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline ScaledDotProductSelfAttentionLayer(Store &&leaf, const Matrix<FloatType> &W_Q, const Matrix<FloatType> &W_K, const Matrix<FloatType> &W_V, bool use_mask = false)
ScaledDotProductSelfAttentionLayer(const ScaledDotProductSelfAttentionLayer &r) = delete
ScaledDotProductSelfAttentionLayer(ScaledDotProductSelfAttentionLayer &&r) = default
Tensor<FloatType, 3> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
int getParams(Vector<FloatType> &into, int off) const
inline size_t FLOPS(int value_or_deriv) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, 3> LayerInputType

Private Members

int C
int E
int B
int d_k
int d_v
bool setup
ScaledDotProductAttentionHeadComponent<FloatType> attentionQKV
Store leaf

SkipConnection

Defines

LAYER_TYPE

Functions

template<typename Internal, typename Below, typename std::enable_if<ISLEAF(Internal) && ISLEAF(Below), int>::type = 0>
auto skip_connection(Internal &&internal, Below &&below) -> LAYER_TYPE
template<typename _FloatType, typename _InputType, typename ChainInternal, typename ChainBelow>
class SkipConnection

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef ChainBelow::type ChainBelowInternalType
typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(ChainBelowInternalType) LayerInputOutputType
inline SkipConnection(ChainInternal &&leaf_internal, ChainBelow &&leaf_below)
SkipConnection(const SkipConnection &r) = delete
SkipConnection(SkipConnection &&r) = default
LayerInputOutputType value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

ChainBelow leaf_below
ChainInternal leaf_internal

SoftMaxLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto softmax_layer(int softmax_dim, FLOATTYPE(U) beta, U &&u) -> LAYER_TYPE
template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto softmax_layer(int softmax_dim, U &&u) -> LAYER_TYPE
template<typename _FloatType, int TensDim, typename _InputType, typename Store>
class SoftMaxLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

inline SoftMaxLayer(Store &&leaf, int softmax_dim, FloatType beta = 1.0)
inline SoftMaxLayer(SoftMaxLayer &&r) = default
inline SoftMaxLayer(const SoftMaxLayer &r) = delete
Tensor<FloatType, TensDim> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)
inline void setBeta(FloatType beta)

Private Members

Store leaf
SoftMaxComponent<FloatType, TensDim> cpt

SumJoinLayer

Functions

template<typename U, typename V, typename std::enable_if<ISLEAF(U) && ISLEAF(V) && std::is_same<INPUTTYPE(U), INPUTTYPE(V)>::value, int>::type = 0>
auto sum_join_layer(U &&u, V &&v)
template<typename _FloatType, typename _InputType, typename Store1, typename Store2>
class SumJoinLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LayerInputType1 LayerInputOutputType
typedef LeafTag tag

Public Functions

inline SumJoinLayer(Store1 &&leaf1, Store2 &&leaf2)
SumJoinLayer(const SumJoinLayer &r) = delete
SumJoinLayer(SumJoinLayer &&r) = default
inline LayerInputOutputType value(const InputType &x)
inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
inline int update(int off, const Vector<FloatType> &new_params)
inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
inline int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Types

typedef Store1::type StoredType1
typedef Store2::type StoredType2

Private Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType1) LayerInputType1
typedef LAYERTYPEOUTPUTTYPE(StoredType2) LayerInputType2

Private Members

Store1 leaf1
Store2 leaf2

TransformerEncoderDecoderBlock

Functions

template<typename Below, typename ActivationFunc>
auto transformer_decoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, Below &&below)
template<typename Below, typename ActivationFunc>
auto transformer_encoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, Below &&below)
template<typename EncoderInput, typename DecoderInput, typename ActivationFunc>
auto transformer_cross_decoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, EncoderInput &&encoder_in, DecoderInput &&decoder_in)

UnflattenLayer

Functions

template<int OutDimension, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0>
auto unflatten_layer(int const *output_tens_dim, U &&u) -> UnflattenLayer<FLOATTYPE(U), OutDimension, INPUTTYPE(U), DDST(u)>
template<typename _FloatType, int OutDimension, typename _InputType, typename Store>
class UnflattenLayer

Public Types

typedef _FloatType FloatType
typedef _InputType InputType
typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE(typename Store::type) LayerInputTensorType
inline UnflattenLayer(Store &&leaf, int const *output_tens_size)
UnflattenLayer(const UnflattenLayer &r) = delete
UnflattenLayer(UnflattenLayer &&r) = default
Tensor<FloatType, OutDimension> value(const InputType &x)
int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, OutDimension> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const
int update(int off, const Vector<FloatType> &new_params)
int step(int off, const Vector<FloatType> &derivs, FloatType eps)
inline int nparams() const
inline size_t FLOPS(int value_or_deriv) const
int getParams(Vector<FloatType> &into, int off) const
inline void resizeInputBuffer(size_t to)

Private Members

Store leaf
int _output_tens_size[OutDimension]