Accelerator

Defines

strong_inline

DO_PRAGMA_(x)

DO_PRAGMA(x)

thread_num(a)

thread_max(a)

set_threads(a)

in_thread_parallel_region(a)

thread_for(i, num, ...)

thread_for3d(i1, n1, i2, n2, i3, n3, ...)

thread_for2d(i1, n1, i2, n2, ...)

accelerator

accelerator_inline

accelerator_barrier(dummy)

accelerator_for3dNB(iter1, num1, iter2, num2, iter3, num3, block2, ...)

accelerator_for3d(iter1, num1, iter2, num2, iter3, num3, block2, ...)

accelerator_for2dNB(iter1, num1, iter2, num2, block2, ...)

accelerator_for2d(iter1, num1, iter2, num2, block2, ...)

accelerator_forNB(iter1, num1, ...)

accelerator_for(iter, num, ...)

accelerator_for3d_shm(iter1, num1, iter2, num2, iter3, num3, block2, shm_size, ...)

accelerator_for2dNB_shm(iter1, num1, iter2, num2, block2, shm_size, ...)

accelerator_for2d_shm(iter1, num1, iter2, num2, block2, shm_size, ...)

accelerator_forNB_shm(iter1, num1, shm_size, ...)

accelerator_for_shm(iter, num, shm_size, ...)

autoView(ViewName, ObjName, mode)

doHost(a, ...)

doHost2(a, b, ...)

doHost3(a, b, c, ...)

Functions

void acceleratorInit(void)

void acceleratorReport()

inline void acceleratorCopyToDevice(void *to, void const *from, size_t bytes)

inline void acceleratorCopyFromDevice(void *to, void const *from, size_t bytes)

inline void acceleratorCopyDeviceToDevice(void *to, void const *from, size_t bytes)

inline void acceleratorCopyDeviceToDeviceAsynch(void *to, void const *from, size_t bytes)

inline void acceleratorCopySynchronize(void)

inline void acceleratorMemSet(void *base, int value, size_t bytes)

inline void *acceleratorAllocHost(size_t bytes)

inline void *acceleratorAllocShared(size_t bytes)

inline void *acceleratorAllocDevice(size_t bytes)

inline void acceleratorFreeHost(void *ptr)

inline void acceleratorFreeShared(void *ptr)

inline void acceleratorFreeDevice(void *ptr)

inline void profileStart()

inline void profileStop()

inline void labelRegionBegin(char const *label)

inline void labelRegionEnd()

template<typename FloatType> inline void atomicAdd(FloatType *p, const FloatType v)

template<typename ViewType> struct viewDeallocator

Public Functions

inline viewDeallocator(ViewType &v)

inline ~viewDeallocator()

Public Members

ViewType &v

Public Static Functions

static inline void free(ViewType &v)

ActivationFuncs

template<typename FloatType> class ReLU

Public Functions

void operator()(Matrix<FloatType> &x, Matrix<FloatType> *deriv = nullptr) const

template<int Dim> void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const

template<typename FloatType> class noActivation

Public Functions

inline void operator()(Matrix<FloatType> &x, Matrix<FloatType> *deriv = nullptr) const

template<int Dim> inline void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const

template<typename FloatType> class GeLU

Public Functions

template<int Dim> void operator()(Tensor<FloatType, Dim> &x, Tensor<FloatType, Dim> *deriv = nullptr) const

Comms

Functions

Communicators &communicators()

void initializeComms(int argc, char **argv)

inline int UniqueID()

template<typename FloatType> inline MPI_Datatype getMPIdataType()

template<typename FloatType> inline void commsReduce(FloatType *data, size_t data_len, const MPI_Comm &comm)

template<typename FloatType> inline void commsReduce(Vector<FloatType> &v, const MPI_Comm &comm)

template<typename FloatType> inline void commsBroadcast(FloatType *data, size_t data_len, int from_rank, const MPI_Comm &comm)

template<typename FloatType> inline void commsBroadcast(Vector<FloatType> &v, int from_rank, const MPI_Comm &comm)

template<typename FloatType> inline void commsBroadcast(Matrix<FloatType> &v, int from_rank, const MPI_Comm &comm)

class Communicators

Public Functions

Communicators(int argc, char **argv)

~Communicators()

inline int worldRank() const

inline int worldNrank() const

inline int nodeRank() const

inline int nodeNrank() const

inline int ddpRank() const

inline int ddpNrank() const

inline int pipelineRank() const

inline int pipelineNrank() const

inline bool isPipelineLeader() const

inline MPI_Comm &pipelineCommunicator()

inline MPI_Comm &ddpCommunicator()

void enableNodePipelining()

void enableColorPipelining(int rank_color)

void enableGlobalPipelining()

void disableParallelism()

void enableDDPnoPipelining()

void reportSetup()

Private Functions

void setupDDPcommunicator()

void freeCommunicators()

void enableDDPnoPipeliningInternal()

Private Members

MPI_Comm pipeline_comm

MPI_Comm ddp_comm

int world_rank

int world_nrank

int node_rank

int node_nrank

int pipeline_rank

int pipeline_nrank

bool is_pipeline_leader

int ddp_rank

int ddp_nrank

Private Static Functions

static void createCommJustThisRank(int world_rank, MPI_Comm &comm)

Components

DDP

Functions

template<typename FloatType> void ddpAverage(FloatType *data, size_t len, bool pipeline_bcast = false)

template<typename FloatType> void ddpAverage(Vector<FloatType> &v, bool pipeline_bcast = false)

DynamicModel

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> LayerWrapper<FLOATTYPE(U), INPUTTYPE(U), LAYEROUTPUTTYPE(U)> enwrap(U &&u)

template<typename FloatType, typename InputType, typename LayerOutputType> class LayerWrapperInternalBase

Public Functions

virtual LayerOutputType value(const InputType &x) = 0

virtual int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const = 0

virtual int nparams() const = 0

virtual size_t FLOPS(int value_or_deriv) const = 0

virtual void resizeInputBuffer(size_t to) = 0

virtual int getParams(Vector<FloatType> &into, int off) const = 0

virtual int step(int off, const Vector<FloatType> &derivs, FloatType eps) = 0

inline virtual ~LayerWrapperInternalBase()

template<typename Store, typename std::enable_if<ISSTORAGE(Store), int>::type = 0> class LayerWrapperInternal : public LayerWrapperInternalBase<Store::type::FloatType, Store::type::InputType, LAYEROUTPUTTYPE(Store::type)>

Public Types

typedef Store::type::FloatType FloatType

typedef Store::type::InputType InputType

Public Functions

typedef LAYEROUTPUTTYPE (typename Store::type) LayerOutputType

inline LayerWrapperInternal(Store &&layer)

inline virtual LayerOutputType value(const InputType &x) override

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const override

inline virtual int nparams() const override

inline virtual size_t FLOPS(int value_or_deriv) const

inline virtual int getParams(Vector<FloatType> &into, int off) const override

inline virtual int step(int off, const Vector<FloatType> &derivs, FloatType eps) override

inline virtual void resizeInputBuffer(size_t to) override

Public Members

Store layer

template<typename _FloatType, typename _InputType, typename _LayerOutputType> class LayerWrapper

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef _LayerOutputType LayerOutputType

typedef LeafTag tag

Public Functions

LayerWrapper(LayerWrapper &&r) = default

LayerWrapper &operator=(LayerWrapper &&r) = default

template<typename Store, typename std::enable_if<ISSTORAGE(Store), int>::type = 0> inline LayerWrapper(Store &&layer)

inline LayerOutputType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline void resizeInputBuffer(size_t to)

Private Members

std::unique_ptr<LayerWrapperInternalBase<FloatType, InputType, LayerOutputType>> layer

Embeddings

Functions

template<typename FloatType> Tensor<FloatType, 3> embedPositionsSinusoidal(const Tensor<FloatType, 3> &in, FLOPScounter *flops = nullptr)

template<typename FloatType> Tensor<FloatType, 2> embedPositionsSinusoidal(const Tensor<FloatType, 2> &in, FLOPScounter *flops = nullptr)

HPCortex

Init

Functions

void initialize(int argc, char **argv)

InstanceStorage

Defines

DDST(a)

ISSTORAGE(a)

struct StorageTag

template<typename T> struct LeafStore

Public Types

typedef StorageTag tag

typedef T type

Public Functions

inline LeafStore(T &&v)

LeafStore(const LeafStore &r) = delete

inline LeafStore(LeafStore &&r)

Public Members

T v

template<typename T> struct LeafRef

Public Types

typedef StorageTag tag

typedef T type

Public Functions

inline LeafRef(T &v)

LeafRef(const LeafRef &r) = delete

inline LeafRef(LeafRef &&r)

Public Members

T &v

template<typename T> struct deduceStorage

template<typename T> struct deduceStorage<T&>

Public Types

typedef LeafRef<T> type

template<typename T> struct deduceStorage<T&&>

Public Types

typedef LeafStore<T> type

Layers

Linalg

Functions

template<typename FloatType> void thinMulMatMatTranspose_p(FloatType *out_p, const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)

template<typename FloatType> Matrix<FloatType> thinMulMatMatTranspose(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)

template<typename FloatType> Matrix<FloatType> mulMatTransposeThinMat(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FLOPScounter *flops = nullptr)

template<typename FloatType> Matrix<FloatType> computeThinMatOuterProd(const Matrix<FloatType> &above_deriv, const Matrix<FloatType> &activation_deriv, FLOPScounter *flops = nullptr)

template<typename FloatType> Matrix<FloatType> axpyMatThinMat(const Matrix<FloatType> &a, const Matrix<FloatType> &b, const Vector<FloatType> &c, FLOPScounter *flops = nullptr)

template<typename FloatType> Tensor<FloatType, 3> batch3tensorContract(const Tensor<FloatType, 3> &A, const Tensor<FloatType, 3> &B, int contract_dimA, int contract_dimB, FloatType nrm = 1.0, FLOPScounter *flops = nullptr)

template<typename FloatType, int Dim> Tensor<FloatType, Dim> matrixBatchTensorAxpy(const Matrix<FloatType> &A, const Tensor<FloatType, Dim> &X, const Vector<FloatType> &Y, const int contract_dim, FLOPScounter *flops = nullptr)

template<typename FloatType, int Dim> void batchTensorContractToMatrix_p(FloatType *out_p, const Tensor<FloatType, Dim> &A, const Tensor<FloatType, Dim> &B, const int preserve_dim, FLOPScounter *flops = nullptr)

template<typename FloatType, int Dim> Tensor<FloatType, Dim> matrixBatchTensorContractRight(const Tensor<FloatType, Dim> &X, const Matrix<FloatType> &A, const int contract_dim, FLOPScounter *flops = nullptr)

template<typename FloatType, int Dim> Tensor<FloatType, Dim> matrixBatchTensorContractLeft(const Matrix<FloatType> &A, const Tensor<FloatType, Dim> &X, const int contract_dim, FLOPScounter *flops = nullptr)

LossFunctions

Defines

CWRP

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto mse_cost(U &&u) -> CWRP

template<typename CostFunc, typename U, typename std::enable_if<ISLEAF(U) && std::is_default_constructible<CostFunc>::value, int>::type = 0> auto cost_func_wrap(U &&u)

template<typename CostFunc, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto cost_func_wrap(U &&u, const CostFunc &cf)

template<typename Store, typename CostFunc> class CostFuncWrapper

Public Types

typedef Store::type::FloatType FloatType

typedef Store::type::InputType InputType

typedef CostFunc::PredictionType PredictionType

typedef CostFunc::ComparisonType ComparisonType

Public Functions

inline CostFuncWrapper(Store &&leaf, const CostFunc &cost = CostFunc())

inline FloatType loss(const InputType &x, const ComparisonType &y)

inline Vector<FloatType> deriv() const

inline PredictionType predict(const InputType &x)

template<typename _PredictionType = PredictionType, typename _InputType = InputType, int TensDimIn = _InputType::Dimension, int TensDimOut = _PredictionType::Dimension, typename std::enable_if<std::is_same<_PredictionType, Tensor<FloatType, TensDimOut>>::value && std::is_same<_InputType, Tensor<FloatType, TensDimIn>>::value && std::is_same<_PredictionType, ComparisonType>::value, int>::type = 0> inline Tensor<FloatType, TensDimOut - 1> predict(const Tensor<FloatType, TensDimIn - 1> &x, int batch_size)

inline void update(const Vector<FloatType> &new_params)

inline void step(const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline Vector<FloatType> getParams() const

Private Members

Store leaf

PredictionType ypred

ComparisonType yval

CostFunc cost

int nparam

template<typename OutputType> class MSEcostFunc

template<typename FloatType, int Dim> class MSEcostFunc<Tensor<FloatType, Dim>>

Public Types

typedef Tensor<FloatType, Dim> DataType

typedef DataType ComparisonType

typedef DataType PredictionType

Public Static Functions

static FloatType loss(const ComparisonType &y, const PredictionType &ypred)

static PredictionType layer_deriv(const ComparisonType &y, const PredictionType &ypred)

ManagedArray

template<typename FloatType> class ManagedArray

Public Functions

inline ManagedArray()

inline ManagedArray(size_t size, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

inline ManagedArray(size_t size, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

inline ManagedArray(const std::vector<FloatType> &init)

inline ManagedArray(ManagedArray &&r)

inline ManagedArray(const ManagedArray &r)

inline ManagedArray &operator=(ManagedArray &&r)

inline ManagedArray &operator=(const ManagedArray &r)

inline size_t size() const

inline View view(ViewMode mode) const

inline ~ManagedArray()

inline void fill(FloatType init, MemoryManager::Pool assign_pool = MemoryManager::Pool::DevicePool)

inline void lock() const

inline void unlock() const

Private Members

MemoryManager::HandleIterator handle

size_t _size

class View

Subclassed by Tensor< _FloatType, Dim >::View

Public Functions

inline accelerator_inline size_t size () const

inline accelerator_inline FloatType * data ()

inline accelerator_inline FloatType const * data () const

inline accelerator_inline FloatType & operator[] (const size_t i)

inline accelerator_inline FloatType operator[] (const size_t i) const

inline View(ViewMode mode, MemoryManager::HandleIterator handle, size_t _size)

inline View(ViewMode mode, const ManagedArray &parent)

inline void free()

Private Members

FloatType *v

size_t _size

MemoryManager::HandleIterator handle

MemoryManager

Enums

enum ViewMode

Values:

enumerator HostRead

enumerator HostWrite

enumerator DeviceRead

enumerator DeviceWrite

enumerator HostReadWrite

enumerator DeviceReadWrite

Functions

inline std::string memPoolManagerReport(bool detailed = false)

class MemoryManager

Public Types

enum Pool

Values:

enumerator DevicePool

enumerator HostPool

typedef std::list<Entry>::iterator EntryIterator

typedef std::list<Handle>::iterator HandleIterator

Public Functions

inline MemoryManager()

inline MemoryManager(size_t max_size_device, size_t max_size_host)

~MemoryManager()

inline void setVerbose(bool to)

void enableIOlogging()

inline void setDiskRoot(const std::string &to)

inline const std::string &getDiskRoot() const

inline void enableDeletionOfLocalDiskDataOnRestore(bool val = true)

inline void setPoolMaxSize(size_t to, Pool pool)

inline size_t getAllocatedBytes(Pool pool) const

size_t getDiskCachedBytes() const

size_t getDiskUsedBytes() const

std::string report(bool detailed = false) const

void evictToDisk(HandleIterator h)

HandleIterator allocate(size_t bytes, Pool pool = DevicePool)

void *openView(ViewMode mode, HandleIterator h)

void closeView(HandleIterator h)

void free(HandleIterator h)

inline size_t nOpenHandles() const

inline void lock(HandleIterator h)

inline void unlock(HandleIterator h)

Public Static Functions

static inline MemoryManager &globalPool()

Protected Functions

inline std::list<Entry> &getLRUpool(Pool pool)

inline std::map<size_t, std::list<Entry>, std::greater<size_t>> &getFreePool(Pool pool)

inline std::string poolName(Pool pool)

EntryIterator allocEntry(size_t bytes, Pool pool)

void sanityCheck()

void moveEntryToFreePool(EntryIterator it, Pool pool)

void freeEntry(EntryIterator it, Pool pool)

void deallocateFreePool(Pool pool, size_t until_allocated_lte = 0)

EntryIterator getEntry(size_t bytes, Pool pool)

void attachEntry(Handle &handle, Pool pool)

void touchEntry(Handle &handle, Pool pool)

void syncDeviceToHost(Handle &handle)

void syncHostToDevice(Handle &handle)

void syncHostToDisk(Handle &handle)

void syncDiskToHost(Handle &handle)

void syncForRead(Handle &handle, Pool pool)

void markForWrite(Handle &handle, Pool pool)

void prepareEntryForView(Handle &handle, Pool pool)

EntryIterator evictEntry(EntryIterator entry, bool free_it, Pool pool)

void removeDiskData(Handle &handle, bool in_memory_check = true)

Protected Attributes

bool verbose

std::ofstream *io_logger

std::list<Handle> handles

std::list<Entry> device_in_use_pool

std::map<size_t, std::list<Entry>, std::greater<size_t>> device_free_pool

std::list<HandleIterator> device_queued_prefetches

std::list<Entry> host_in_use_pool

std::map<size_t, std::list<Entry>, std::greater<size_t>> host_free_pool

std::list<HandleIterator> host_queued_prefetches

size_t device_allocated

size_t host_allocated

size_t device_pool_max_size

size_t host_pool_max_size

size_t local_disk_allocated

size_t device_allocated_HWM

size_t host_allocated_HWM

size_t local_disk_allocated_HWM

std::string disk_root

bool delete_local_diskdata_on_restore

Protected Static Functions

static void summarizePoolStatus(std::ostream &os, const std::string &descr, const std::map<size_t, std::list<Entry>, std::greater<size_t>> &pool_stat)

static void summarizePoolStatus(std::ostream &os, const std::string &descr, const std::map<size_t, int, std::greater<size_t>> &pool_stat)

struct Entry

Public Members

size_t bytes

void *ptr

Handle *owned_by

struct Handle

Public Functions

inline Handle()

Public Members

size_t lock_entry

bool device_valid

EntryIterator device_entry

bool host_valid

EntryIterator host_entry

size_t bytes

bool device_in_sync

bool host_in_sync

bool disk_in_sync

std::string disk_file

bool disk_file_exists

bool device_prefetch_underway

bool initialized

Optimizers

Functions

template<typename DataLoader, typename ModelType, typename Optimizer> std::vector<typename ModelType::FloatType> train(ModelType &model, const DataLoader &data, Optimizer &optimizer, int nepoch, int batch_size, bool suppress_logging = false)

template<typename FloatType, int DimX, int DimY> inline XYpair<FloatType, DimX + 1, DimY + 1> batchData(int const *indices, int batch_size, const std::vector<XYpair<FloatType, DimX, DimY>> &data)

template<typename FloatType, int DimX, int DimY, typename ModelType, typename Optimizer> std::vector<FloatType> train(ModelType &model, const std::vector<XYpair<FloatType, DimX, DimY>> &data, Optimizer &optimizer, int nepoch, int batch_size, bool suppress_logging = false)

template<typename FloatType> struct noScheduler

Public Functions

inline noScheduler(FloatType lr)

inline FloatType operator()(const int epoch) const

Public Members

FloatType lr

template<typename FloatType, typename LRscheduler = noScheduler<FloatType>> class GradientDescentOptimizer

Public Functions

inline GradientDescentOptimizer(const LRscheduler &sched)

template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0> inline GradientDescentOptimizer(FloatType lr)

inline void epochStart(int epoch, bool verbose = true)

inline Vector<FloatType> descentProfile(FloatType &step_size, const Vector<FloatType> &deriv) const

Private Members

LRscheduler sched

FloatType eps

template<typename FloatType> struct AdamParams

Public Functions

inline AdamParams(FloatType beta1 = 0.99, FloatType beta2 = 0.999, FloatType eps = 1e-8)

Public Members

FloatType beta1

FloatType beta2

FloatType eps

template<typename FloatType, typename LRscheduler = noScheduler<FloatType>> class AdamOptimizer

Public Functions

inline AdamOptimizer(const AdamParams<FloatType> &ap, const LRscheduler &sched)

inline AdamOptimizer(const LRscheduler &sched)

template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0> inline AdamOptimizer(const AdamParams<FloatType> &ap, FloatType lr)

template<typename L = LRscheduler, typename std::enable_if<std::is_same<L, noScheduler<FloatType>>::value, int>::type = 0> inline AdamOptimizer(FloatType lr)

inline void epochStart(int epoch, bool verbose = true)

inline Vector<FloatType> descentProfile(FloatType &step_size, const Vector<FloatType> &g)

Private Functions

inline void reset()

Private Members

LRscheduler sched

AdamParams<FloatType> ap

FloatType alpha

size_t t

Vector<FloatType> m

Vector<FloatType> v

template<typename FloatType> class DecayScheduler

Public Functions

inline DecayScheduler(FloatType eps, FloatType decay_rate)

inline FloatType operator()(const int epoch) const

Private Members

FloatType eps

FloatType decay_rate

template<typename FloatType, int DimX, int DimY> struct XYpair

Public Members

Tensor<FloatType, DimX> x

Tensor<FloatType, DimY> y

template<typename FloatType, int DimX, int DimY> class XYpairDataLoader

Public Functions

inline XYpairDataLoader(const std::vector<XYpair<FloatType, DimX, DimY>> &data)

inline size_t size() const

inline XYpair<FloatType, DimX + 1, DimY + 1> batch(int const *indices, int batch_size) const

Private Members

const std::vector<XYpair<FloatType, DimX, DimY>> &data

Padding

template<typename FloatType> class NoPadding

Public Functions

template<int Dim> inline Tensor<FloatType, Dim> padInput(const Tensor<FloatType, Dim> &in) const

template<int Dim> inline Tensor<FloatType, 3> unpadDeriv(const Tensor<FloatType, Dim> &deriv_pad) const

Public Static Functions

static inline int layerOutputLength(int input_size, int kernel_size, int stride)

template<typename FloatType> class SamePaddingZero1D

Public Functions

inline SamePaddingZero1D(int kernel_size, int stride = 1)

inline Tensor<FloatType, 3> padInput(const Tensor<FloatType, 3> &in) const

inline Tensor<FloatType, 3> unpadDeriv(const Tensor<FloatType, 3> &deriv_pad) const

Public Static Functions

static inline int layerOutputLength(int input_size, int kernel_size, int stride)

Private Members

int kernel_size

int stride

Performance

struct FLOPScounter

Public Functions

inline FLOPScounter()

inline size_t add(size_t v)

inline void lock()

inline bool locked() const

inline size_t value() const

Private Members

bool _locked

size_t _value

Pipelining

Defines

CWRP

Functions

template<typename PipelineBlockType> auto pipeline_mse_cost(PipelineBlockType &u) -> CWRP

template<typename InputType, typename OutputType, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto pipeline_block(U &&u, int const *block_output_dims, int const *block_input_dims) -> PipelineBlock<DDST(u), InputType, OutputType>

template<typename PipelineBlockType, typename CostFunc> class BatchPipelineCostFuncWrapper

Public Types

Values:

enumerator InputDimension

enumerator OutputDimension

typedef PipelineBlockType::FloatType FloatType

typedef PipelineBlockType::InputType InputType

typedef PipelineBlockType::OutputType OutputType

typedef PipelineBlockType::BlockInputType BlockInputType

Public Functions

typedef LAYEROUTPUTTYPE (PipelineBlockType) BlockOutputType

inline BatchPipelineCostFuncWrapper(PipelineBlockType &block, int call_batch_size, const CostFunc &cost = CostFunc())

inline FloatType loss(const InputType &x, const OutputType &y)

inline Vector<FloatType> deriv() const

inline void update(const Vector<FloatType> &new_params)

inline void step(const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline Vector<FloatType> getParams() const

inline Matrix<FloatType> predict(const Matrix<FloatType> &x)

inline Vector<FloatType> predict(const Vector<FloatType> &x)

Private Members

PipelineBlockType &block

CostFunc cost

int nparam

int value_lag

int deriv_lag

int call_batch_size

int rank

int nrank

Vector<FloatType> deriv_store

template<typename PipelineBlockType, typename CostFunc> class PipelineCostFuncWrapper

Public Functions

inline PipelineCostFuncWrapper(PipelineBlockType &block, const CostFunc &cost = CostFunc())

inline std::pair<FloatType, bool> loss(const OutputType &x, const OutputType &y)

inline std::pair<Vector<FloatType>, bool> deriv() const

inline void update(const Vector<FloatType> &new_params)

inline void step(const Vector<FloatType> &derivs, FloatType eps)

inline int nparams()

inline Vector<FloatType> getParams() const

inline int valueLag() const

inline int derivLag() const

Private Types

Values:

enumerator InputDimension

enumerator OutputDimension

typedef PipelineBlockType::FloatType FloatType

typedef PipelineBlockType::InputType InputType

typedef PipelineBlockType::OutputType OutputType

typedef PipelineBlockType::BlockInputType BlockInputType

Private Functions

typedef LAYEROUTPUTTYPE (PipelineBlockType) BlockOutputType

Private Members

PipelineBlockType &block

RingBuffer<OutputType> yval_buf_v

size_t calls

OutputType ypred

OutputType yval

CostFunc cost

int nparam

int value_lag

int deriv_lag

int rank

struct LockControlWrapper

Subclassed by LockControlWrapperTensor< FloatType, Dim >

Public Functions

virtual void lock() = 0

virtual void unlock() = 0

template<typename FloatType, int Dim> struct LockControlWrapperTensor : public LockControlWrapper 

Public Functions

inline LockControlWrapperTensor(Tensor<FloatType, Dim> const *v)

inline virtual void lock() override

inline virtual void unlock() override

Public Members

Tensor<FloatType, Dim> const *v

class PipelineCommunicator

Subclassed by PipelineBlock< BlockStore, InputType_, OutputType_ >

Public Functions

inline PipelineCommunicator()

inline int pipelineDepth() const

inline void waitAll(const std::vector<CommsRequest> &reqs)

template<typename T, typename U> inline void passLeft(std::vector<CommsRequest> &reqs, T const *send_bulk, T const *send_last, U *recv_first, U *recv_bulk) const

template<typename T, typename U> inline void passRight(std::vector<CommsRequest> &reqs, T const *send_first, T const *send_bulk, U *recv_bulk, U *recv_last) const

template<typename T> inline void passLeftLastToFirst(std::vector<CommsRequest> &reqs, T const *send_last, T *recv_first)

Public Static Functions

template<typename T> static inline CommsRequest send(const T &mat, int to)

template<typename T> static inline CommsRequest recv(T &mat, int from)

Protected Attributes

int rank

int next_rank

int prev_rank

int pipeline_depth

bool is_first

bool is_last

struct CommsRequest

Public Functions

template<typename FloatType, int Dim> inline CommsRequest(MPI_Request r, const Tensor<FloatType, Dim> &vv)

Public Members

std::unique_ptr<LockControlWrapper> v

MPI_Request req

template<typename BlockStore, typename InputType_, typename OutputType_> class PipelineBlock : public PipelineCommunicator 

Public Types

Values:

enumerator BlockInputDimension

enumerator BlockOutputDimension

typedef BlockStore::type::FloatType FloatType

typedef BlockStore::type::InputType BlockInputType

typedef InputType_ InputType

typedef OutputType_ OutputType

Public Functions

typedef LAYEROUTPUTTYPE (typename BlockStore::type) BlockOutputType

inline PipelineBlock(BlockStore &&_block, int const *block_output_dims_, int const *block_input_dims_)

PipelineBlock(const PipelineBlock &r) = delete

PipelineBlock(PipelineBlock &&r) = default

inline int nparams() const

inline int valueLag() const

inline int derivLag() const

inline OutputType value(const InputType &in)

inline void deriv(Vector<FloatType> &cost_deriv, OutputType &&above_deriv)

inline void update(const Vector<FloatType> &new_params)

inline void step(const Vector<FloatType> &derivs, FloatType eps)

inline void getParams(Vector<FloatType> &into) const

Private Functions

template<typename OutType, typename B, typename std::enable_if<!std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0> inline OutType get_as(B &&v)

template<typename OutType, typename B, typename std::enable_if<std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0> inline OutType get_as(B &&v)

template<typename OutType, typename B, typename std::enable_if<!std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0> inline const OutType &get_as(const B &v)

template<typename OutType, typename B, typename std::enable_if<std::is_same<typename std::decay<B>::type, OutType>::value, int>::type = 0> inline const OutType &get_as(const B &v)

Private Members

BlockStore block

int block_output_dims[BlockOutputDimension]

int block_input_dims[BlockInputDimension]

int nparam

int stage_off

BlockInputType prev_block_in

BlockOutputType prev_above_deriv

Vector<FloatType> prev_cost_deriv_passright

int dcalls

Random

Typedefs

typedef std::mt19937 GlobalRNGtype

Functions

GlobalRNGtype &globalRNG()

inline void reseedGlobalRNG(size_t seed)

template<typename FloatType, int Dim, typename Dist, typename RNG> void random(Tensor<FloatType, Dim> &m, Dist &dist, RNG &rng)

template<typename FloatType, int Dim, typename RNG> void uniformRandom(Tensor<FloatType, Dim> &m, RNG &rng, FloatType min = FloatType(-1.0), FloatType max = FloatType(1.0))

template<typename FloatType, int Dim> inline void uniformRandom(Tensor<FloatType, Dim> &m, FloatType min = FloatType(-1.0), FloatType max = FloatType(1.0))

template<typename FloatType, typename RNG> void glorotUniformRandom(Matrix<FloatType> &m, RNG &rng, FloatType gain = FloatType(1.0))

template<typename FloatType> inline void glorotUniformRandom(Matrix<FloatType> &m, FloatType gain = FloatType(1.0))

template<typename FloatType, typename RNG> size_t drawWeightedRandomIndex(FloatType const *weights, int nweights, size_t stride, RNG &rng)

template<typename FloatType> size_t drawWeightedRandomIndex(FloatType const *weights, int nweights, size_t stride)

Variables

constexpr size_t default_seed = 1234

RingBuffer

template<typename T> class RingBuffer

Public Functions

inline RingBuffer(size_t size)

inline RingBuffer()

inline void resize(size_t size)

inline void push(T &&v)

inline T pop()

inline bool isFilled() const

inline size_t size() const

inline const T &latest() const

Private Members

std::vector<T> ring

size_t off

bool filled

Serialization

Enums

enum class Endianness

Values:

enumerator Big

enumerator Little

enumerator System

Functions

std::string toString(const Endianness e)

Endianness endianness()

uint8_t BitReverseTable256(size_t i)

template<typename T> inline T bitReverse(T in)

class BinaryWriter

Public Functions

BinaryWriter(const std::string &filename, const Endianness end = Endianness::System)

template<typename T, int Dim> void write(const Tensor<T, Dim> &t)

template<typename Model, typename std::enable_if<ISLEAF(Model), int>::type = 0> void write(const Model &model)

template<typename Store, typename CostFunc> void write(const CostFuncWrapper<Store, CostFunc> &model)

inline void close()

Private Functions

template<typename T> inline void writeValue(T v)

Private Members

std::ofstream of

bool do_flip

class BinaryReader

Public Functions

BinaryReader(const std::string &filename)

template<typename T, int Dim> void read(Tensor<T, Dim> &t)

template<typename Model, typename std::enable_if<ISLEAF(Model), int>::type = 0> void read(Model &model)

template<typename Store, typename CostFunc> void read(CostFuncWrapper<Store, CostFunc> &model)

inline void close()

Private Functions

template<typename T> inline T readValue()

Private Members

std::ifstream of

bool do_flip

Tensors

Defines

_1D_TENSOR_ONLY

_2D_TENSOR_ONLY

_3D_TENSOR_ONLY

_4D_TENSOR_ONLY

Typedefs

template<typename FloatType> using Vector = Tensor<FloatType, 1>: Alias vector to 1D tensor.

template<typename FloatType> using Matrix = Tensor<FloatType, 2>: Alias matrix to 2D tensor.

Functions

template<size_t Dim> accelerator_inline size_t tensorSize (int const *dims)

Compute the linear size of a tensor of dimension “Dim” and the provided dimensions.

Parameters:: dims – The tensor dimension (array of size Dim)

template<size_t Dim> accelerator_inline size_t tensorOffset (int const *coord, int const *dims)

Compute the linear (pointer) offset of a specific coordinate within a tensor of dimension “Dim” and the provided dimensions.

Parameters:

coord – The coordinate (array of size Dim)
dims – The tensor dimension (array of size Dim)

template<size_t Dim> accelerator_inline void tensorOffsetUnmap (int *coord, int const *dims, size_t offset)

Compute the coordinate associated with a specific linear (pointer) offset for a tensor of dimension “Dim” and the provided dimensions.

Parameters:

coord – [out] The tensor coordinate (array of size Dim)
dims – [in] The tensor dimension (array of size Dim)
offset – [in] The input linear offset

template<int Dim> accelerator_inline size_t tensorDimensionStride (int iter_dim, int const *size)

Compute the stride for iterating over a specific dimension for a tensor of dimension “Dim” with the provided dimensions.

Parameters:

iter_dim – The dimension that will be iterated over
size – The tensor dimension (array of size Dim)

template<int Dim> accelerator_inline size_t tensorDimensionBase (int iter_dim, int const *other_coord, int const *size)

Compute the linear (pointer) offset for the base element for iterating over a specific dimension of a tensor of dimension “Dim”.

Parameters:

iter_dim – The dimension that will be iterated over
other_coord – The coordinates for the other dimensions (array of size Dim-1)
size – The tensor dimension (array of size Dim)

template<int Dim> accelerator_inline size_t batchTensorDimensionBaseLin (int iter_dim, int batch_idx, size_t other_dim_lin, int const *size)

Compute the linear (pointer) offset for the base element for iterating over a specific dimension for a batch-tensor (last dim is the batch dimension) of dimension “Dim”.

Parameters:

iter_dim – The dimension that will be iterated over
batch_idx – The batch index (coordinate in last dimension)
other_dim_lin – The coordinates in dimensions apart from iter_dim and Dim-1 expressed as a lexicographic linear index in descending order, e.g. z + size_z * (y + size_y * x)
size – The tensor dimension (array of size Dim)

template<typename FloatType> void pokeColumn(Matrix<FloatType> &into, int col, const Vector<FloatType> &data)

Insert a vector as a particular column of a matrix, i.e. into(:,col) = data(:)

Parameters:

into – The target matrix
col – The column index
data – The input column

template<typename FloatType> void pokeRow(Matrix<FloatType> &into, int row, const Vector<FloatType> &data)

Insert a vector as a particular row of a matrix, i.e. into(row,:) = data(:)

Parameters:

into – The target matrix
row – The row index
data – The input row

template<typename FloatType> Vector<FloatType> peekColumn(const Matrix<FloatType> &m, int col)

Retrieve a specific column of a matrix m, i.e. return m(:,col)

Parameters:

m – The matrix
col – The column index

template<typename FloatType> Matrix<FloatType> peekColumns(const Matrix<FloatType> &m, int col_start, int col_end)

Retrieve multiple consecutive columns of a matrix m, i.e. return m(:,col_start:col_end+1)

Parameters:

m – The matrix
col_start – The first column index
col_end – The last column index

template<typename FloatType> void pokeColumns(Matrix<FloatType> &into, int col_start, int col_end, const Matrix<FloatType> &cols)

Insert multiple consecutive columns into a matrix, i.e. into(:,col_start:col_end+1) = cols(:,:)

Parameters:

into – The matrix in which to insert the columns
col_start – The first column index
col_end – The last column index
cols – The matrix containing the columns to insert (its number of columns must be col_end-col_start+1)

template<typename FloatType> std::ostream &operator<<(std::ostream &os, const Vector<FloatType> &v): Output a vector to a stream.

template<typename FloatType> std::ostream &operator<<(std::ostream &os, const Matrix<FloatType> &v): Output a matrix to a stream.

template<typename FloatType> Vector<FloatType> operator*(const Matrix<FloatType> &A, const Vector<FloatType> &x): Perform the matrix-vector product of A and x.

template<typename FloatType, int Dim> Tensor<FloatType, Dim> &operator+=(Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b): Addition-assignment operator for tensors.

template<typename FloatType, int Dim> Tensor<FloatType, Dim> operator+(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b): Addition operator for tensors.

template<typename FloatType, int Dim> Tensor<FloatType, Dim> &operator-=(Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b): Subtraction-assignment operator for tensors.

template<typename FloatType, int Dim> Tensor<FloatType, Dim> operator-(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b): Subtraction operator for tensors.

template<typename FloatType, int Dim> Tensor<FloatType, Dim> &operator*=(Tensor<FloatType, Dim> &a, FloatType eps): Scalar multiplication-assignment operator for tensors.

template<typename FloatType, int Dim> Tensor<FloatType, Dim> operator*(FloatType eps, const Tensor<FloatType, Dim> &b): Scalar left-multiplication operator for tensors.

template<typename FloatType, int Dim> inline Tensor<FloatType, Dim> operator*(const Tensor<FloatType, Dim> &b, FloatType eps): Scalar right-multiplication operator for tensors.

template<int Dim, typename FloatType> Vector<FloatType> flatten(const Tensor<FloatType, Dim> &t): “Flatten” a tensor into a vector. The output mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType> FloatType *flatten(FloatType *host_ptr, const Tensor<FloatType, Dim> &in)

“Flatten” a tensor into a pre-allocated host array and return the pointer to the element of the array one past the flattened tensor.

Parameters:

host_ptr – The host array destination. The output mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)
in – The input tensor

Returns:

A pointer to the element of the array one past the flattened tensor. Note: the copy is performed on the host side.

template<int Dim, typename FloatType> void unflatten(Tensor<FloatType, Dim> &out, const Vector<FloatType> &t)

“Unflatten” vector into tensor

Parameters:

out – The output tensor. Its dimensions should be set correctly prior to calling this function
t – The input vector. The input mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType> FloatType const *unflatten(Tensor<FloatType, Dim> &out, FloatType const *host_ptr)

“Unflatten” a tensor from a pre-allocated host array and return the pointer to the element of the array one past the flattened tensor.

Parameters:

out – The output tensor. Its dimensions should be set correctly prior to calling this function
host_ptr – The input array pointer. The input mapping is lexicographic in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

Returns:

A pointer to the element of the array one past the flattened tensor. Note: the copy is performed on the host side.

template<int Dim1, int Dim2, typename FloatType> Vector<FloatType> flatten2(const Tensor<FloatType, Dim1> &t1, const Tensor<FloatType, Dim2> &t2)

Flatten two tensors into a single contiguous array.

Parameters:

t1 – The first tensor
t2 – The second tensor

Returns:

An output vector of length t1.data_len() + t2.data_len(), where the elements within the sub-arrays are obtained from their corresponding tensor via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim1, int Dim2, typename FloatType> void unflatten2(Tensor<FloatType, Dim1> &t1, Tensor<FloatType, Dim2> &t2, const Vector<FloatType> &v)

Unflatten two tensors from a single contiguous array.

The output tensor dimensions should be set appropriately prior to calling this function

Parameters:

t1 – [out] The first tensor
t2 – [out] The second tensor
v – [in] An input vector of length t1.data_len() + t2.data_len(), where the elements within the sub-arrays map to their corresponding tensor coordinates via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType> Vector<FloatType> flattenNsameDim(Tensor<FloatType, Dim> const *const *tens, int N)

Flatten N tensors of the same dimension into a single contiguous array.

Parameters:

tens – An array of pointers to input tensors
N – The number of tensors

Returns:

An output vector of length \sum_i tens[i].data_len(), where the elements within the sub-arrays are obtained from their corresponding tensor via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType> void unflattenNsameDim(Tensor<FloatType, Dim> *const *tens, int N, const Vector<FloatType> &v)

Unflatten N tensors of the same dimension from a single contiguous array.

Parameters:

tens – The output tensor array. The tensor dimensions should be set appropriately prior to calling this function.
N – The number of tensors
v – The input vector. This must have length \sum_i tens[i].data_len(), where the elements within the sub-arrays map to their corresponding tensor coordinates via a lexicographic mapping in descending order, e.g. (x,y,z) -> z + sizez*(y + sizey*x)

template<int Dim, typename FloatType> Tensor<FloatType, Dim> batchTensorConcatenate(Tensor<FloatType, Dim> const *const *in, int Ntens, int concat_dim)

Concatenate (stack) Ntens tensors along a dimension concat_dim < Dim-1 (last dim is assumed to be the batch index).

Dimensions other than concat_dim must all have the same size.

Parameters:

in – The input tensor array
Ntens – The number of tensors
concat_dim – The dimension along which the concatenation is performed

template<int Dim, typename FloatType> void batchTensorSplit(Tensor<FloatType, Dim> *const *out, int Ntens, const Tensor<FloatType, Dim> &in, int split_dim)

Split a tensor along a dimension split_dim < Dim-1 (last dim is the batch index) into multiple tensors.

Dimensions other than split_dim must all have the same size.

Parameters:

out – The output tensors. These should be pre-initialized to the appropriate sizes.
Ntens – The number of output tensors
in – The input tensor
split_dim – The dimension along which to split

template<int Dim, typename FloatType> double norm2(const Tensor<FloatType, Dim> &T): Return the tensor norm^2, i.e. \sum_{i,j,k,…} T[i,j,k,…]^2.

template<typename _FloatType, int Dim> struct Tensor

#include <Tensors.hpp>

A class for tensors of arbitrary dimension and floating point type.

Public Types

Values:

enumerator Dimension

typedef _FloatType FloatType: The floating point type

typedef const int *Dims: Array type for tensor dimensions

typedef const int *Coord: Array type for tensor coordinates

Public Functions

inline Tensor(): Default constructor for a zero-size tensor.

inline Tensor(Dims dims, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:

dims – The tensor dimensions
alloc_pool – The memory pool for the initial allocation (default: device)

inline Tensor(Dims dims, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a tensor with the provided dimensions uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:

dims – The tensor dimensions
init – The initial value for all elements
alloc_pool – The memory pool for the initial allocation (default: device)

inline Tensor(Dims dims, const std::vector<FloatType> &init_vals)

Construct a tensor with the provided dimensions initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:

dims – The tensor dimensions
init_vals – The initial values with lexicographic mapping in descending order, e.g. z + size_z * (y + size_y * x)

inline Tensor(Dims dims, FloatType const *init_vals)

Construct a tensor with the provided dimensions initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:

dims – The tensor dimensions
init_vals – The initial values with lexicographic mapping in descending order, e.g. z + size_z * (y + size_y * x)

inline _1D_TENSOR_ONLY Tensor(int len, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 1D tensor (vector) with the provided length with the initial memory allocation in the provided pool.

Parameters:

len – The vector length
alloc_pool – The memory pool for the initial allocation (default: device)

inline _1D_TENSOR_ONLY Tensor(int len, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 1D tensor (vector) with the provided length uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:

len – The vector length
init – The initial value for all elements
alloc_pool – The memory pool for the initial allocation (default: device)

inline _1D_TENSOR_ONLY Tensor(const std::vector<FloatType> &init_vals)

Construct a 1D tensor (vector) initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:: init_vals – The initial values

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 2D tensor (matrix) with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:

size0 – The size of the 1st dimension (number of rows)
size1 – The size of the 2nd dimension (number of columns)
alloc_pool – The memory pool for the initial allocation (default: device)

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 2D tensor (matrix) with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:

size0 – The size of the 1st dimension (number of rows)
size1 – The size of the 2nd dimension (number of columns)
init – The initial value for all elements
alloc_pool – The memory pool for the initial allocation (default: device)

inline _2D_TENSOR_ONLY Tensor(int size0, int size1, const std::vector<FloatType> &init_vals)

Construct a 2D tensor (matrix) with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:

size0 – The size of the 1st dimension (number of rows)
size1 – The size of the 2nd dimension (number of columns)
init_vals – The initial values with lexicographic mapping y + size1*x for coord (x,y)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 3D tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:

size0 – The size of the 1st dimension
size1 – The size of the 2nd dimension
size2 – The size of the 3rd dimension
alloc_pool – The memory pool for the initial allocation (default: device)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 3D tensor with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:

size0 – The size of the 1st dimension
size1 – The size of the 2nd dimension
size2 – The size of the 3rd dimension
init – The initial value for all elements
alloc_pool – The memory pool for the initial allocation (default: device)

inline _3D_TENSOR_ONLY Tensor(int size0, int size1, int size2, const std::vector<FloatType> &init_vals)

Construct a 3D tensor with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:

size0 – The size of the 1st dimension
size1 – The size of the 2nd dimension
size2 – The size of the 3rd dimension
init_vals – The initial values with lexicographic mapping z + size2*(y + size1*x) for coord (x,y,z)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 4D tensor with the provided dimensions with the initial memory allocation in the provided pool.

Parameters:

size0 – The size of the 1st dimension
size1 – The size of the 2nd dimension
size2 – The size of the 3rd dimension
size3 – The size of the 4th dimension
alloc_pool – The memory pool for the initial allocation (default: device)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, FloatType init, MemoryManager::Pool alloc_pool = MemoryManager::Pool::DevicePool)

Construct a 4D tensor with the provided dimensions, uniformly initialized with the provided value with the initial memory allocation in the provided pool.

Parameters:

size0 – The size of the 1st dimension
size1 – The size of the 2nd dimension
size2 – The size of the 3rd dimension
size3 – The size of the 4th dimension
init – The initial value for all elements
alloc_pool – The memory pool for the initial allocation (default: device)

inline _4D_TENSOR_ONLY Tensor(int size0, int size1, int size2, int size3, const std::vector<FloatType> &init_vals)

Construct a 4D tensor with the provided dimensions, initialized from the provided array. The initial memory allocation will be in the host pool.

Parameters:

size0 – The size of the 1st dimension
size1 – The size of the 2nd dimension
size2 – The size of the 3rd dimension
size3 – The size of the 4th dimension
init_vals – The initial values with lexicographic mapping t + size3*(z + size2*(y + size1*x) ) for coord (x,y,z,t)

inline int const *sizeArray() const: Return the tensor dimensions as an array pointer.

inline int size(int i) const

Return the tensor size along a specific dimension.

Parameters:: i – The dimension

inline std::string sizeArrayString() const: Return the tensor dimensions as a string.

inline size_t data_len() const: Return the linear dimension (flattened size) of the tensor, or equivalently, the total number of elements.

inline View view(ViewMode mode) const: Return a view to this tensor opened with a specific view mode.

inline void lock() const: “Lock” the memory region associated with this object such that it cannot be auto-evicted to free space in a memory pool. A possible use case is to ensure a memory region remains valid while performing an asynchronous background copy.

inline void unlock() const: “Unlock” the memory region, allowing it to be evicted. This is the default state.

Tensor sliceLastDimension(int idx_start, int idx_end) const: Return a tensor where the last dimension contains the slice between idx_start and idx_end (inclusive). E.g., for a 3D tensor T, return T(:,:,idx_start:idx_end+1)

void pokeLastDimension(const Tensor<FloatType, Dim - 1> &ins, const int idx)

Insert a tensor of Dim-1 such that (*this)(i,j,k,…, idx) = ins(i,j,k,…). E.g., for a 3D tensor T and 2D input I, set T[:,:,idx] = I[:,:].

Parameters:

ins – The Dim-1 dimensional tensor to insert
idx – The index in the last dimension on which to insert the tensor

Tensor<FloatType, Dim - 1> peekLastDimension(const int idx) const

Return a tensor of dimension Dim-1 such that out(i,j,k,…) = (*this)(i,j,k,…, idx). E.g., for a 3D tensor T, return T[:,:,idx].

Parameters:: idx – The index in the last dimension from which to extract the tensor

Public Static Functions

static inline constexpr int dimension(): Return the tensor dimension.

Private Members

ManagedArray<FloatType> vals: Memory-contiguous container for tensor data

int _size[Dim]: Tensor dimensions

class View : private ManagedArray<FloatType>::View

#include <Tensors.hpp>

The tensor View accessor class

Public Functions

inline View(ViewMode mode, const Tensor<FloatType, Dim> &parent)

Construct a view with a specific view mode and parent object.

Parameters:

mode – The view mode
parent – The parent object

inline void free(): Free the view. This must be called explicitly once the view is no longer needed.

inline accelerator_inline FloatType & operator() (const Coord coord): Access the tensor at the provided coordinate.

inline accelerator_inline FloatType operator() (const Coord coord) const: Access the tensor at the provided coordinate.

inline _1D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i): Access the 1D tensor at the index (i)

inline _1D_TENSOR_ONLY accelerator_inline FloatType operator() (int i) const: Access the 1D tensor at the index (i)

inline _2D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j): Access the 2D tensor at the coordinate (i,j)

inline _2D_TENSOR_ONLY accelerator_inline FloatType operator() (int i, int j) const: Access the 2D tensor at the coordinate (i,j)

inline _3D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j, int k): Access the 3D tensor at the coordinate (i,j,k)

inline _3D_TENSOR_ONLY accelerator_inline FloatType operator() (int i, int j, int k) const: Access the 3D tensor at the coordinate (i,j,k)

inline _4D_TENSOR_ONLY accelerator_inline FloatType & operator() (int i, int j, int k, int l): Access the 4D tensor at the coordinate (i,j,k,l)

inline _4D_TENSOR_ONLY accelerator_inline FloatType operator() (int i, int j, int k, int l) const: Access the 4D tensor at the coordinate (i,j,k,l)

inline accelerator_inline FloatType const * data () const: Return a pointer to the underlying array.

inline accelerator_inline FloatType * data (): Return a pointer to the underlying array.

inline accelerator_inline size_t data_len () const: Return the linear dimension (flattened size) of the tensor, or equivalently, the total number of elements.

inline accelerator_inline size_t size (int i) const

Return the tensor size along a specific dimension.

Parameters:: i – The dimension

inline accelerator_inline int const * sizeArray () const: Return the tensor dimensions as an array pointer.

inline accelerator_inline FloatType & compact3 (int i, int j, int k)

Access a tensor element at a coordinate expressed such that the first Dim-2 dimensions are expressed lexicographically.

Parameters:

i – The first Dim-2 dimensions expressed lexicographically in descending order (e.g. z+sizez*(y+sizey*x))
j – The index of dimension Dim-2
k – The index of dimension Dim-1

inline accelerator_inline FloatType compact3 (int i, int j, int k) const

Access a tensor element at a coordinate expressed such that the first Dim-2 dimensions are expressed lexicographically.

Parameters:

i – The first Dim-2 dimensions expressed lexicographically in descending order (e.g. z+sizez*(y+sizey*x))
j – The index of dimension Dim-2
k – The index of dimension Dim-1

Private Types

typedef ManagedArray<FloatType>::View Base

Private Members

int *_size: Tensor dimensions

bool is_device_ptr: Track whether the tensor dimensions array is allocated on the device or host

Testing

Functions

template<typename FloatType> bool near(FloatType a, FloatType b, FloatType rel_tol, FloatType *reldiff_p = nullptr)

template<typename FloatType> bool near(const Vector<FloatType> &a, const Vector<FloatType> &b, FloatType rel_tol, bool verbose = false)

template<typename FloatType> bool near(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FloatType rel_tol, bool verbose = false)

template<typename FloatType> bool abs_near(FloatType a, FloatType b, FloatType abs_tol, FloatType *absdiff_p = nullptr)

template<typename FloatType> bool abs_near(const Matrix<FloatType> &a, const Matrix<FloatType> &b, FloatType abs_tol, bool verbose = false)

template<typename FloatType, int Dim> bool abs_near(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b, FloatType abs_tol, bool verbose = false)

template<typename FloatType, int Dim> bool equal(const Tensor<FloatType, Dim> &a, const Tensor<FloatType, Dim> &b, bool verbose = false)

template<typename Op, typename PreOp> void benchmark(double &mean, double &std, int nrpt, int nwarmup, const Op &op, const PreOp &preop)

template<typename TensType> TensType::FloatType testCost(const Vector<typename TensType::FloatType> &c, const TensType &v)

template<typename ModelType> void testDeriv(ModelType &model, int const *in_sizes, int const *out_sizes, typename ModelType::FloatType delta = typename ModelType::FloatType(1e-4))

template<typename ComponentWrapper> void testComponentDeriv(ComponentWrapper &cpt, typename ComponentWrapper::FloatType delta = typename ComponentWrapper::FloatType(1e-4), bool _2nd_order = false)

template<typename FloatType> std::vector<FloatType> softMaxVector(const std::vector<FloatType> &v, FloatType beta = 1.0)

Timing

Functions

inline std::chrono::system_clock::time_point now()

inline size_t usSinceEpoch()

inline size_t usCountSince(const std::chrono::system_clock::time_point &when)

inline double since(const std::chrono::system_clock::time_point &when)

Batch3tensorPairContractComponent

template<typename _FloatType> class Batch3tensorPairContractComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline Batch3tensorPairContractComponent(int contract_dim_A, int contract_dim_B, FloatType nrm = 1.0)

Batch3tensorPairContractComponent(const Batch3tensorPairContractComponent &r) = delete

Batch3tensorPairContractComponent(Batch3tensorPairContractComponent &&r) = default

inline Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &A, const Tensor<FloatType, 3> &B)

inline void deriv(Tensor<FloatType, 3> &&_dcost_by_dC, Tensor<FloatType, 3> &dcost_by_dA, Tensor<FloatType, 3> &dcost_by_dB) const

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline void resizeInputBuffer(size_t to)

Private Members

FloatType nrm

int contract_dim_A

int contract_dim_B

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

mutable RingBuffer<Tensor<FloatType, 3>> A_buf

mutable RingBuffer<Tensor<FloatType, 3>> B_buf

BatchedMatrixRowSoftMaxComponent

template<typename _FloatType> class BatchedMatrixRowSoftMaxComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchedMatrixRowSoftMaxComponent(bool use_mask = false, FloatType beta = 1.0)

BatchedMatrixRowSoftMaxComponent(const BatchedMatrixRowSoftMaxComponent &r) = delete

BatchedMatrixRowSoftMaxComponent(BatchedMatrixRowSoftMaxComponent &&r) = default

Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &in) const

void deriv(Tensor<FloatType, 3> &&dcost_by_dOut, Tensor<FloatType, 3> &dcost_by_dIn) const

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

inline void resizeInputBuffer(size_t to)

Private Members

FloatType beta

mutable RingBuffer<Tensor<FloatType, 3>> out_buf

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

bool use_mask

BatchTensorConcatenateComponent

template<typename _FloatType, int TensDim> class BatchTensorConcatenateComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchTensorConcatenateComponent(int concat_dim, int Ntens)

BatchTensorConcatenateComponent(const BatchTensorConcatenateComponent &r) = delete

BatchTensorConcatenateComponent(BatchTensorConcatenateComponent &&r) = default

inline Tensor<FloatType, TensDim> value(Tensor<FloatType, TensDim> const *const *in)

inline void deriv(Tensor<FloatType, TensDim> &&_dcost_by_dOut, Tensor<FloatType, TensDim> *const *dcost_by_dIn) const

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

Private Members

int concat_dim

int Ntens

std::vector<std::array<int, TensDim>> tens_dims

bool setup

BatchTensorDimensionSliceComponent

template<typename _FloatType, int TensDim> class BatchTensorDimensionSliceComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchTensorDimensionSliceComponent(int slice_dim, int slice_idx)

BatchTensorDimensionSliceComponent(const BatchTensorDimensionSliceComponent &r) = delete

BatchTensorDimensionSliceComponent(BatchTensorDimensionSliceComponent &&r) = default

Tensor<FloatType, TensDim - 1> value(const Tensor<FloatType, TensDim> &in)

void deriv(Tensor<FloatType, TensDim - 1> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

Private Members

int slice_dim

int slice_idx

int in_size[TensDim]

int out_size[TensDim - 1]

size_t other_dim_vol

size_t offset_in

bool setup

BatchTensorDNNcomponent

template<typename _FloatType, int TensDim, typename ActivationFunc> class BatchTensorDNNcomponent

Public Types

typedef _FloatType FloatType

Public Functions

inline BatchTensorDNNcomponent(const Matrix<FloatType> &weights, const Vector<FloatType> &bias, int contract_dim, const ActivationFunc &activation)

inline BatchTensorDNNcomponent(const Matrix<FloatType> &_weights, int contract_dim, const ActivationFunc &activation)

BatchTensorDNNcomponent(const BatchTensorDNNcomponent &r) = delete

BatchTensorDNNcomponent(BatchTensorDNNcomponent &&r) = default

Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &x)

void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dCost_by_dOut, Tensor<FloatType, TensDim> &dCost_by_dIn) const

void update(int off, const Vector<FloatType> &new_params)

void step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

void getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Matrix<FloatType> weights

Vector<FloatType> bias

int batch_size

int contract_dim

bool use_bias

int in_dims[TensDim]

int out_dims[TensDim]

size_t other_size

size_t stride

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

bool setup

ActivationFunc activation_func

mutable RingBuffer<Tensor<FloatType, TensDim>> in_buf

mutable RingBuffer<Tensor<FloatType, TensDim>> activation_deriv_buf

MatrixTensorContractComponent

template<typename _FloatType, int TensDim> class MatrixTensorContractComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline MatrixTensorContractComponent(const Matrix<FloatType> &weights)

MatrixTensorContractComponent(const MatrixTensorContractComponent &r) = delete

MatrixTensorContractComponent(MatrixTensorContractComponent &&r) = default

Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &x)

void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dCost_by_dOut, Tensor<FloatType, TensDim> &dCost_by_dIn) const

void update(int off, const Vector<FloatType> &new_params)

void step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

void getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Matrix<FloatType> weights

int size0

int size1

int batch_size

int in_dims[TensDim]

int out_dims[TensDim]

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

bool setup

mutable RingBuffer<Tensor<FloatType, TensDim>> in_buf

MultiHeadAttentionComponent

template<typename _FloatType> class MultiHeadAttentionComponent

Public Types

typedef _FloatType FloatType

Public Functions

MultiHeadAttentionComponent(int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)

MultiHeadAttentionComponent(int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)

MultiHeadAttentionComponent(const MultiHeadAttentionComponent &r) = delete

MultiHeadAttentionComponent(MultiHeadAttentionComponent &&r) = default

TensorType value(const TensorType &Q, const TensorType &K, const TensorType &V)

void deriv(Vector<FloatType> &cost_deriv, int off, TensorType &&dCost_by_dOut, TensorType &dCost_by_dQ, TensorType &dCost_by_dK, TensorType &dCost_by_dV) const

void update(int off, const Vector<FloatType> &new_params)

void step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

size_t FLOPS(int value_or_deriv) const

void getParams(Vector<FloatType> &into, int off) const

void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, 3> TensorType

Private Members

int C

int E

int B

int Nparams_layer

bool setup

std::vector<std::unique_ptr<ScaledDotProductAttentionHeadComponent<FloatType>>> heads

BatchTensorConcatenateComponent<FloatType, 3> concatY

MatrixTensorContractComponent<FloatType, 3> multW_O

NormComponent

template<typename _FloatType, int TensDim> class NormComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline NormComponent(int norm_dim, FloatType epsilon = 1e-5)

NormComponent(const NormComponent &r) = delete

NormComponent(NormComponent &&r) = default

Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in)

void deriv(Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline void resizeInputBuffer(size_t to)

Private Members

int norm_dim

FloatType epsilon

int in_size[TensDim]

size_t other_dim_vol

size_t stride

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

bool setup

mutable RingBuffer<Tensor<FloatType, TensDim>> out_buf

mutable RingBuffer<Matrix<FloatType>> std_buf

ScaleComponent

template<typename _FloatType, int TensDim> class ScaleComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline ScaleComponent(int scale_dim, int dimension_size, bool use_affine, bool use_bias, const Vector<FloatType> &affine_init, const Vector<FloatType> &bias_init)

ScaleComponent(const ScaleComponent &r) = delete

ScaleComponent(ScaleComponent &&r) = default

Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in)

void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const

void update(int off, const Vector<FloatType> &new_params)

void step(int off, const Vector<FloatType> &derivs, FloatType eps)

void getParams(Vector<FloatType> &into, int off) const

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

inline void resizeInputBuffer(size_t to)

Private Members

int scale_dim

bool use_affine

bool use_bias

int nparams_val

int in_size[TensDim]

size_t other_dim_vol

size_t stride

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

bool setup

Vector<FloatType> gamma

Vector<FloatType> beta

mutable RingBuffer<Tensor<FloatType, TensDim>> in_buf

ScaledDotProductAttentionComponent

template<typename _FloatType> class ScaledDotProductAttentionComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline ScaledDotProductAttentionComponent(int d_k, int d_v, int use_mask = false)

ScaledDotProductAttentionComponent(const ScaledDotProductAttentionComponent &r) = delete

ScaledDotProductAttentionComponent(ScaledDotProductAttentionComponent &&r) = default

Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &Q, const Tensor<FloatType, 3> &K, Tensor<FloatType, 3> &V)

void deriv(Tensor<FloatType, 3> &&dCost_by_dOut, Tensor<FloatType, 3> &dCost_by_dQ, Tensor<FloatType, 3> &dCost_by_dK, Tensor<FloatType, 3> &dCost_by_dV) const

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

inline void resizeInputBuffer(size_t to)

Private Members

int C

int B

int d_k

int d_v

bool setup

Batch3tensorPairContractComponent<FloatType> mulQKtoGetS

BatchedMatrixRowSoftMaxComponent<FloatType> softmaxS_to_SS

Batch3tensorPairContractComponent<FloatType> mulSSVtoGetOut

ScaledDotProductAttentionHeadComponent

template<typename _FloatType> class ScaledDotProductAttentionHeadComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline ScaledDotProductAttentionHeadComponent(const Matrix<FloatType> &W_Q, const Matrix<FloatType> &W_K, const Matrix<FloatType> &W_V, bool use_mask = false)

ScaledDotProductAttentionHeadComponent(const ScaledDotProductAttentionHeadComponent &r) = delete

ScaledDotProductAttentionHeadComponent(ScaledDotProductAttentionHeadComponent &&r) = default

Tensor<FloatType, 3> value(const Tensor<FloatType, 3> &Q, const Tensor<FloatType, 3> &K, const Tensor<FloatType, 3> &V)

void deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&dCost_by_dOut, Tensor<FloatType, 3> &dCost_by_dQ, Tensor<FloatType, 3> &dCost_by_dK, Tensor<FloatType, 3> &dCost_by_dV) const

void update(int off, const Vector<FloatType> &new_params)

void step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

void getParams(Vector<FloatType> &into, int off) const

inline size_t FLOPS(int value_or_deriv) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef MatrixTensorContractComponent<FloatType, 3> MatTensMulCptType

Private Members

int C

int B

int E

int d_k

int d_v

bool setup

MatTensMulCptType multWQ

MatTensMulCptType multWK

MatTensMulCptType multWV

ScaledDotProductAttentionComponent<FloatType> attention

SoftMaxComponent

template<typename _FloatType, int TensDim> class SoftMaxComponent

Public Types

typedef _FloatType FloatType

Public Functions

inline SoftMaxComponent(int softmax_dim, FloatType beta = 1.0)

SoftMaxComponent(const SoftMaxComponent &r) = delete

SoftMaxComponent(SoftMaxComponent &&r) = default

Tensor<FloatType, TensDim> value(const Tensor<FloatType, TensDim> &in) const

void deriv(Tensor<FloatType, TensDim> &&dcost_by_dOut, Tensor<FloatType, TensDim> &dcost_by_dIn) const

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

inline void resizeInputBuffer(size_t to)

inline void setBeta(FloatType _beta)

Private Members

int softmax_dim

FloatType beta

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

mutable RingBuffer<Tensor<FloatType, TensDim>> out_buf

BatchTensorDNNlayer

Defines

LAYER_TYPE

LAYER_TYPE

Functions

template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto batch_tensor_dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, int contract_dim, const ActivationFunc &activation, U &&u) -> LAYER_TYPE

template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto batch_tensor_dnn_layer(const Matrix<FLOATTYPE(U)> &weights, int contract_dim, const ActivationFunc &activation, U &&u) -> LAYER_TYPE

template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto batch_tensor_dnn_layer(int contract_dim, int fan_out, int fan_in, const ActivationFunc &activation, U &&u) -> LAYER_TYPE

template<int TensDim, typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto batch_tensor_unbiased_dnn_layer(int contract_dim, int fan_out, int fan_in, const ActivationFunc &activation, U &&u) -> LAYER_TYPE

template<typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, const ActivationFunc &activation, U &&u)

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto dnn_layer(const Matrix<FLOATTYPE(U)> &weights, const Vector<FLOATTYPE(U)> &bias, U &&u)

template<typename U, typename ActivationFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto dnn_layer(int fan_out, int fan_in, const ActivationFunc &activation, U &&u)

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto dnn_layer(int fan_out, int fan_in, U &&u)

template<typename _FloatType, int TensDim, typename _InputType, typename Store, typename ActivationFunc> class BatchTensorDNNlayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

inline BatchTensorDNNlayer(Store &&leaf, const Matrix<FloatType> &weights, const Vector<FloatType> &bias, int contract_dim, const ActivationFunc &activation)

inline BatchTensorDNNlayer(Store &&leaf, const Matrix<FloatType> &weights, int contract_dim, const ActivationFunc &activation)

BatchTensorDNNlayer(const BatchTensorDNNlayer &r) = delete

BatchTensorDNNlayer(BatchTensorDNNlayer &&r) = default

Tensor<FloatType, TensDim> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline size_t FLOPS(int value_or_deriv) const

inline int nparams() const

int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Store leaf

BatchTensorDNNcomponent<FloatType, TensDim, ActivationFunc> cpt

ConvolutionLayer1D

Functions

template<typename U, typename ActivationFunc, typename PaddingFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto conv1d_layer(const Tensor<FLOATTYPE(U), 3> &filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, int stride, U &&u) -> ConvolutionLayer1D<FLOATTYPE(U), INPUTTYPE(U), DDST(u), ActivationFunc, PaddingFunc>

template<typename U, typename ActivationFunc, typename PaddingFunc, typename std::enable_if<ISLEAF(U), int>::type = 0> auto conv1d_layer(const Tensor<FLOATTYPE(U), 3> &filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, U &&u) -> ConvolutionLayer1D<FLOATTYPE(U), INPUTTYPE(U), DDST(u), ActivationFunc, PaddingFunc>

template<typename _FloatType, typename _InputType, typename Store, typename ActivationFunc, typename PaddingFunc> class ConvolutionLayer1D

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE(typename Store::type) LayerInputTensorType

inline ConvolutionLayer1D(Store &&leaf, const Tensor<FloatType, 3> &_filter, const ActivationFunc &activation_func, const PaddingFunc &padding_func, int stride = 1)

ConvolutionLayer1D(const ConvolutionLayer1D &r) = delete

ConvolutionLayer1D(ConvolutionLayer1D &&r) = default

Tensor<FloatType, 3> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Store leaf

int _input_tens_size[LayerInputTensorType::dimension()]

Tensor<FloatType, 3> filter

ActivationFunc activation_func

PaddingFunc padding_func

int depth

int channels

int kernel_size

int stride

bool init

int padded_data_len

int batch_size

mutable FLOPScounter value_FLOPS

mutable FLOPScounter deriv_FLOPS

mutable RingBuffer<Tensor<FloatType, 3>> leaf_buf

mutable RingBuffer<Tensor<FloatType, 3>> activation_deriv_buf

EmbedPositionsSinusoidalLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto embed_positions_sinusoidal_layer(U &&u)

template<typename _FloatType, typename _InputType, typename Store> class EmbedPositionsSinusoidalLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

inline EmbedPositionsSinusoidalLayer(Store &&leaf)

EmbedPositionsSinusoidalLayer(const EmbedPositionsSinusoidalLayer &r) = delete

EmbedPositionsSinusoidalLayer(EmbedPositionsSinusoidalLayer &&r) = default

inline Tensor<FloatType, 3> value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Store leaf

mutable FLOPScounter value_FLOPS

FlattenLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto flatten_layer(U &&u) -> FlattenLayer<FLOATTYPE(U), INPUTTYPE(U), DDST(u)>

template<typename _FloatType, typename _InputType, typename Store> class FlattenLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE(typename Store::type) LayerInputTensorType

inline FlattenLayer(Store &&leaf)

FlattenLayer(const FlattenLayer &r) = delete

FlattenLayer(FlattenLayer &&r) = default

Matrix<FloatType> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Matrix<FloatType> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Store leaf

int _input_tens_size[LayerInputTensorType::dimension()]

bool init

InputLayer

Functions

template<typename FloatType, typename InputType = Matrix<FloatType>> inline InputLayer<FloatType, InputType> input_layer()

template<typename _FloatType, typename _InputType = Matrix<_FloatType>> class InputLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

inline InputLayer()

inline InputLayer(InputLayer &&r) = default

inline InputLayer(const InputLayer &r) = delete

inline const InputType &value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, InputType &&above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

LayerCommon

Defines

ISLEAF(a)

FLOATTYPE(a)

INPUTTYPE(a)

LAYEROUTPUTTYPE(a)

LAYERTYPEOUTPUTTYPE(a)

struct LeafTag

MatrixTensorContractLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto matrix_tensor_contract_layer(const Matrix<FLOATTYPE(U)> &weights, U &&u) -> LAYER_TYPE

template<typename _FloatType, int TensDim, typename _InputType, typename Store> class MatrixTensorContractLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

inline MatrixTensorContractLayer(Store &&leaf, const Matrix<FloatType> &weights)

MatrixTensorContractLayer(const MatrixTensorContractLayer &r) = delete

MatrixTensorContractLayer(MatrixTensorContractLayer &&r) = default

Tensor<FloatType, TensDim> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Store leaf

MatrixTensorContractComponent<FloatType, TensDim> cpt

MultiHeadCrossAttentionLayer

Defines

LAYER_TYPE

TEMPL

Functions

TEMPL auto multihead_cross_attention_layer(int Nheads, Matrix<FLOATTYPE(ChainKV)> const *const *W_Q, Matrix<FLOATTYPE(ChainKV)> const *const *W_K, Matrix<FLOATTYPE(ChainKV)> const *const *W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE

TEMPL auto multihead_cross_attention_layer(int Nheads, Matrix<FLOATTYPE(ChainKV)> const *const *W_Q, Matrix<FLOATTYPE(ChainKV)> const *const *W_K, Matrix<FLOATTYPE(ChainKV)> const *const *W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE

TEMPL auto multihead_cross_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_Q, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_K, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE

TEMPL auto multihead_cross_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_Q, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_K, const std::vector<Matrix<FLOATTYPE(ChainKV)>> &W_V, const Matrix<FLOATTYPE(ChainKV)> &W_O, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE

TEMPL auto multihead_cross_attention_layer(int Nheads, int E, bool use_mask, ChainKV &&chain_KV, ChainQ &&chain_Q) -> LAYER_TYPE

TEMPL auto multihead_cross_attention_layer(int Nheads, int E, ChainKV &&chain_KV, ChainQ &&chain_Q)

template<typename _FloatType, typename _InputType, typename StoreKV, typename StoreQ> class MultiHeadCrossAttentionLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef Tensor<FloatType, 3> TensorType

typedef LeafTag tag

Public Functions

inline MultiHeadCrossAttentionLayer(StoreKV &&leaf_KV, StoreQ &&leaf_Q, int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)

inline MultiHeadCrossAttentionLayer(StoreKV &&leaf_KV, StoreQ &&leaf_Q, int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)

MultiHeadCrossAttentionLayer(const MultiHeadCrossAttentionLayer &r) = delete

MultiHeadCrossAttentionLayer(MultiHeadCrossAttentionLayer &&r) = default

inline TensorType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, TensorType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

StoreKV leaf_KV

StoreQ leaf_Q

MultiHeadAttentionComponent<FloatType> attention

MultiHeadSelfAttentionLayer

Defines

LAYER_TYPE

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto multihead_self_attention_layer(int Nheads, Matrix<FLOATTYPE(U)> const *const *W_Q, Matrix<FLOATTYPE(U)> const *const *W_K, Matrix<FLOATTYPE(U)> const *const *W_V, const Matrix<FLOATTYPE(U)> &W_O, bool use_mask, U &&u) -> LAYER_TYPE

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto multihead_self_attention_layer(int Nheads, Matrix<FLOATTYPE(U)> const *const *W_Q, Matrix<FLOATTYPE(U)> const *const *W_K, Matrix<FLOATTYPE(U)> const *const *W_V, const Matrix<FLOATTYPE(U)> &W_O, U &&u) -> LAYER_TYPE

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto multihead_self_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(U)>> &W_Q, const std::vector<Matrix<FLOATTYPE(U)>> &W_K, const std::vector<Matrix<FLOATTYPE(U)>> &W_V, const Matrix<FLOATTYPE(U)> &W_O, bool use_mask, U &&u) -> LAYER_TYPE

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto multihead_self_attention_layer(int Nheads, const std::vector<Matrix<FLOATTYPE(U)>> &W_Q, const std::vector<Matrix<FLOATTYPE(U)>> &W_K, const std::vector<Matrix<FLOATTYPE(U)>> &W_V, const Matrix<FLOATTYPE(U)> &W_O, U &&u) -> LAYER_TYPE

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto multihead_self_attention_layer(int Nheads, int E, bool use_mask, U &&u) -> LAYER_TYPE

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto multihead_self_attention_layer(int Nheads, int E, U &&u) -> LAYER_TYPE

template<typename _FloatType, typename _InputType, typename Store> class MultiHeadSelfAttentionLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

MultiHeadSelfAttentionLayer(Store &&leaf, int Nheads, Matrix<FloatType> const *const *W_Q, Matrix<FloatType> const *const *W_K, Matrix<FloatType> const *const *W_V, const Matrix<FloatType> &W_O, bool use_mask = false)

MultiHeadSelfAttentionLayer(Store &&leaf, int Nheads, const std::vector<Matrix<FloatType>> &W_Q, const std::vector<Matrix<FloatType>> &W_K, const std::vector<Matrix<FloatType>> &W_V, const Matrix<FloatType> &W_O, bool use_mask = false)

MultiHeadSelfAttentionLayer(const MultiHeadSelfAttentionLayer &r) = delete

MultiHeadSelfAttentionLayer(MultiHeadSelfAttentionLayer &&r) = default

Tensor<FloatType, 3> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

int getParams(Vector<FloatType> &into, int off) const

void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, 3> LayerInputType

Private Members

MultiHeadAttentionComponent<FloatType> mha

Store leaf

NormLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FLOATTYPE(U)> &affine_init, const Vector<FLOATTYPE(U)> &bias_init, FLOATTYPE(U) epsilon, U &&u) -> LAYER_TYPE

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FLOATTYPE(U)> &affine_init, const Vector<FLOATTYPE(U)> &bias_init, U &&u) -> LAYER_TYPE

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, FLOATTYPE(U) epsilon, U &&u) -> LAYER_TYPE

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto norm_layer(int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, U &&u) -> LAYER_TYPE

template<typename _FloatType, int TensDim, typename _InputType, typename Store> class NormLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

inline NormLayer(Store &&leaf, int norm_dim, int norm_dim_size, bool use_affine, bool use_bias, const Vector<FloatType> &affine_init, const Vector<FloatType> &bias_init, FloatType epsilon)

NormLayer(const NormLayer &r) = delete

NormLayer(NormLayer &&r) = default

inline Tensor<FloatType, TensDim> value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, TensDim> LayerInputType

Private Members

NormComponent<FloatType, TensDim> nrm

ScaleComponent<FloatType, TensDim> scale

Store leaf

PairJoinLayer

Functions

template<typename U, typename V, typename std::enable_if<ISLEAF(U) && ISLEAF(V) && std::is_same<INPUTTYPE(U), INPUTTYPE(V)>::value, int>::type = 0> auto pair_join_layer(U &&u, V &&v)

template<typename _FloatType, typename _InputType, typename Store1, typename Store2> class PairJoinLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef std::pair<LayerInputType1, LayerInputType2> LayerOutputType

typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType1) LayerInputType1

typedef LAYERTYPEOUTPUTTYPE(StoredType2) LayerInputType2

inline PairJoinLayer(Store1 &&leaf1, Store2 &&leaf2)

PairJoinLayer(const PairJoinLayer &r) = delete

PairJoinLayer(PairJoinLayer &&r) = default

inline LayerOutputType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef Store1::type StoredType1

typedef Store2::type StoredType2

Private Members

Store1 leaf1

Store2 leaf2

PairSplitLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto pair_split_layer(U &&u)

template<typename _FloatType, typename _InputType, typename Store> class PairSplitLayerLeader

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef Store::type StoredType

typedef LayerInputType::first_type LayerOutputType1

typedef LayerInputType::second_type LayerOutputType2

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType

inline PairSplitLayerLeader(Store &&leaf)

inline void cinc(int &i)

inline LayerOutputType1 first(const InputType &x)

inline LayerOutputType2 second(const InputType &x)

inline int deriv_complete(Vector<FloatType> &cost_deriv, int off, InputType *input_above_deriv_return)

inline int deriv_first(Vector<FloatType> &cost_deriv, int off, LayerOutputType1 &&_above_deriv, InputType *input_above_deriv_return)

inline int deriv_second(Vector<FloatType> &cost_deriv, int off, LayerOutputType2 &&_above_deriv, InputType *input_above_deriv_return)

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int getParams(Vector<FloatType> &into, int off)

Public Members

LayerInputType in_buf

LayerOutputType1 above_deriv1

LayerOutputType2 above_deriv2

Store leaf

int val_count

int deriv_count

int update_count

int step_count

int getparams_count

template<typename _FloatType, typename _InputType, typename Store> class PairSplitLayer1

Public Types

typedef LayerInputType::first_type LayerOutputType

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType

inline PairSplitLayer1(PairSplitLayerLeader<FloatType, InputType, Store> *leader)

PairSplitLayer1(const PairSplitLayer1 &r) = delete

inline PairSplitLayer1(PairSplitLayer1 &&r)

inline ~PairSplitLayer1()

inline LayerOutputType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef Store::type StoredType

Private Members

PairSplitLayerLeader<FloatType, InputType, Store> *leader

template<typename _FloatType, typename _InputType, typename Store> class PairSplitLayer2

Public Types

typedef LayerInputType::second_type LayerOutputType

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputType

inline PairSplitLayer2(PairSplitLayerLeader<FloatType, InputType, Store> *leader)

PairSplitLayer2(const PairSplitLayer2 &r) = delete

PairSplitLayer2(PairSplitLayer2 &&r) = default

inline LayerOutputType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef Store::type StoredType

Private Members

PairSplitLayerLeader<FloatType, InputType, Store> *leader

ReplicateLayer

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto replicate_layer(int N, U &&u)

template<typename _FloatType, typename _InputType, typename Store> class ReplicateLayerLeader

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef Store::type StoredType

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputOutputType

inline ReplicateLayerLeader(Store &&leaf, int N)

inline void cinc(int &i)

inline LayerInputOutputType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return)

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int getParams(Vector<FloatType> &into, int off)

Public Members

LayerInputOutputType in_buf

std::vector<LayerInputOutputType> above_deriv

Store leaf

int N

int val_count

int deriv_count

int update_count

int step_count

int getparams_count

template<typename _FloatType, typename _InputType, typename Store> class ReplicateLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType) LayerInputOutputType

inline ReplicateLayer(ReplicateLayerLeader<FloatType, InputType, Store> *leader, int instance, int N)

ReplicateLayer(const ReplicateLayer &r) = delete

inline ReplicateLayer(ReplicateLayer &&r)

inline ~ReplicateLayer()

inline LayerInputOutputType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef Store::type StoredType

Private Members

int instance

int N

ReplicateLayerLeader<FloatType, InputType, Store> *leader

ScaledDotProductSelfAttentionLayer

Defines

LAYER_TYPE

Functions

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto scaled_dotproduct_self_attention_layer(const Matrix<FLOATTYPE(U)> &W_Q, const Matrix<FLOATTYPE(U)> &W_K, const Matrix<FLOATTYPE(U)> &W_V, bool use_mask, U &&u) -> LAYER_TYPE

template<typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto scaled_dotproduct_self_attention_layer(const Matrix<FLOATTYPE(U)> &W_Q, const Matrix<FLOATTYPE(U)> &W_K, const Matrix<FLOATTYPE(U)> &W_V, U &&u) -> LAYER_TYPE

template<typename _FloatType, typename _InputType, typename Store> class ScaledDotProductSelfAttentionLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

inline ScaledDotProductSelfAttentionLayer(Store &&leaf, const Matrix<FloatType> &W_Q, const Matrix<FloatType> &W_K, const Matrix<FloatType> &W_V, bool use_mask = false)

ScaledDotProductSelfAttentionLayer(const ScaledDotProductSelfAttentionLayer &r) = delete

ScaledDotProductSelfAttentionLayer(ScaledDotProductSelfAttentionLayer &&r) = default

Tensor<FloatType, 3> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, 3> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

int getParams(Vector<FloatType> &into, int off) const

inline size_t FLOPS(int value_or_deriv) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef Tensor<FloatType, 3> LayerInputType

Private Members

int C

int E

int B

int d_k

int d_v

bool setup

ScaledDotProductAttentionHeadComponent<FloatType> attentionQKV

Store leaf

SkipConnection

Defines

LAYER_TYPE

Functions

template<typename Internal, typename Below, typename std::enable_if<ISLEAF(Internal) && ISLEAF(Below), int>::type = 0> auto skip_connection(Internal &&internal, Below &&below) -> LAYER_TYPE

template<typename _FloatType, typename _InputType, typename ChainInternal, typename ChainBelow> class SkipConnection

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef ChainBelow::type ChainBelowInternalType

typedef LeafTag tag

Public Functions

typedef LAYERTYPEOUTPUTTYPE(ChainBelowInternalType) LayerInputOutputType

inline SkipConnection(ChainInternal &&leaf_internal, ChainBelow &&leaf_below)

SkipConnection(const SkipConnection &r) = delete

SkipConnection(SkipConnection &&r) = default

LayerInputOutputType value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

ChainBelow leaf_below

ChainInternal leaf_internal

SoftMaxLayer

Defines

LAYER_TYPE

Functions

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto softmax_layer(int softmax_dim, FLOATTYPE(U) beta, U &&u) -> LAYER_TYPE

template<int TensDim, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto softmax_layer(int softmax_dim, U &&u) -> LAYER_TYPE

template<typename _FloatType, int TensDim, typename _InputType, typename Store> class SoftMaxLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

inline SoftMaxLayer(Store &&leaf, int softmax_dim, FloatType beta = 1.0)

inline SoftMaxLayer(SoftMaxLayer &&r) = default

inline SoftMaxLayer(const SoftMaxLayer &r) = delete

Tensor<FloatType, TensDim> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, TensDim> &&above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

inline void setBeta(FloatType beta)

Private Members

Store leaf

SoftMaxComponent<FloatType, TensDim> cpt

SumJoinLayer

Functions

template<typename U, typename V, typename std::enable_if<ISLEAF(U) && ISLEAF(V) && std::is_same<INPUTTYPE(U), INPUTTYPE(V)>::value, int>::type = 0> auto sum_join_layer(U &&u, V &&v)

template<typename _FloatType, typename _InputType, typename Store1, typename Store2> class SumJoinLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LayerInputType1 LayerInputOutputType

typedef LeafTag tag

Public Functions

inline SumJoinLayer(Store1 &&leaf1, Store2 &&leaf2)

SumJoinLayer(const SumJoinLayer &r) = delete

SumJoinLayer(SumJoinLayer &&r) = default

inline LayerInputOutputType value(const InputType &x)

inline int deriv(Vector<FloatType> &cost_deriv, int off, LayerInputOutputType &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

inline int update(int off, const Vector<FloatType> &new_params)

inline int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

inline int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Types

typedef Store1::type StoredType1

typedef Store2::type StoredType2

Private Functions

typedef LAYERTYPEOUTPUTTYPE(StoredType1) LayerInputType1

typedef LAYERTYPEOUTPUTTYPE(StoredType2) LayerInputType2

Private Members

Store1 leaf1

Store2 leaf2

TransformerEncoderDecoderBlock

Functions

template<typename Below, typename ActivationFunc> auto transformer_decoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, Below &&below)

template<typename Below, typename ActivationFunc> auto transformer_encoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, Below &&below)

template<typename EncoderInput, typename DecoderInput, typename ActivationFunc> auto transformer_cross_decoder_block(int E, int nheads, int d_act, const ActivationFunc &activation, EncoderInput &&encoder_in, DecoderInput &&decoder_in)

UnflattenLayer

Functions

template<int OutDimension, typename U, typename std::enable_if<ISLEAF(U), int>::type = 0> auto unflatten_layer(int const *output_tens_dim, U &&u) -> UnflattenLayer<FLOATTYPE(U), OutDimension, INPUTTYPE(U), DDST(u)>

template<typename _FloatType, int OutDimension, typename _InputType, typename Store> class UnflattenLayer

Public Types

typedef _FloatType FloatType

typedef _InputType InputType

typedef LeafTag tag

Public Functions

typedef LAYEROUTPUTTYPE(typename Store::type) LayerInputTensorType

inline UnflattenLayer(Store &&leaf, int const *output_tens_size)

UnflattenLayer(const UnflattenLayer &r) = delete

UnflattenLayer(UnflattenLayer &&r) = default

Tensor<FloatType, OutDimension> value(const InputType &x)

int deriv(Vector<FloatType> &cost_deriv, int off, Tensor<FloatType, OutDimension> &&_above_deriv, InputType *input_above_deriv_return = nullptr) const

int update(int off, const Vector<FloatType> &new_params)

int step(int off, const Vector<FloatType> &derivs, FloatType eps)

inline int nparams() const

inline size_t FLOPS(int value_or_deriv) const

int getParams(Vector<FloatType> &into, int off) const

inline void resizeInputBuffer(size_t to)

Private Members

Store leaf

int _output_tens_size[OutDimension]