1 /// A memory allocator for Gfx-d
2 module gfx.memalloc;
3 
4 import gfx.core.log :       LogTag;
5 import gfx.core.rc :        AtomicRefCounted, IAtomicRefCounted;
6 import gfx.graal.device :   Device;
7 import gfx.graal.memory :   DeviceMemory, MemoryProperties, MemoryRequirements,
8                             MemoryType, MemProps;
9 
10 enum gfxMemallocLogMask = 0x1000_0000;
11 package immutable gfxMemallocLog = LogTag("GFX-MEMALLOC", gfxMemallocLogMask);
12 
/// Option flags for creating an Allocator
enum AllocatorFlags
{
    /// Default behavior, no flag set
    none = 0,
    /// If set, each allocation will have a dedicated DeviceMemory.
    /// Even if not set, some backends (e.g. OpenGL) require a dedicated DeviceMemory
    /// per allocation and will do so regardless of this flag.
    dedicatedOnly = 1,
}
23 
/// Option to define allocation behavior for each heap of the device
struct HeapOptions
{
    /// How many bytes may be used on the heap.
    /// Set to 0 to forbid use of a specific heap and to size_t.max to allow entire use.
    size_t maxUsage = size_t.max;
    /// Size of a single DeviceMemory on this heap.
    /// Set to 0 to use default behavior for this heap.
    size_t blockSize = 0;
}
34 
/// Options for the creation of an Allocator
struct AllocatorOptions
{
    /// option flags
    AllocatorFlags flags;
    /// One HeapOptions per heap in the device, or empty to use default behavior.
    /// Default behavior is to allow use of entire heap. Default block size is
    /// 256MB for heaps > 1GB, and heapSize/8 for smaller ones.
    HeapOptions[] heapOptions;
}
45 
/// Flags controlling an allocation of memory
enum AllocFlags {
    /// default behavior, no flags.
    none            = 0,
    /// Set to force the creation of a new DeviceMemory, that will be dedicated for the allocation.
    dedicated       = 1,
    /// Set to prohibit the creation of a new DeviceMemory. This forces the use of an existing chunk, and fails if it cannot find one.
    neverAllocate   = 2,
}
55 
/// Describes the usage of a memory allocation
enum MemoryUsage {
    /// No intended usage. The type of memory will not be influenced by the usage.
    unknown,
    /// Memory will be used on device only (MemProps.deviceLocal) and having it mappable
    /// on host is not requested (although it is possible on some devices).
    /// Usage:
    /// $(UL
    ///   $(LI Resources written and read by device, e.g. images used as attachments. )
    ///   $(LI Resources transferred from host once or infrequently and read by device multiple times,
    ///        e.g. textures, vertex buffers, uniforms etc. )
    /// )
    gpuOnly,
    /// Memory will be mappable on host. It usually means CPU (system) memory.
    /// Resources created for this usage may still be accessible to the device,
    /// but access to them can be slower. Guarantees to be MemProps.hostVisible and MemProps.hostCoherent.
    /// Usage:
    /// $(UL $(LI Staging copy of resources used as transfer source.))
    cpuOnly,
    /// Memory that is both mappable on host (guarantees to be MemProps.hostVisible)
    /// and preferably fast to access by GPU. CPU reads may be uncached and very slow.
    /// Usage:
    /// $(UL $(LI Resources written frequently by host (dynamic), read by device.
    /// E.g. textures, vertex buffers, uniform buffers updated every frame or every draw call.))
    cpuToGpu,
    /// Memory mappable on host (guarantees to be MemProps.hostVisible) and cached.
    /// Usage:
    /// $(UL
    ///     $(LI Resources written by device, read by host - results of some computations,
    ///          e.g. screen capture, average scene luminance for HDR tone mapping.)
    ///     $(LI Any resources read or accessed randomly on host, e.g. CPU-side copy of
    ///          vertex buffer used as source of transfer, but also used for collision detection.)
    /// )
    gpuToCpu,
}
91 
/// Structure controlling an allocation of memory
struct AllocOptions {
    /// Control flags
    AllocFlags flags;
    /// Intended usage. Will affect preferredProps and requiredProps
    /// (see findMemoryTypeIndex).
    MemoryUsage usage;
    /// MemProps bits that are optional but are preferred to be present.
    /// Allocation will favor memory types with these bits if available, but may
    /// fallback to other memory types.
    MemProps preferredProps;
    /// MemProps bits that must be set.
    /// Allocation will fail if it can't allocate a memory type that satisfies all bits.
    MemProps requiredProps;
    /// mask of memory type indices (0b0101 means indices 0 and 2) that, if not
    /// zero, will constrain MemoryRequirement.memTypeMask
    uint memTypeIndexMask;

    // The with* methods below follow a builder pattern: because AllocOptions
    // is a value type, each one mutates this instance and returns a copy,
    // which allows chaining on rvalues, e.g.
    // AllocOptions.forUsage(u).withFlags(f).

    /// Initializes an AllocOptions with usage
    static @property AllocOptions forUsage(MemoryUsage usage) {
        AllocOptions options;
        options.usage = usage;
        return options;
    }
    /// set flags to options
    AllocOptions withFlags(AllocFlags flags) {
        this.flags = flags;
        return this;
    }
    /// set preferredProps to options
    AllocOptions withPreferredProps(MemProps props) {
        this.preferredProps = props;
        return this;
    }
    /// set requiredProps to options
    AllocOptions withRequiredBits(MemProps props) {
        this.requiredProps = props;
        return this;
    }
    /// set type index mask to options
    AllocOptions withTypeIndexMask(uint indexMask) {
        this.memTypeIndexMask = indexMask;
        return this;
    }
}
136 
137 
138 
/// Create an Allocator for the given device and options.
/// A dedicated allocator is returned when explicitly requested through
/// AllocatorFlags.dedicatedOnly, or when the backend mandates it (OpenGL);
/// otherwise a pooling allocator is used.
Allocator createAllocator(Device device, AllocatorOptions options)
{
    import gfx.graal : Backend;

    const dedicated =
        (options.flags & AllocatorFlags.dedicatedOnly) != 0 ||
        device.instance.backend == Backend.gl3;

    if (dedicated) {
        import gfx.memalloc.dedicated : DedicatedAllocator;
        return new DedicatedAllocator(device, options);
    }

    import gfx.memalloc.pool : PoolAllocator;
    return new PoolAllocator(device, options);
}
152 
/// Memory allocator for a device
abstract class Allocator : AtomicRefCounted
{
    import gfx.core.rc : Rc;
    import gfx.graal.buffer : BufferUsage;
    import gfx.graal.image : ImageInfo;

    package Device _device;                     // retained; released in dispose
    package AllocatorOptions _options;
    package MemoryProperties _memProps;         // cached from the physical device
    package size_t _linearOptimalGranularity;   // page size between linear/optimal resources

    package this(Device device, AllocatorOptions options)
    {
        import gfx.core.rc : retainObj;

        _device = retainObj(device);
        _options = options;
        _memProps = device.physicalDevice.memoryProperties;
        _linearOptimalGranularity = device.physicalDevice.limits.linearOptimalGranularity;

        import std.algorithm : all;
        import std.exception : enforce;
        // sanity check: every memory type must point to an existing heap
        enforce(_memProps.types.all!(mt => mt.heapIndex < _memProps.heaps.length));
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        releaseObj(_device);
    }

    /// Device this allocator is bound to.
    final @property Device device() {
        return _device;
    }

    /// Allocate memory for the given requirements
    /// Returns: A MemAlloc object
    /// Throws: An Exception if memory could not be allocated
    final MemAlloc allocate (in MemoryRequirements requirements,
                             in AllocOptions options=AllocOptions.init)
    {
        AllocResult res;
        if (allocateRaw(requirements, options, ResourceLayout.unknown, res)) {
            return new MemAlloc(
                res.mem, res.offset, requirements.size, res.block, res.blockData
            );
        }
        else {
            import std.format : format;
            throw new Exception(format(
                "Could not allocate memory for requirements: %s", requirements
            ));
        }
    }

    /// Create a buffer, then allocate and bind memory for its requirements
    /// Returns: A BufferAlloc object holding the bound buffer
    /// Throws: An Exception if memory could not be allocated
    final BufferAlloc allocateBuffer (in BufferUsage usage, in size_t size,
                                      in AllocOptions options=AllocOptions.init)
    {
        auto buf = _device.createBuffer(usage, size);
        const requirements = buf.memoryRequirements;
        AllocResult res;
        // buffers always have linear layout
        if (allocateRaw(requirements, options, ResourceLayout.linear, res)) {
            buf.bindMemory(res.mem, res.offset);
            return new BufferAlloc(
                buf, res.offset, requirements.size, res.block, res.blockData
            );
        }
        else {
            import std.format : format;
            throw new Exception(format(
                "Could not allocate memory for buffer with usage %s and size %s",
                usage, size
            ));
        }
    }

    /// Create an image, then allocate and bind memory for its requirements
    /// Returns: An ImageAlloc object holding the bound image
    /// Throws: An Exception if memory could not be allocated
    final ImageAlloc allocateImage (in ImageInfo info,
                                    in AllocOptions options=AllocOptions.init)
    {
        import gfx.graal.image : ImageTiling;

        auto img = _device.createImage(info);
        const requirements = img.memoryRequirements;
        // the layout drives granularity checks between neighboring resources
        const layout = info.tiling == ImageTiling.optimal ? ResourceLayout.optimal : ResourceLayout.linear;
        AllocResult res;
        if (allocateRaw(requirements, options, layout, res)) {
            img.bindMemory(res.mem, res.offset);
            return new ImageAlloc(
                img, res.offset, requirements.size, res.block, res.blockData
            );
        }
        else {
            import std.format : format;
            throw new Exception(format(
                "Could not allocate memory for image with info %s", info
            ));
        }
    }

    /// Collect statistics about this allocator.
    /// The default implementation returns empty stats; subclasses that track
    /// their blocks should override.
    AllocStats collectStats() {
        return AllocStats.init;
    }

    /// Attempt to allocate memory for the given index and for given requirements.
    /// If successful, result is filled with necessary data.
    /// Returns: true if successful, false otherwise.
    abstract protected bool tryAllocate (in MemoryRequirements requirements,
                                         in uint memoryTypeIndex,
                                         in AllocOptions options,
                                         in ResourceLayout layout,
                                         ref AllocResult result)
    in {
        assert(memoryTypeIndex < _memProps.types.length);
        assert(
            ((1 << memoryTypeIndex) & requirements.memTypeMask) != 0,
            "memoryTypeIndex is not compatible with requirements"
        );
    }

    /// Try to allocate, starting with the best matching memory type and
    /// falling back to the next best candidates when an attempt fails.
    /// Returns: true if an allocation succeeded, false otherwise.
    private final bool allocateRaw (in MemoryRequirements requirements,
                                    in AllocOptions options,
                                    in ResourceLayout layout,
                                    ref AllocResult result)
    {
        uint allowedMask = requirements.memTypeMask;

        while (allowedMask != 0) {
            const index = findMemoryTypeIndex(_memProps.types, allowedMask, options);

            // FIX: the previous code `continue`d here without modifying
            // allowedMask, which spun forever (and evaluated `1 << uint.max`,
            // an invalid shift) whenever bits remained in the mask but no
            // memory type satisfied the requirements. Once no candidate
            // remains, the allocation must fail.
            if (index == uint.max) break;

            if (tryAllocate(requirements, index, options, layout, result)) {
                return true;
            }

            // rule out the memory type that just failed and retry with the rest
            allowedMask &= ~(1 << index);
        }

        return false;
    }
}
299 
300 
/// Represent a single allocation within a DeviceMemory
class MemAlloc : AtomicRefCounted
{
    import gfx.core.rc : Rc;
    import gfx.graal.memory : MemoryMap;

    private DeviceMemory _mem;      // underlying device memory (retained)
    private size_t _offset;         // offset of this allocation within _mem
    private size_t _size;           // size of this allocation
    private MemBlock _block;        // owning block (retained), notified on dispose
    private Object _blockData;      // opaque per-allocation data handed back to _block.free
    private size_t _mapCount;       // mapping refcount, used only in the dedicated case
    private void* _mapPtr;          // cached host pointer of the raw mapping (dedicated case)
    private bool _dedicated;        // true when this allocation spans the whole DeviceMemory

    package this(DeviceMemory mem, size_t offset, size_t size,
                 MemBlock block, Object blockData)
    {
        import gfx.core.rc : retainObj;

        _mem = retainObj(mem);
        _offset = offset;
        _size = size;
        _block = retainObj(block);
        _blockData = blockData;
        // an allocation covering the whole memory manages its own mapping
        // refcount instead of delegating to the block
        _dedicated = mem.size == size;
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        // notify the owning block before releasing references
        _block.free(_blockData);
        releaseObj(_mem);
        releaseObj(_block);
    }

    /// Offset of this allocation within the DeviceMemory.
    final @property size_t offset() const {
        return _offset;
    }

    /// Size of this allocation in bytes.
    final @property size_t size() const {
        return _size;
    }

    /// The DeviceMemory this allocation lives in.
    final @property DeviceMemory mem() {
        return _mem;
    }

    /// Artificially increment the mapping reference count in order
    /// to keep the memory mapped even if no MemoryMap is alive
    final void retainMap() {
        if (_dedicated) {
            dedicatedMap();
        }
        else {
            _block.map();
        }
    }

    /// Decrement the mapping reference count previously incremented
    /// with retainMap.
    final void releaseMap() {
        if (_dedicated) {
            dedicatedUnmap();
        }
        else {
            _block.unmap();
        }
    }

    /// Map a range of this allocation into host address space.
    /// offset and size are relative to this allocation, not to the
    /// underlying DeviceMemory. size is clamped to the allocation end.
    /// The returned MemoryMap decrements the map count when destroyed.
    final MemoryMap map(in size_t offset=0, in size_t size=size_t.max)
    {
        import std.algorithm : min;

        // translate to DeviceMemory coordinates
        const off = this.offset + offset;
        // NOTE(review): assumes offset <= this.size, otherwise this
        // subtraction underflows — confirm callers uphold this
        const sz = min(this.size-offset, size);
        void* ptr;
        void delegate() unmap;

        if (_dedicated) {
            dedicatedMap();
            ptr = _mapPtr;
            unmap = &dedicatedUnmap;
        }
        else {
            ptr = _block.map();
            unmap = &_block.unmap;
        }

        // both mapping paths return a pointer to the start of the
        // DeviceMemory, hence indexing with the memory-relative offset
        auto data = ptr[off .. off+sz];
        return MemoryMap (_mem, off, data, unmap);
    }

    // map the whole DeviceMemory on first use, then only bump the refcount
    private void dedicatedMap() {
        if (!_mapCount) _mapPtr = _mem.mapRaw(0, _mem.size);
        ++_mapCount;
    }

    // drop the refcount and unmap when it reaches zero
    private void dedicatedUnmap() {
        --_mapCount;
        if (!_mapCount) {
            _mem.unmapRaw();
            _mapPtr = null;
        }
    }
}
406 
/// A MemAlloc that owns a Buffer bound to the allocated memory range.
final class BufferAlloc : MemAlloc
{
    import gfx.graal.buffer : Buffer;

    private Buffer _buffer;     // retained; released in dispose

    package this (Buffer buffer, size_t offset, size_t size, MemBlock block, Object blockData)
    {
        import gfx.core.rc : retainObj;

        // the buffer must already be bound to its memory at this point
        auto boundMem = buffer.boundMemory;
        super(boundMem, offset, size, block, blockData);
        _buffer = retainObj(buffer);
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        // release the buffer before the base class releases the memory
        releaseObj(_buffer);
        super.dispose();
    }

    /// The buffer bound to this allocation.
    final @property Buffer buffer() {
        return _buffer;
    }
}
433 
/// A MemAlloc that owns an Image bound to the allocated memory range.
final class ImageAlloc : MemAlloc
{
    import gfx.graal.image : Image;

    private Image _image;       // retained; released in dispose

    package this (Image image, size_t offset, size_t size, MemBlock block, Object blockData)
    {
        import gfx.core.rc : retainObj;

        // the image must already be bound to its memory at this point
        auto boundMem = image.boundMemory;
        super(boundMem, offset, size, block, blockData);
        _image = retainObj(image);
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        // release the image before the base class releases the memory
        releaseObj(_image);
        super.dispose();
    }

    /// The image bound to this allocation.
    final @property Image image() {
        return _image;
    }
}
460 
461 
/// Find a memory type index suitable for the given allowedIndexMask and info.
/// Params:
///     types               = the memory types obtained from a device
///     allowedIndexMask    = the mask obtained from MemoryRequirements.memTypeMask
///     options             = an optional AllocOptions that will constraint the
///                           choice
/// Returns: the found index of memory type, or uint.max if none could satisfy requirements
uint findMemoryTypeIndex(in MemoryType[] types,
                         in uint allowedIndexMask,
                         in AllocOptions options=AllocOptions.init)
{
    import core.bitop : popcnt;

    // narrow the candidate set with the user supplied index mask, if any
    uint candidates = allowedIndexMask;
    if (options.memTypeIndexMask != 0) {
        candidates &= options.memTypeIndexMask;
    }

    // translate the intended usage into required / preferred property bits
    MemProps required = options.requiredProps;
    MemProps preferred = options.preferredProps;

    switch (options.usage) {
    case MemoryUsage.gpuOnly:
        preferred |= MemProps.deviceLocal;
        break;
    case MemoryUsage.cpuOnly:
        required |= MemProps.hostVisible | MemProps.hostCoherent;
        break;
    case MemoryUsage.cpuToGpu:
        required |= MemProps.hostVisible;
        preferred |= MemProps.deviceLocal;
        break;
    case MemoryUsage.gpuToCpu:
        required |= MemProps.hostVisible;
        preferred |= MemProps.hostCoherent | MemProps.hostCached;
        break;
    case MemoryUsage.unknown:
    default:
        break;
    }

    uint bestIndex = uint.max;
    uint bestScore = uint.max;  // uint.max doubles as "no candidate found yet"

    foreach (i, mt; types) {
        const idx = cast(uint)i;
        // skip types outside the candidate mask
        if ((candidates & (1 << idx)) == 0) continue;
        // skip types that miss a required property
        if ((mt.props & required) != required) continue;

        // valid candidate: score it by the number of preferred bits it carries
        const score = popcnt(cast(uint)(mt.props & preferred));
        if (bestScore == uint.max || score > bestScore) {
            bestIndex = idx;
            bestScore = score;
        }
    }

    return bestIndex;
}
523 
/// Layout of a resource
/// This is important to determine whether a page alignment or simple alignment
/// is necessary between two consecutive resources
enum ResourceLayout {
    /// layout is unknown
    unknown,
    /// layout of buffers and linear images
    linear,
    /// layout of optimal images
    optimal,
}
535 
/// Some stats of an allocator that can be collected with Allocator.collectStats
struct AllocStats
{
    /// A chunk is a suballocation from a block
    static struct Chunk
    {
        /// start offset of the chunk within its block
        size_t start;
        /// end offset (one past the last byte) of the chunk
        size_t end;
        /// whether the chunk currently holds a live allocation
        bool occupied;
        /// layout of the resource occupying the chunk
        ResourceLayout layout;
    }

    /// A block is a one to one mapping on a DeviceMemory
    static struct Block
    {
        /// size of the DeviceMemory
        size_t size;
        /// the chunks suballocated from this block
        Chunk[] chunks;
    }

    /// total bytes reserved from the device
    size_t totalReserved;
    /// total bytes actually used by allocations
    size_t totalUsed;
    /// total bytes lost to fragmentation
    size_t totalFrag;
    /// granularity between linear and optimal resources
    size_t linearOptimalGranularity;
    /// per DeviceMemory details
    Block[] blocks;

    /// Render a human-readable report of the stats.
    string toString()
    {
        import std.array : appender;
        import std.format : formattedWrite;

        // Build the report in an appender: the previous implementation
        // used repeated `~=` on a string, reallocating quadratically.
        auto res = appender!string();

        res.put("AllocStats (\n");

        res.formattedWrite("  total reserved: %s\n", totalReserved);
        res.formattedWrite("  total used    : %s\n", totalUsed);
        res.formattedWrite("  total frag    : %s\n", totalFrag);
        res.formattedWrite("  granularity   : %s\n", linearOptimalGranularity);

        foreach (b; blocks) {
            res.put("  DeviceMemory (\n");
            res.formattedWrite("    size: %s\n", b.size);
            foreach (c; b.chunks) {
                res.put("    Resource (\n");

                res.formattedWrite("      start   : %s\n", c.start);
                res.formattedWrite("      end     : %s\n", c.end);
                res.formattedWrite("      occupied: %s\n", c.occupied);
                res.formattedWrite("      layout  : %s\n", c.layout);

                res.put("    )\n");
            }

            res.put("  )\n");
        }

        res.put(")\n");
        return res.data;
    }
}
592 
593 
594 package:
595 
/// A block of memory associated to one DeviceMemory
interface MemBlock : IAtomicRefCounted
{
    /// increase map count and return cached pointer
    /// if map count was zero, it maps the memory to the cached pointer before
    void* map();
    /// decrease map count and unmap memory if it reaches zero
    void unmap();
    /// called by MemAlloc when it is disposed to notify its memory block
    /// blockData is the same object that was handed out in AllocResult
    void free(Object blockData);
}
607 
/// The result of allocation request
struct AllocResult
{
    /// the DeviceMemory the allocation was served from
    DeviceMemory mem;
    /// offset of the allocation within mem
    size_t offset;
    /// the block that owns the allocation
    MemBlock block;
    /// opaque data passed back to block.free when the allocation is disposed
    Object blockData;
}
616 
/// Whether two adjacent resources must honor the linear/optimal granularity
/// rather than their plain alignment.
bool granularityMatters(in ResourceLayout l1, in ResourceLayout l2) pure
{
    // unknown layouts are handled conservatively; otherwise only
    // a transition between linear and optimal requires the granularity
    return l1 == ResourceLayout.unknown
        || l2 == ResourceLayout.unknown
        || l1 != l2;
}
623 
// granularity only matters between different (or unknown) layouts
unittest {
    // same known layout on both sides: plain alignment suffices
    assert(!granularityMatters(ResourceLayout.linear, ResourceLayout.linear));
    assert(!granularityMatters(ResourceLayout.optimal, ResourceLayout.optimal));
    // mixed linear/optimal neighbors must honor the granularity
    assert( granularityMatters(ResourceLayout.linear, ResourceLayout.optimal));
    assert( granularityMatters(ResourceLayout.optimal, ResourceLayout.linear));
}