/// A memory allocator for Gfx-d
module gfx.memalloc;

import gfx.core.log :       LogTag;
import gfx.core.rc :        AtomicRefCounted;
import gfx.graal.device :   Device;
import gfx.graal.memory :   DeviceMemory, MemoryProperties, MemoryRequirements,
                            MemoryType, MemProps;

enum gfxMemallocLogMask = 0x1000_0000;
package immutable gfxMemallocLog = LogTag("GFX-MEMALLOC", gfxMemallocLogMask);

/// Option flags for creating an Allocator
enum AllocatorFlags
{
    /// Default behavior, no flag set
    none = 0,
    /// If set, each allocation will have a dedicated DeviceMemory.
    /// Even if not set, some backends (e.g. OpenGL) require a dedicated DeviceMemory
    /// per allocation and will do so regardless of this flag.
    dedicatedOnly = 1,
}

/// Options defining allocation behavior for each heap of the device
struct HeapOptions
{
    /// How many bytes may be used on the heap.
    /// Set to 0 to forbid use of this heap, or to size_t.max to allow use of the entire heap.
    size_t maxUsage = size_t.max;
    /// Size of a single DeviceMemory on this heap.
    /// Set to 0 to use the default behavior for this heap.
    size_t blockSize = 0;
}

/// Options for the creation of an Allocator
struct AllocatorOptions
{
    /// option flags
    AllocatorFlags flags;
    /// One HeapOptions per heap in the device, or empty to use default behavior.
    /// Default behavior is to allow use of the entire heap. The default block size is
    /// 256MB for heaps larger than 1GB, and heapSize/8 for smaller ones.
    HeapOptions[] heapOptions;
}

/// Flags controlling an allocation of memory
enum AllocFlags {
    /// default behavior, no flags.
    none            = 0,
    /// Set to force the creation of a new DeviceMemory that will be dedicated to the allocation.
    dedicated       = 1,
    /// Set to prohibit the creation of a new DeviceMemory. This forces the use of an existing chunk, and fails if none can be found.
    neverAllocate   = 2,
}

/// Describes the usage of a memory allocation
enum MemoryUsage {
    /// No intended usage. The type of memory will not be influenced by the usage.
    unknown,
    /// Memory will be used on device only (MemProps.deviceLocal) and having it mappable
    /// on host is not requested (although it is possible on some devices).
    /// Usage:
    /// $(UL
    ///   $(LI Resources written and read by device, e.g. images used as attachments. )
    ///   $(LI Resources transferred from host once or infrequently and read by device multiple times,
    ///        e.g. textures, vertex buffers, uniforms etc. )
    /// )
    gpuOnly,
    /// Memory will be mappable on host. It usually means CPU (system) memory.
    /// Resources created for this usage may still be accessible to the device,
    /// but access to them can be slower. Guaranteed to be MemProps.hostVisible and MemProps.hostCoherent.
    /// Usage:
    /// $(UL $(LI Staging copy of resources used as transfer source.))
    cpuOnly,
    /// Memory that is both mappable on host (guaranteed to be MemProps.hostVisible)
    /// and preferably fast to access by GPU. CPU reads may be uncached and very slow.
    /// Usage:
    /// $(UL $(LI Resources written frequently by host (dynamic), read by device.
    /// E.g. textures, vertex buffers, uniform buffers updated every frame or every draw call.))
    cpuToGpu,
    /// Memory mappable on host (guaranteed to be MemProps.hostVisible) and cached.
    /// Usage:
    /// $(UL
    ///     $(LI Resources written by device, read by host - results of some computations,
    ///          e.g. screen capture, average scene luminance for HDR tone mapping.)
    ///     $(LI Any resources read or accessed randomly on host, e.g. CPU-side copy of
    ///          vertex buffer used as source of transfer, but also used for collision detection.)
    /// )
    gpuToCpu,
}

/// Structure controlling an allocation of memory
struct AllocOptions {
    /// Control flags
    AllocFlags flags;
    /// Intended usage. Will affect preferredProps and requiredProps.
    MemoryUsage usage;
    /// MemProps bits that are optional but preferred to be present.
    /// Allocation will favor memory types with these bits if available, but may
    /// fall back to other memory types.
    MemProps preferredProps;
    /// MemProps bits that must be set.
    /// Allocation will fail if no memory type satisfies all of these bits.
    MemProps requiredProps;
    /// Mask of memory type indices (0b0101 means indices 0 and 2) that, if not
    /// zero, will constrain MemoryRequirements.memTypeMask.
    uint memTypeIndexMask;

    /// Initializes an AllocOptions with usage
    static @property AllocOptions forUsage(MemoryUsage usage) {
        AllocOptions options;
        options.usage = usage;
        return options;
    }
    /// set flags to options
    AllocOptions withFlags(AllocFlags flags) {
        this.flags = flags;
        return this;
    }
    /// set preferredProps to options
    AllocOptions withPreferredProps(MemProps props) {
        this.preferredProps = props;
        return this;
    }
    /// set requiredProps to options
    AllocOptions withRequiredBits(MemProps props) {
        this.requiredProps = props;
        return this;
    }
    /// set type index mask to options
    AllocOptions withTypeIndexMask(uint indexMask) {
        this.memTypeIndexMask = indexMask;
        return this;
    }
}
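
/// Example (illustrative sketch): composing AllocOptions with the fluent setters.
/// The usage and index mask chosen here are arbitrary.
unittest {
    const opts = AllocOptions.forUsage(MemoryUsage.cpuToGpu)
            .withFlags(AllocFlags.none)
            .withTypeIndexMask(0b0101);
    assert(opts.usage == MemoryUsage.cpuToGpu);
    assert(opts.memTypeIndexMask == 0b0101);
}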


/// Create an Allocator for device and options
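///
/// A minimal usage sketch (assumes `device` is a valid Device obtained from the
/// backend; error handling omitted):
/// ---
/// AllocatorOptions opts;
/// opts.flags = AllocatorFlags.none;   // pooled allocations where supported
/// auto allocator = createAllocator(device, opts);
/// ---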
Allocator createAllocator(Device device, AllocatorOptions options)
{
    import gfx.graal : Backend;
    if ((options.flags & AllocatorFlags.dedicatedOnly) || device.instance.backend == Backend.gl3) {
        import gfx.memalloc.dedicated : DedicatedAllocator;
        return new DedicatedAllocator(device, options);
    }
    else {
        import gfx.memalloc.pool : PoolAllocator;
        return new PoolAllocator(device, options);
    }
}

/// Memory allocator for a device
abstract class Allocator : AtomicRefCounted
{
    import gfx.core.rc : atomicRcCode, Rc;
    import gfx.graal.buffer : BufferUsage;
    import gfx.graal.image : ImageInfo;

    mixin(atomicRcCode);

    package Device _device;
    package AllocatorOptions _options;
    package MemoryProperties _memProps;
    package size_t _linearOptimalGranularity;

    package this(Device device, AllocatorOptions options)
    {
        import gfx.core.rc : retainObj;

        _device = retainObj(device);
        _options = options;
        _memProps = device.physicalDevice.memoryProperties;
        _linearOptimalGranularity = device.physicalDevice.limits.linearOptimalGranularity;

        import std.algorithm : all;
        import std.exception : enforce;
        enforce(_memProps.types.all!(mt => mt.heapIndex < _memProps.heaps.length));
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        releaseObj(_device);
    }

    /// Device this allocator is bound to.
    final @property Device device() {
        return _device;
    }

    /// Allocate memory for the given requirements
    /// Returns: A MemAlloc object
    /// Throws: An Exception if memory could not be allocated
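    ///
    /// Example (sketch; `requirements` would typically come from a buffer or image
    /// created beforehand):
    /// ---
    /// const req = buf.memoryRequirements;
    /// auto alloc = allocator.allocate(req, AllocOptions.forUsage(MemoryUsage.cpuOnly));
    /// buf.bindMemory(alloc.mem, alloc.offset);
    /// ---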
    final MemAlloc allocate (in MemoryRequirements requirements,
                             in AllocOptions options=AllocOptions.init)
    {
        AllocResult res;
        if (allocateRaw(requirements, options, ResourceLayout.unknown, res)) {
            return new MemAlloc(
                res.mem, res.offset, requirements.size, res.block, res.blockData
            );
        }
        else {
            import std.format : format;
            throw new Exception(format(
                "Could not allocate memory for requirements: %s", requirements
            ));
        }
    }

    /// Create a buffer, then allocate and bind memory for its requirements
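    ///
    /// Example (sketch; the usage flags and size are arbitrary):
    /// ---
    /// auto vbuf = allocator.allocateBuffer(
    ///     BufferUsage.vertex | BufferUsage.transferDst, 1024,
    ///     AllocOptions.forUsage(MemoryUsage.gpuOnly)
    /// );
    /// // vbuf.buffer is the created buffer, already bound to memory
    /// ---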
    final BufferAlloc allocateBuffer (in BufferUsage usage, in size_t size,
                                      in AllocOptions options=AllocOptions.init)
    {
        auto buf = _device.createBuffer(usage, size);
        const requirements = buf.memoryRequirements;
        AllocResult res;
        if (allocateRaw(requirements, options, ResourceLayout.linear, res)) {
            buf.bindMemory(res.mem, res.offset);
            return new BufferAlloc(
                buf, res.offset, requirements.size, res.block, res.blockData
            );
        }
        else {
            import std.format : format;
            throw new Exception(format(
                "Could not allocate memory for buffer with usage %s and size %s",
                usage, size
            ));
        }
    }

    /// Create an image, then allocate and bind memory for its requirements
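    ///
    /// Example (sketch; assumes `info` is an ImageInfo describing the image to create):
    /// ---
    /// auto img = allocator.allocateImage(info,
    ///         AllocOptions.forUsage(MemoryUsage.gpuOnly));
    /// // img.image is the created image, already bound to memory
    /// ---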
    final ImageAlloc allocateImage (in ImageInfo info,
                                    in AllocOptions options=AllocOptions.init)
    {
        import gfx.graal.image : ImageTiling;

        auto img = _device.createImage(info);
        const requirements = img.memoryRequirements;
        const layout = info.tiling == ImageTiling.optimal ? ResourceLayout.optimal : ResourceLayout.linear;
        AllocResult res;
        if (allocateRaw(requirements, options, layout, res)) {
            img.bindMemory(res.mem, res.offset);
            return new ImageAlloc(
                img, res.offset, requirements.size, res.block, res.blockData
            );
        }
        else {
            import std.format : format;
            throw new Exception(format(
                "Could not allocate memory for image with info %s", info
            ));
        }
    }

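    /// Collect statistics about the current state of this allocator.
    /// The base implementation returns an empty AllocStats; concrete allocators
    /// may override it to report their blocks and chunks.
    ///
    /// Example (sketch):
    /// ---
    /// import std.stdio : writeln;
    /// writeln(allocator.collectStats());
    /// ---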
    AllocStats collectStats() {
        return AllocStats.init;
    }

    /// Attempt to allocate memory of the given memory type index for the given requirements.
    /// If successful, result is filled with the necessary data.
    /// Returns: true if successful, false otherwise.
    abstract protected bool tryAllocate (in MemoryRequirements requirements,
                                         in uint memoryTypeIndex,
                                         in AllocOptions options,
                                         in ResourceLayout layout,
                                         ref AllocResult result)
    in {
        assert(memoryTypeIndex < _memProps.types.length);
        assert(
            ((1 << memoryTypeIndex) & requirements.memTypeMask) != 0,
            "memoryTypeIndex is not compatible with requirements"
        );
    }

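    /// Find a suitable memory type and delegate to tryAllocate, falling back to the
    /// next best memory type (by clearing the failed index from the allowed mask)
    /// until allocation succeeds or no candidate remains.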
    private final bool allocateRaw (in MemoryRequirements requirements,
                                    in AllocOptions options,
                                    in ResourceLayout layout,
                                    ref AllocResult result)
    {
        uint allowedMask = requirements.memTypeMask;
        uint index = findMemoryTypeIndex(_memProps.types, allowedMask, options);
        if (index != uint.max) {
            if (tryAllocate(requirements, index, options, layout, result)) return true;

            while (allowedMask != 0) {
                // remove the former index from the possible choices
                allowedMask &= ~(1 << index);
                index = findMemoryTypeIndex(_memProps.types, allowedMask, options);
                // no remaining candidate satisfies the options
                if (index == uint.max) break;
                if (tryAllocate(requirements, index, options, layout, result)) return true;
            }
        }

        return false;
    }
}


/// Represents a single allocation within a DeviceMemory
class MemAlloc : AtomicRefCounted
{
    import gfx.core.rc : atomicRcCode, Rc;
    import gfx.graal.memory : MemoryMap;

    mixin(atomicRcCode);

    private DeviceMemory _mem;
    private size_t _offset;
    private size_t _size;
    private MemBlock _block;
    private Object _blockData;
    private size_t _mapCount;
    private void* _mapPtr;
    private bool _dedicated;

    package this(DeviceMemory mem, size_t offset, size_t size,
                 MemBlock block, Object blockData)
    {
        import gfx.core.rc : retainObj;

        _mem = retainObj(mem);
        _offset = offset;
        _size = size;
        _block = retainObj(block);
        _blockData = blockData;
        _dedicated = mem.size == size;
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        _block.free(_blockData);
        releaseObj(_mem);
        releaseObj(_block);
    }

    final @property size_t offset() const {
        return _offset;
    }

    final @property size_t size() const {
        return _size;
    }

    final @property DeviceMemory mem() {
        return _mem;
    }

    /// Artificially increment the mapping reference count in order
    /// to keep the memory mapped even if no MemoryMap is alive
    final void retainMap() {
        if (_dedicated) {
            dedicatedMap();
        }
        else {
            _block.map();
        }
    }

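    /// Release a mapping reference previously acquired with retainMap,
    /// unmapping the memory when the reference count drops to zero.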
    final void releaseMap() {
        if (_dedicated) {
            dedicatedUnmap();
        }
        else {
            _block.unmap();
        }
    }

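    /// Map the allocation into host address space and return a MemoryMap covering
    /// `offset .. offset+size` relative to this allocation (size is clamped to the
    /// allocation size). The returned MemoryMap holds a mapping reference, so the
    /// memory stays mapped at least as long as the map (or a retainMap call) is alive.
    ///
    /// Example (sketch; the allocation is assumed to be host-visible):
    /// ---
    /// {
    ///     auto mm = alloc.map();
    ///     // ... copy data through mm ...
    /// }   // mapping reference released when mm goes out of scope
    /// ---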
    final MemoryMap map(in size_t offset=0, in size_t size=size_t.max)
    {
        import std.algorithm : min;

        const off = this.offset + offset;
        const sz = min(this.size-offset, size);
        void* ptr;
        void delegate() unmap;

        if (_dedicated) {
            dedicatedMap();
            ptr = _mapPtr;
            unmap = &dedicatedUnmap;
        }
        else {
            ptr = _block.map();
            unmap = &_block.unmap;
        }

        auto data = ptr[off .. off+sz];
        return MemoryMap (_mem, off, data, unmap);
    }

    private void dedicatedMap() {
        if (!_mapCount) _mapPtr = _mem.mapRaw(0, _mem.size);
        ++_mapCount;
    }

    private void dedicatedUnmap() {
        --_mapCount;
        if (!_mapCount) {
            _mem.unmapRaw();
            _mapPtr = null;
        }
    }
}

final class BufferAlloc : MemAlloc
{
    import gfx.graal.buffer : Buffer;

    private Buffer _buffer;

    package this (Buffer buffer, size_t offset, size_t size, MemBlock block, Object blockData)
    {
        import gfx.core.rc : retainObj;

        super(buffer.boundMemory, offset, size, block, blockData);
        _buffer = retainObj(buffer);
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        releaseObj(_buffer);
        super.dispose();
    }

    final @property Buffer buffer() {
        return _buffer;
    }
}

final class ImageAlloc : MemAlloc
{
    import gfx.graal.image : Image;

    private Image _image;

    package this (Image image, size_t offset, size_t size, MemBlock block, Object blockData)
    {
        import gfx.core.rc : retainObj;

        super(image.boundMemory, offset, size, block, blockData);
        _image = retainObj(image);
    }

    override void dispose()
    {
        import gfx.core.rc : releaseObj;

        releaseObj(_image);
        super.dispose();
    }

    final @property Image image() {
        return _image;
    }
}


/// Find a memory type index suitable for the given allowedIndexMask and options.
/// Params:
///     types               = the memory types obtained from a device
///     allowedIndexMask    = the mask obtained from MemoryRequirements.memTypeMask
///     options             = optional AllocOptions that will constrain the choice
/// Returns: the index of a suitable memory type, or uint.max if none satisfies the requirements
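///
/// Example (sketch; `memProps` would come from PhysicalDevice.memoryProperties and
/// `requirements` from a buffer or image):
/// ---
/// const index = findMemoryTypeIndex(memProps.types, requirements.memTypeMask,
///         AllocOptions.forUsage(MemoryUsage.cpuToGpu));
/// if (index == uint.max) {
///     // no memory type satisfies the required properties
/// }
/// ---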
uint findMemoryTypeIndex(in MemoryType[] types,
                         in uint allowedIndexMask,
                         in AllocOptions options=AllocOptions.init)
{
    const allowedMask = options.memTypeIndexMask != 0 ?
            allowedIndexMask & options.memTypeIndexMask :
            allowedIndexMask;

    MemProps preferred = options.preferredProps;
    MemProps required = options.requiredProps;

    switch (options.usage) {
    case MemoryUsage.gpuOnly:
        preferred |= MemProps.deviceLocal;
        break;
    case MemoryUsage.cpuOnly:
        required |= MemProps.hostVisible | MemProps.hostCoherent;
        break;
    case MemoryUsage.cpuToGpu:
        required |= MemProps.hostVisible;
        preferred |= MemProps.deviceLocal;
        break;
    case MemoryUsage.gpuToCpu:
        required |= MemProps.hostVisible;
        preferred |= MemProps.hostCoherent | MemProps.hostCached;
        break;
    case MemoryUsage.unknown:
    default:
        break;
    }

    uint maxValue = uint.max;
    uint index = uint.max;

    foreach (i; 0 .. cast(uint)types.length) {
        const mask = 1 << i;
        // is this type allowed?
        if ((allowedMask & mask) != 0) {
            const props = types[i].props;
            // does it have the required properties?
            if ((props & required) == required) {
                // it is a valid candidate: compute its value as the number of
                // preferred flags present
                import core.bitop : popcnt;
                const value = popcnt(cast(uint)(props & preferred));
                if (maxValue == uint.max || value > maxValue) {
                    index = i;
                    maxValue = value;
                }
            }
        }
    }
    return index;
}

/// Layout of a resource.
/// This is important to determine whether page alignment or simple alignment
/// is necessary between two consecutive resources.
enum ResourceLayout {
    /// layout is unknown
    unknown,
    /// layout of buffers and linear images
    linear,
    /// layout of optimal images
    optimal,
}

/// Some stats of an allocator that can be collected with Allocator.collectStats
struct AllocStats
{
    /// A chunk is a suballocation from a block
    static struct Chunk
    {
        size_t start;
        size_t end;
        bool occupied;
        ResourceLayout layout;
    }

    /// A block maps one-to-one onto a DeviceMemory
    static struct Block
    {
        size_t size;
        Chunk[] chunks;
    }

    size_t totalReserved;
    size_t totalUsed;
    size_t totalFrag;
    size_t linearOptimalGranularity;
    Block[] blocks;

    string toString()
    {
        import std.format : format;
        string res = "AllocStats (\n";

        res ~= format("  total reserved: %s\n", totalReserved);
        res ~= format("  total used    : %s\n", totalUsed);
        res ~= format("  total frag    : %s\n", totalFrag);
        res ~= format("  granularity   : %s\n", linearOptimalGranularity);

        foreach (b; blocks) {
            res ~= "  DeviceMemory (\n";
            res ~= format("    size: %s\n", b.size);
            foreach (c; b.chunks) {
                res ~= "    Resource (\n";

                res ~= format("      start   : %s\n", c.start);
                res ~= format("      end     : %s\n", c.end);
                res ~= format("      occupied: %s\n", c.occupied);
                res ~= format("      layout  : %s\n", c.layout);

                res ~= "    )\n";
            }

            res ~= "  )\n";
        }

        res ~= ")\n";
        return res;
    }
}


package:

/// A block of memory associated to one DeviceMemory
interface MemBlock : AtomicRefCounted
{
    /// Increase the map count and return the cached pointer.
    /// If the map count was zero, the memory is mapped first and the pointer cached.
    void* map();
    /// Decrease the map count and unmap the memory when it reaches zero.
    void unmap();
    /// Called by MemAlloc when it is disposed to notify its memory block.
    void free(Object blockData);
}

/// The result of an allocation request
struct AllocResult
{
    DeviceMemory mem;
    size_t offset;
    MemBlock block;
    Object blockData;
}

/// Whether two adjacent resources should be checked for granularity alignment
bool granularityMatters(in ResourceLayout l1, in ResourceLayout l2) pure
{
    if (l1 == ResourceLayout.unknown || l2 == ResourceLayout.unknown) return true;
    return l1 != l2;
}

unittest {
    assert(!granularityMatters(ResourceLayout.linear, ResourceLayout.linear));
    assert(!granularityMatters(ResourceLayout.optimal, ResourceLayout.optimal));
    assert( granularityMatters(ResourceLayout.linear, ResourceLayout.optimal));
    assert( granularityMatters(ResourceLayout.optimal, ResourceLayout.linear));
}