module gfx.memalloc.pool;

package:

import gfx.graal.device :   Device;
import gfx.graal.memory :   DeviceMemory, MemoryHeap, MemoryProperties,
                            MemoryRequirements, MemoryType;
import gfx.memalloc;

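/// Allocator that suballocates resources from pools of DeviceMemory blocks,
/// keeping one MemoryPool per memory heap reported by the device.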
final class PoolAllocator : Allocator
{
    MemoryPool[] _pools;

    this(Device device, AllocatorOptions options)
    {
        super(device, options);

        foreach (i, mh; _memProps.heaps) {
            const heapOpts = _options.heapOptions.length > i ? _options.heapOptions[i] : HeapOptions.init;
            _pools ~= new MemoryPool(this, cast(uint)i, heapOpts, mh, _memProps.types);
        }
    }

    override AllocStats collectStats() {
        AllocStats stats;
        stats.linearOptimalGranularity = _linearOptimalGranularity;
        foreach (p; _pools) {
            p.feedStats(stats);
        }
        return stats;
    }

    override bool tryAllocate(in MemoryRequirements requirements,
                              in uint memTypeIndex, in AllocOptions options,
                              in ResourceLayout layout, ref AllocResult result)
    {
        auto pool = _pools[_memProps.types[memTypeIndex].heapIndex];
        return pool.allocate(memTypeIndex, requirements.alignment, requirements.size, options, layout, result);
    }

}

private:

enum size_t largeHeapThreshold = 1024*1024*1024;
enum size_t largeHeapBlockSize =  256*1024*1024;


/// Represents an entire heap of memory.
/// It is made of several MemoryBlock instances, each of which is associated
/// with a single DeviceMemory object.
final class MemoryPool
{
    PoolAllocator _allocator;
    uint _heapIndex;
    MemoryHeap _heap;
    MemoryType[] _types;

    size_t _maxUsage;
    size_t _blockSize;
    size_t _used;

    MemoryBlock[] _blocks;

    this (PoolAllocator allocator, uint heapIndex, HeapOptions options, MemoryHeap heap, MemoryType[] types) {
        _allocator = allocator;
        _heapIndex = heapIndex;
        _heap = heap;
        _types = types;
        _maxUsage = options.maxUsage;
        _blockSize = options.blockSize;

        if (_maxUsage == size_t.max) {
            _maxUsage = heap.size;
        }
        if (_blockSize == 0 || _blockSize > _maxUsage) {
            if (heap.size > largeHeapThreshold) {
                _blockSize = largeHeapBlockSize;
            }
            else {
                import std.math : isPowerOf2, nextPow2;
                // target roughly an eighth of the heap, rounded down to a power of two
                auto target = heap.size / 8;
                if (!isPowerOf2(target)) {
                    target = nextPow2(target) / 2;
                }
                _blockSize = target;
            }
        }
        if (_blockSize > _maxUsage) {
            _blockSize = _maxUsage;
        }
    }

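    /// Tries to suballocate from an existing block of the right memory type,
    /// and falls back to allocating a new DeviceMemory block if none fits.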
    bool allocate(uint memTypeIndex, size_t alignment, size_t size,
                  in AllocOptions options, in ResourceLayout layout,
                  ref AllocResult result)
    {
        import std.algorithm : filter;
        if (!(options.flags & AllocFlags.dedicated)) {
            foreach (b; _blocks.filter!(b => (b._typeIndex == memTypeIndex && !b._dedicated))) {
                if (b.allocate(alignment, size, layout, result)) return true;
            }
        }
        if (options.flags & AllocFlags.neverAllocate) return false;
        if (size > (_maxUsage - _used)) return false;
        return newBlockAllocation(memTypeIndex, size, options, layout, result);
    }

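    /// Called when a MemoryBlock is disposed: releases its memory from the
    /// pool accounting and forgets the block.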
    void free(MemoryBlock block) {
        import std.algorithm : remove;
        _used -= block._size;
        _blocks = _blocks.remove!(b => b is block);
    }

    void feedStats(ref AllocStats stats) {
        foreach (b; _blocks) {
            b.feedStats(stats);
        }
    }

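    /// Allocates a new DeviceMemory block (at least _blockSize bytes unless
    /// the allocation is dedicated) and suballocates the request from it.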
    private bool newBlockAllocation(in uint memTypeIndex, in size_t size,
                                    in AllocOptions options,
                                    in ResourceLayout layout,
                                    ref AllocResult result) {
        Device dev = _allocator._device;
        DeviceMemory dm;
        try {
            import std.algorithm : max;
            auto sz = max(_blockSize, size);
            if (options.flags & AllocFlags.dedicated) {
                sz = size;
            }
            dm = dev.allocateMemory(memTypeIndex, sz);
        }
        catch (Exception ex) {
            return false;
        }
        auto b = new MemoryBlock(_allocator, this, dm, dm.size == size);
        _used += dm.size;
        _blocks ~= b;
        return b.allocate(0, size, layout, result);
    }

}

// Linked list of memory chunks.
// Each chunk is either occupied (and then associated with a single allocation)
// or free, in which case it is merged with its free neighbours.
final class MemoryChunk {
    size_t offset;
    size_t size;

    ResourceLayout layout;
    bool occupied;

    MemoryChunk prev;
    MemoryChunk next;

    this (size_t offset, size_t size, MemoryChunk prev=null, MemoryChunk next=null) {
        this.offset = offset;
        this.size = size;
        this.prev = prev;
        this.next = next;
    }

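    /// Granularity page containing the first byte of this chunk.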
    size_t pageBeg(in size_t granularity) {
        return page(offset, granularity);
    }

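    /// Granularity page containing the last byte of this chunk.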
    size_t pageEnd(in size_t granularity) {
        return page(offset+size-1, granularity);
    }
}

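/// Rounds address up to the next multiple of alignment (returns address
/// unchanged if alignment is zero).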
size_t alignUp(in size_t address, in size_t alignment) pure nothrow @nogc
{
    if (alignment == 0) return address;
    return ((address + alignment - 1) / alignment) * alignment;
}

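/// Start address of the granularity page containing addr
/// (granularity is assumed to be a power of two).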
size_t page(in size_t addr, in size_t granularity) pure nothrow @nogc
{
    return addr & ~(granularity-1);
}

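/// Whether a resource ending at prevOffset+prevSize-1 and a resource starting
/// at nextOffset would share the same granularity page.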
bool onSamePage(in size_t prevOffset, in size_t prevSize, in size_t nextOffset, in size_t granularity)
{
    const prevPage = page(prevOffset+prevSize-1, granularity);
    const nextPage = page(nextOffset, granularity);
    return prevPage == nextPage;
}

/// Represents a single DeviceMemory object, suballocated as a linked list of
/// MemoryChunk.
class MemoryBlock : MemBlock
{
    import gfx.core.rc : atomicRcCode, Rc;

    mixin(atomicRcCode);

    Rc!PoolAllocator _allocator; // needed to keep it alive as long as allocations are alive
    MemoryPool _pool;
    Rc!DeviceMemory _memory;
    bool _dedicated;
    size_t _size;
    size_t _mapCount;
    void* _mapPtr;
    uint _typeIndex;
    size_t _granularity;

    MemoryChunk _chunk0;

    this (PoolAllocator allocator, MemoryPool pool, DeviceMemory memory, bool dedicated) {
        _allocator = allocator;
        _pool = pool;
        _memory = memory;
        _dedicated = dedicated;
        _size = memory.size;
        _typeIndex = memory.typeIndex;
        _granularity = allocator._linearOptimalGranularity;

        // start with a single free chunk that occupies the whole block
        _chunk0 = new MemoryChunk(0, _size);
    }

    override void dispose() {
        _pool.free(this);
        _memory.unload();
        _allocator.unload();
    }

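    /// First-fit scan of the chunk list: finds a free chunk that can hold
    /// `size` bytes at `alignment`, adjusting the offset when the linear/optimal
    /// granularity would cause a page conflict with neighbouring allocations.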
    bool allocate(size_t alignment, size_t size, ResourceLayout layout, ref AllocResult result)
    {
        for (auto chunk=_chunk0; chunk !is null; chunk=chunk.next) {

            if (chunk.occupied) continue;

            size_t offset = alignUp(chunk.offset, alignment);

            // align up if page conflict in previous chunks
            if (_granularity > 1) {
                bool pageConflict;
                const thisPage = page(offset, _granularity);
                auto prev = chunk.prev;
                while (prev) {
                    if (prev.pageEnd(_granularity) == thisPage) {
                        if (prev.occupied && granularityMatters(prev.layout, layout)) {
                            pageConflict = true;
                            break;
                        }
                    }
                    else {
                        break;
                    }
                    prev = prev.prev;
                }
                if (pageConflict) {
                    offset = alignUp(offset, _granularity);
                }
            }

            if (offset+size > chunk.offset+chunk.size) continue;

            // discard this chunk if page conflict in next chunks
            if (_granularity > 1) {
                bool pageConflict;
                const thisPage = page(offset+size-1, _granularity);
                auto next = chunk.next;
                while (next) {
                    if (next.pageBeg(_granularity) == thisPage) {
                        if (next.occupied && granularityMatters(next.layout, layout)) {
                            pageConflict = true;
                            break;
                        }
                    }
                    else {
                        break;
                    }
                    next = next.next;
                }
                if (pageConflict) {
                    continue;
                }
            }

            return allocateHere(chunk, offset, size, layout, result);
        }

        return false;
    }

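    /// Maps the whole underlying memory; nested map/unmap calls are reference
    /// counted so the memory is only mapped and unmapped once.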
    override void *map()
    {
        if (!_mapCount) {
            _mapPtr = _memory.mapRaw(0, _size);
        }
        _mapCount++;
        return _mapPtr;
    }

    override void unmap()
    {
        _mapCount--;
        if (!_mapCount) {
            _memory.unmapRaw();
            _mapPtr = null;
        }
    }

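    /// Releases the chunk held by an allocation and coalesces it with its
    /// free neighbours.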
    override void free(Object returnData) {
        import gfx.core.util : unsafeCast;
        auto chunk = unsafeCast!MemoryChunk(returnData);
        chunk.occupied = false;
        chunk.layout = ResourceLayout.unknown;
        // merge this chunk with previous and next one if they are free
        if (chunk.prev && !chunk.prev.occupied) {
            auto cp = chunk.prev;
            cp.next = chunk.next;
            if (cp.next) cp.next.prev = cp;
            cp.size = chunk.size+chunk.offset - cp.offset;
            chunk = cp;
        }
        if (chunk.next && !chunk.next.occupied) {
            auto cn = chunk.next;
            chunk.next = cn.next;
            if (chunk.next) chunk.next.prev = chunk;
            chunk.size = cn.size+cn.offset - chunk.offset;
        }
    }

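    /// Marks `chunk` as occupied at `offset` (any alignment padding before
    /// `offset` becomes external fragmentation) and splits the remaining
    /// space off into a new free chunk.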
    bool allocateHere(MemoryChunk chunk, in size_t offset, in size_t size,
                      in ResourceLayout layout, ref AllocResult result) {
        assert(chunk);
        assert(!chunk.occupied);
        assert(chunk.offset <= offset);
        const shift = offset - chunk.offset;
        assert(chunk.size >= shift+size);

        if (offset != chunk.offset) {
            // alignment padding necessary
            // we create shift bytes of external fragmentation
            chunk.offset = offset;
            chunk.size -= shift;
        }
        if (size < chunk.size) {
            // trim right
            auto c = new MemoryChunk(offset+size, chunk.size-size, chunk, chunk.next);
            chunk.next = c;
            if (c.next) c.next.prev = c;
            chunk.size = size;
        }

        chunk.occupied = true;
        chunk.layout = layout;
        result.mem = _memory.obj;
        result.offset = offset;
        result.block = this;
        result.blockData = chunk;
        return true;
    }

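    /// Records this block's reservation, per-chunk usage and fragmentation
    /// into the global stats.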
    void feedStats(ref AllocStats stats) {
        stats.totalReserved += _size;
        AllocStats.Block sb;
        sb.size = _size;
        size_t lastEnd;
        bool lastOccupied;
        for (auto chunk=_chunk0; chunk !is null; chunk = chunk.next) {
            AllocStats.Chunk sc;
            sc.start = chunk.offset;
            sc.end = chunk.offset+chunk.size;
            sc.layout = chunk.layout;
            sc.occupied = chunk.occupied;
            sb.chunks ~= sc;
            if (chunk.occupied) {
                stats.totalUsed += chunk.size;
                if (lastOccupied) {
                    stats.totalFrag += chunk.offset-lastEnd;
                }
            }
            lastOccupied = chunk.occupied;
            lastEnd = sc.end;
        }
        stats.blocks ~= sb;
    }

    invariant {
        assert(_chunk0 && _chunk0.offset == 0);
    }
}