/*-------------------------------------------------------------------------
 *
 * mspan.c
 *	  Memory span management.
 *
 * Portions Copyright (c) 1996-2013, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/backend/utils/mmgr/mspan.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "utils/mspan.h"

/*
 * PostgreSQL normally uses 8kB pages for most things, but many common
 * architecture/operating system pairings use a 4kB page size for memory
 * allocation, so we do that here also.  We assume that a large allocation
 * is likely to begin on a page boundary; if not, we'll discard bytes from
 * the beginning and end of the object and use only the middle portion that
 * is properly aligned.  This works, but is not ideal, so it's best to keep
 * this conservatively small.  There don't seem to be any common architectures
 * where the page size is less than 4kB, so this should be good enough; also,
 * the smaller we make it, the bigger the page map will be.
 */
#define MSPAN_PAGE_BITS			12
#define MSPAN_PAGE_SIZE			(1 << MSPAN_PAGE_BITS)
#define MSPAN_PAGE_MASK			(MSPAN_PAGE_SIZE - 1)

/* Maximum number of pages for a 32-bit address space. */
#define MSPAN_MAX_32BIT_PAGES	(1 << (32 - MSPAN_PAGE_BITS))

/*
 * Amount of space to allocate from the operating system at one time, as
 * a multiple of our page size.  The first chunk will be of the first size
 * in the array, and then we work up as we allocate more chunks.
 */
static Size mspan_sysalloc_pages[] = {256, 512, 1024, 2048, 4096, 8192};
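
/*
 * For illustration only: a minimal sketch of how an OS chunk that is not
 * page-aligned gets trimmed to the aligned middle portion, as described
 * above.  The MSPAN_EXAMPLES guard is hypothetical (this block is not part
 * of the build), and the function name and parameters are assumptions for
 * the example; the real trimming logic lives in mspan_allocate_span().
 */
#ifdef MSPAN_EXAMPLES
static Size
mspan_example_usable_pages(char *chunk, Size npages, Size *first_page)
{
	Size		usable = npages;

	*first_page = ((Size) chunk) >> MSPAN_PAGE_BITS;
	if ((((Size) chunk) & MSPAN_PAGE_MASK) != 0)
	{
		/* The first and last pages are partial, so both are unusable. */
		++*first_page;
		usable -= 2;
	}
	return usable;
}
#endif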

/*
 * Small allocations are handled by dividing a relatively large chunk of
 * memory called a superblock into many small objects of equal size.  The
 * chunk sizes are defined by the following array.  Larger size classes are
 * spaced more widely than smaller size classes.  We fudge the spacing for
 * size classes >1kB to avoid space wastage: based on the knowledge that we
 * plan to allocate 64kB superblocks, we bump the maximum object size up
 * to the largest multiple of 8 bytes that still lets us fit the same
 * number of objects into one superblock.
 *
 * NB: Because of this fudging, if the size of a superblock is ever changed,
 * these size classes should be reworked to be optimal for the new size.
 *
 * NB: The optimal spacing for size classes, as well as the size of the
 * superblocks themselves, is not a question that has one right answer.
 * Some allocators (such as tcmalloc) use more closely-spaced size classes
 * than we do here, while others (like aset.c) use more widely-spaced classes.
 * Spacing the classes more closely avoids wasting memory within individual
 * chunks, but also means a larger number of potentially-unfilled superblocks.
 * This system is really only suitable for allocating relatively large amounts
 * of memory, where the unfilled superblocks will be a small percentage of
 * the total allocations.
 */
static const uint16 mspan_size_classes[] = {
	8, 16, 24, 32, 40, 48, 56, 64,		/* 8 classes separated by 8 bytes */
	80, 96, 112, 128,					/* 4 classes separated by 16 bytes */
	160, 192, 224, 256,					/* 4 classes separated by 32 bytes */
	320, 384, 448, 512,					/* 4 classes separated by 64 bytes */
	640, 768, 896, 1024,				/* 4 classes separated by 128 bytes */
	1280, 1560, 1816, 2048,				/* 4 classes separated by ~256 bytes */
	2616, 3120, 3640, 4096,				/* 4 classes separated by ~512 bytes */
	5456, 6552, 7280, 8192				/* 4 classes separated by ~1024 bytes */
};
#define MSPAN_SUPERBLOCK_SIZE		65536	/* must be a multiple of page size */
#define MSPAN_PAGES_PER_SUPERBLOCK	(MSPAN_SUPERBLOCK_SIZE >> MSPAN_PAGE_BITS)

/*
 * The following lookup table is used to map the size of small objects
 * (less than 1kB) onto the corresponding size class.  To use this table,
 * index into it with (size - 1) / MSPAN_SIZE_CLASS_MAP_QUANTUM.
 */
static char mspan_size_class_map[] = {
	0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 9, 9, 10, 10, 11, 11,
	12, 12, 12, 12, 13, 13, 13, 13, 14, 14, 14, 14, 15, 15, 15, 15,
	16, 16, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, 17, 17, 17,
	18, 18, 18, 18, 18, 18, 18, 18, 19, 19, 19, 19, 19, 19, 19, 19,
	20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,
	21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
	22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22, 22,
	23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23
};
#define MSPAN_SIZE_CLASS_MAP_QUANTUM	8

/*
 * We divide partially-filled superblocks into four fullness classes.
 * Generally, fullness class N represents blocks where the percentage of
 * allocated objects is >= (N * 25%) and < ((N + 1) * 25%), but we only ever
 * allocate from superblocks in fullness class 1, so the active superblock
 * will always be in that class regardless of fullness.  Moreover, we're
 * lazy about moving superblocks between lists, so there's no guarantee that
 * the actual degree of fullness for a given superblock matches the list
 * that it's currently on.
 */
#define MSPAN_NUMBER_OF_FULLNESS_CLASSES	4
#define MSPAN_SMALL_ALLOCATION_LISTS \
	(lengthof(mspan_size_classes) * MSPAN_NUMBER_OF_FULLNESS_CLASSES)

/*
 * Some spans are superblocks; in those cases, the span_type will be equal
 * to the size class.  We use other constants to represent free spans,
 * large allocations, and other types of special spans.
 */
#define MSPAN_TYPE_FREE				((uint16) -1)
#define MSPAN_TYPE_LARGE			((uint16) -2)
#define MSPAN_TYPE_SPAN_OF_SPANS	((uint16) -3)

/*
 * Management information for a span of memory.
 */
struct mspan
{
	relptr(void) parent;		/* Context if used, manager if free. */
	relptr(mspan) prevspan;		/* Previous span. */
	relptr(mspan) nextspan;		/* Next span. */
	Size		first_page;		/* Starting page number. */
	Size		npages;			/* Length of span in pages. */
	uint16		span_type;		/* Type of span. */
	uint16		ninitialized;	/* Maximum number of objects ever allocated. */
	uint16		nused;			/* Number of objects currently allocated. */
	uint16		firstfree;		/* First object on free list. */
	void	   *syschunk;		/* Pointer returned by OS malloc. */
	Size		syspages;		/* Original size of span, before splitting. */
};
#define MSPAN_FIRSTFREE_NONE	((uint16) -1)

/*
 * Management information for an allocation context.
 */
struct mspan_context
{
	relptr(mspan_manager) manager;
	relptr(mspan) large_allocation;
	relptr(mspan) small_allocation[MSPAN_SMALL_ALLOCATION_LISTS];
};
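
/*
 * For illustration only: a minimal sketch of how a request size maps onto a
 * size class, mirroring the lookup-table-plus-binary-search logic used in
 * mspan_alloc().  For example, a 100-byte request lands in class 10, the
 * 112-byte class.  The MSPAN_EXAMPLES guard and the function name are
 * hypothetical; this block is not part of the build.
 */
#ifdef MSPAN_EXAMPLES
static uint16
mspan_example_size_to_class(Size size)
{
	Assert(size > 0 &&
		   size <= mspan_size_classes[lengthof(mspan_size_classes) - 1]);

	if (size < lengthof(mspan_size_class_map) * MSPAN_SIZE_CLASS_MAP_QUANTUM)
		return mspan_size_class_map[(size - 1) / MSPAN_SIZE_CLASS_MAP_QUANTUM];
	else
	{
		/* Binary search for the smallest class that can hold the object. */
		uint16		min = mspan_size_class_map[lengthof(mspan_size_class_map) - 1];
		uint16		max = lengthof(mspan_size_classes) - 1;

		while (min < max)
		{
			uint16		mid = (min + max) / 2;

			if (mspan_size_classes[mid] < size)
				min = mid + 1;
			else
				max = mid;
		}
		return min;
	}
}
#endif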

/* Helper functions. */
static mspan_context *mspan_allocate_context_descriptor(char *base,
									mspan_manager *mgr);
static char *mspan_allocate_from_superblock(char *base, mspan *superblock);
static mspan *mspan_allocate_span_descriptor(char *base, mspan_manager *mgr);
static mspan *mspan_allocate_span(char *base, mspan_manager *mgr,
					mspan_context *cxt, uint16 span_type, Size pages);
static void mspan_destroy_span(char *base, mspan *span);
static void mspan_ensure_active_superblock(char *base, mspan_context *cxt,
							   uint16 size_class);
static mspan *mspan_find_free_span(char *base, mspan_manager *mgr,
					 Size minpages, Size maxpages);
static void mspan_initialize_span(char *base, mspan_manager *mgr,
					  mspan_context *cxt, mspan *span, uint16 span_type);
static void mspan_link_span_to_context(char *base, mspan_context *cxt,
						   mspan *span);
static void mspan_link_span_to_manager(char *base, mspan_manager *mgr,
						   mspan *span);
static void mspan_release_span(char *base, mspan_manager *mgr, mspan *span);
static void mspan_unlink_span(char *base, mspan *span);
static void mspan_update_page_map(char *base, mspan_manager *mgr,
					  Size first_page, Size npages, Size value);

/*
 * Initialize backend-private mspan_manager.
 *
 * We must be prepared to manage memory anywhere in the process address
 * space.
 */
void
mspan_initialize_private_manager(mspan_manager *mgr)
{
	const unsigned bits = SIZEOF_SIZE_T * BITS_PER_BYTE;

	memset(mgr, 0, sizeof(mspan_manager));
	aspace_map_initialize(&mgr->page_map,
						  INT64CONST(1) << (bits - MSPAN_PAGE_BITS),
						  bits <= 32 ? ASPACE_MAP_32BIT_VALUES : 0);
}

/*
 * Initialize dynamic shared memory mspan_manager.
 *
 * We need only be prepared to manage the specified number of bytes.
 */
mspan_manager *
mspan_initialize_dsm_manager(dsm_segment *seg, void *start, Size nbytes)
{
	char	   *segbase = dsm_segment_address(seg);
	Size		segsize = dsm_segment_map_length(seg);
	char	   *astart = start;
	char	   *aend = astart + nbytes;
	mspan_manager *mgr;

	/* Arena to be managed must be within the segment. */
	Assert(astart >= segbase && astart + nbytes <= segbase + segsize);

	/* Arena to be managed must not be smaller than the metadata. */
	Assert(nbytes >= sizeof(mspan_manager));

	/* Allocate and zero space for the manager. */
	mgr = (mspan_manager *) astart;
	astart += sizeof(mspan_manager);
	memset(mgr, 0, sizeof(mspan_manager));

	/* Initialize those fields that require it. */
	mgr->base = astart - segbase;
	if ((mgr->base & MSPAN_PAGE_MASK) != 0)
		mgr->base = (mgr->base & ~MSPAN_PAGE_MASK) + MSPAN_PAGE_SIZE;
	mgr->npages = ((aend - segbase) - mgr->base) >> MSPAN_PAGE_BITS;
	Assert(mgr->npages > 0);
	aspace_map_initialize(&mgr->page_map, mgr->npages,
						  mgr->npages <= MSPAN_MAX_32BIT_PAGES ?
						  ASPACE_MAP_32BIT_VALUES : 0);

	return mgr;
}

/*
 * Create a new allocation context within an address space.
 */
mspan_context *
mspan_context_create(dsm_segment *seg, mspan_manager *mgr)
{
	char	   *base = (seg != NULL ? dsm_segment_address(seg) : NULL);
	mspan_context *cxt;

	if (relptr_is_null(mgr->freecontext))
		cxt = mspan_allocate_context_descriptor(base, mgr);
	else
	{
		/* Pop a previously-allocated context from the free list. */
		cxt = relptr_access(base, mgr->freecontext);
		mgr->freecontext.relptr_off = * (Size *) cxt;
	}

	/* All lists of allocations begin empty. */
	memset(cxt, 0, sizeof(mspan_context));

	/* Increment the number of active contexts. */
	++mgr->ncontexts;

	return cxt;
}
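
/*
 * For illustration only: a minimal sketch of the intended usage of a
 * backend-private manager and context.  The MSPAN_EXAMPLES guard, the static
 * manager variable, and the function are assumptions for the example, not
 * part of this module's API.
 */
#ifdef MSPAN_EXAMPLES
static mspan_manager example_private_manager;

static void
mspan_example_private_usage(void)
{
	mspan_context *cxt;
	void	   *chunk;

	/* With no DSM segment, pass NULL for the segment everywhere. */
	mspan_initialize_private_manager(&example_private_manager);
	cxt = mspan_context_create(NULL, &example_private_manager);

	chunk = mspan_alloc(NULL, cxt, 100, 0);		/* served from the 112-byte class */

	/* Destroying the context releases everything allocated within it. */
	mspan_context_destroy(NULL, cxt);
	(void) chunk;
}
#endif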

/*
 * Destroy an allocation context within an address space.
 *
 * This releases all storage associated with the context.
 */
void
mspan_context_destroy(dsm_segment *seg, mspan_context *cxt)
{
	char	   *base = (seg != NULL ? dsm_segment_address(seg) : NULL);
	mspan_manager *mgr = relptr_access(base, cxt->manager);
	int			i;

	/* Release large allocations one at a time. */
	while (!relptr_is_null(cxt->large_allocation))
	{
		mspan	   *span = relptr_access(base, cxt->large_allocation);

		mspan_release_span(base, mgr, span);
	}

	/* Release small allocations one superblock at a time. */
	for (i = 0; i < MSPAN_SMALL_ALLOCATION_LISTS; ++i)
	{
		while (!relptr_is_null(cxt->small_allocation[i]))
		{
			mspan	   *span = relptr_access(base, cxt->small_allocation[i]);

			mspan_release_span(base, mgr, span);
		}
	}

	/* Put this context object back on the manager's free list. */
	* (Size *) cxt = mgr->freecontext.relptr_off;
	relptr_store(base, mgr->freecontext, cxt);
}

/*
 * Allocate memory.
 */
void *
mspan_alloc(dsm_segment *seg, mspan_context *cxt, Size size, int flags)
{
	char	   *base = (seg != NULL ? dsm_segment_address(seg) : NULL);
	uint16		size_class;
	int			aidx;

	/* If it's bigger than the largest size class, allocate whole pages. */
	if (size > mspan_size_classes[lengthof(mspan_size_classes) - 1])
	{
		mspan_manager *mgr = relptr_access(base, cxt->manager);
		Size		pages = (size + MSPAN_PAGE_SIZE - 1) >> MSPAN_PAGE_BITS;
		mspan	   *span;

		span = mspan_allocate_span(base, mgr, cxt, MSPAN_TYPE_LARGE, pages);
		if (span == NULL)
		{
			if (base == NULL)
				ereport(ERROR,
						(errcode(ERRCODE_OUT_OF_MEMORY),
						 errmsg("out of memory")));
			else
				ereport(ERROR,
						(errcode(ERRCODE_OUT_OF_MEMORY),
						 errmsg("out of shared memory")));
		}

		/* Return the start of the span's storage. */
		return base + span->first_page * MSPAN_PAGE_SIZE;
	}

	/* Map allocation to a size class. */
	if (size < lengthof(mspan_size_class_map) * MSPAN_SIZE_CLASS_MAP_QUANTUM)
	{
		int			mapidx;

		/* Sizes 1..8 map to index 0, sizes 9..16 to index 1, and so on. */
		mapidx = (size == 0 ? 0 : (size - 1) / MSPAN_SIZE_CLASS_MAP_QUANTUM);
		size_class = mspan_size_class_map[mapidx];
	}
	else
	{
		uint16		min = mspan_size_class_map[lengthof(mspan_size_class_map) - 1];
		uint16		max = lengthof(mspan_size_classes) - 1;

		while (min < max)
		{
			uint16		mid = (min + max) / 2;
			uint16		class_size = mspan_size_classes[mid];

			if (class_size < size)
				min = mid + 1;
			else
				max = mid;
		}
		size_class = min;
	}
	Assert(size <= mspan_size_classes[size_class]);

	/*
	 * Allocate from a superblock for the appropriate size class.
	 *
	 * We always allocate from fullness class 1.  Whatever superblock is
	 * at the head of that list becomes our victim for allocation until it's
	 * completely full, at which point we'll move it to its proper fullness
	 * class and allocate from the next block on the list.  If there isn't
	 * one, we'll call mspan_ensure_active_superblock to find or create a
	 * suitable block.
	 *
	 * You might wonder why we allocate from fullness class 1 rather than
	 * fullness class 0.  The reason is that it's much better to have a
	 * smaller number of superblocks with higher average utilization than a
	 * larger number with lower utilization.  When a superblock has only a
	 * few remaining allocations, we prefer to hold off allocating from it
	 * in the hopes that the remaining chunks will soon be freed, allowing
	 * us to deallocate the entire superblock.
	 */
	aidx = size_class * MSPAN_NUMBER_OF_FULLNESS_CLASSES + 1;
	for (;;)
	{
		mspan	   *superblock;
		void	   *result;

		/* Find active superblock. */
		if (relptr_is_null(cxt->small_allocation[aidx]))
			mspan_ensure_active_superblock(base, cxt, size_class);
		superblock = relptr_access(base, cxt->small_allocation[aidx]);
		Assert(superblock->span_type == size_class);

		/* Allocate from superblock if possible. */
		result = mspan_allocate_from_superblock(base, superblock);
		if (result != NULL)
			return result;

		/* Move active superblock to proper fullness class. */
		mspan_unlink_span(base, superblock);
		mspan_link_span_to_context(base, cxt, superblock);
	}
}

/*
 * Allocate new space for a new context descriptor.
 *
 * We expect the number of contexts to remain small.  Therefore, when
 * allocating backend-local memory, we allocate them one at a time from the
 * OS; and when allocating from dynamic shared memory, we allocate space for
 * them one page at a time, rather than (for example) a full superblock.
 *
 * Context descriptors are never freed; instead, when the user destroys a
 * context, we just push the context descriptor onto a free list.  Because
 * of this, we don't need a span describing the space set aside for context
 * descriptors, or page map entries pointing to it.  This helps us avoid
 * circular dependencies inside the allocator.
 */
static mspan_context *
mspan_allocate_context_descriptor(char *base, mspan_manager *mgr)
{
	mspan_context *cxt = NULL;
	mspan	   *span;
	Size		pageno;
	Size		i;

	/* Outside of a dynamic shared memory segment, just allocate from OS. */
	if (base == NULL)
	{
		cxt = malloc(sizeof(mspan_context));
		if (cxt == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));
		return cxt;
	}

	/*
	 * We must allocate from within the segment, so we can't fall back on
	 * malloc.  It's desirable to avoid fragmenting spans that are large
	 * enough to contain a superblock, but smaller spans are not as useful,
	 * so they're a good way to satisfy our single-page request.  Therefore,
	 * we first look for a small span, then try to allocate from the
	 * boundary, then finally look for a large span.  If none of that works,
	 * we're out of memory.
	 */
	span = mspan_find_free_span(base, mgr, 1, MSPAN_PAGES_PER_SUPERBLOCK - 1);
	if (span != NULL)
		pageno = span->first_page;
	else
	{
		if (mgr->boundary < mgr->npages)
			pageno = mgr->boundary++;
		else
		{
			span = mspan_find_free_span(base, mgr,
										MSPAN_PAGES_PER_SUPERBLOCK, 0);
			if (span == NULL)
				ereport(ERROR,
						(errcode(ERRCODE_OUT_OF_MEMORY),
						 errmsg("out of shared memory")));
			pageno = span->first_page;
		}
	}

	if (span != NULL)
	{
		/*
		 * If the span is just one page, deallocate it completely (context
		 * objects are never freed, so this is OK).  Otherwise, remove
		 * the first page from the span and put the rest back on the
		 * appropriate free list.  Also adjust the page map entries as
		 * appropriate.
		 */
		Assert(span->span_type == MSPAN_TYPE_FREE);
		mspan_update_page_map(base, mgr, pageno, 1, 0);
		mspan_unlink_span(base, span);
		if (span->npages == 1)
			mspan_destroy_span(base, span);
		else
		{
			++span->first_page;
			--span->npages;
			mspan_link_span_to_manager(base, mgr, span);

			/*
			 * The last-page entry for this span is still OK, so no need to
			 * update that.  Technically, the first-page entry isn't needed
			 * any more since the page we just stole will never be freed, but
			 * let's do it just to be consistent.
			 */
			mspan_update_page_map(base, mgr, span->first_page, 1,
								  ((char *) span) - base);
		}
	}

	/*
	 * OK, we have a page, either from a span or from the boundary.  Carve
	 * it up into chunks of just the right size, being careful not to let
	 * the last chunk overrun the end of the page.
	 */
	for (i = 0; i + MAXALIGN(sizeof(mspan_context)) <= MSPAN_PAGE_SIZE;
		 i += MAXALIGN(sizeof(mspan_context)))
	{
		Size		offset = pageno * MSPAN_PAGE_SIZE + i;

		/* Plan to return the first object as the context. */
		if (i == 0)
		{
			cxt = (mspan_context *) (base + offset);
			continue;
		}

		/* Push remaining objects onto the free list. */
		* (Size *) (base + offset) = mgr->freecontext.relptr_off;
		mgr->freecontext.relptr_off = offset;
	}

	Assert(cxt != NULL);
	return cxt;
}

/*
 * Attempt to allocate an object from a superblock.
 */
static char *
mspan_allocate_from_superblock(char *base, mspan *superblock)
{
	char	   *spanbase;
	char	   *result;
	uint16		object_size;
	uint16		total;

	/* Quick exit if there are no free objects. */
	if (superblock->span_type == MSPAN_TYPE_SPAN_OF_SPANS)
	{
		/* A span-of-spans holds mspan descriptors, including its own. */
		object_size = MAXALIGN(sizeof(mspan));
	}
	else
		object_size = mspan_size_classes[superblock->span_type];
	total = MSPAN_SUPERBLOCK_SIZE / object_size;
	if (superblock->nused >= total)
		return NULL;

	/* Do the allocation. */
	spanbase = base + superblock->first_page * MSPAN_PAGE_SIZE;
	if (superblock->firstfree != MSPAN_FIRSTFREE_NONE)
	{
		/* There's a freed object available for reuse.  Allocate it. */
		result = spanbase + (superblock->firstfree * object_size);
		superblock->firstfree = ((uint16 *) result)[0];
	}
	else
	{
		/* Carve out an object from the not-previously-used part of the span. */
		result = spanbase + (superblock->ninitialized * object_size);
		++superblock->ninitialized;
	}

	/* Update counter and return result. */
	++superblock->nused;
	Assert(superblock->nused <= superblock->ninitialized);
	Assert(result < spanbase + object_size * total);
	return result;
}

/*
 * Allocate a span.
 *
 * We can do this either by finding a free span which is already suitable
 * or which can be split, or by allocating space from the boundary or OS and
 * creating a span descriptor to describe it.
 *
 * If we're allocating storage for a span-of-spans, then cxt will be NULL;
 * otherwise, it's the context with which the new span should be associated.
 *
 * If we're allocating storage for a large object, then pages should be the
 * minimum number of pages required to hold the object; otherwise, it's
 * ignored.
 */
static mspan *
mspan_allocate_span(char *base, mspan_manager *mgr, mspan_context *cxt,
					uint16 span_type, Size pages)
{
	mspan	   *span;
	char	   *syschunk = NULL;
	Size		syspages = 0;
	Size		first_page;

	/*
	 * Search for an existing span.  If we're allocating space for a large
	 * object, the span has to be large enough to hold the object; otherwise,
	 * we want something large enough to contain a superblock.
	 */
	if (span_type != MSPAN_TYPE_LARGE)
		pages = MSPAN_PAGES_PER_SUPERBLOCK;
	span = mspan_find_free_span(base, mgr, pages, 0);
	if (span != NULL)
	{
		/* Remove the span from the free list. */
		mspan_unlink_span(base, span);

		/* Reinitialize span for new usage. */
		mspan_initialize_span(base, mgr, cxt, span, span_type);

		/* If needed, shrink or split the span. */
		if (span->npages > pages)
		{
			if (base != NULL &&
				span->first_page + span->npages >= mgr->boundary)
			{
				Size		newboundary;
				Size		nreturned;

				/*
				 * The span is adjacent to the boundary, so we can just
				 * shrink it and move the boundary backwards.  But we need to
				 * clear any page map entries in the region we're returning,
				 * to prevent confusion down the road.
				 */
				Assert(span->first_page + span->npages == mgr->boundary);
				newboundary = span->first_page + pages;
				nreturned = mgr->boundary - newboundary;
				mspan_update_page_map(base, mgr, newboundary, nreturned, 0);
				mgr->boundary = newboundary;
				span->npages = pages;
			}
			else
			{
				/*
				 * XXX. The span isn't adjacent to the boundary.  If there's
				 * a free span following this one, we can transfer the excess
				 * pages from this span to that span.  If not, we need to
				 * allocate a new span to contain the free pages.
				 */
			}
		}

		/*
		 * Make page map entries for the new span.
		 *
		 * If this is anything other than a large allocation, we could
		 * potentially see requests to free objects located anywhere within
		 * the span, so page map entries are needed for every page of the
		 * span.  But for a large object, the first and last page are good
		 * enough; we should never get a request to free something in the
		 * middle of such a span, and the only other lookups we expect are
		 * for the end pages, to check whether this is a free span that can
		 * be consolidated.
		 *
		 * XXX. So what happens if we go to do these page map updates and
		 * run out of memory?  It's not safe to just bail out here; we'd
		 * leak the allocated span.
		 */
		Assert(span->npages == pages);
		if (span_type != MSPAN_TYPE_LARGE)
			mspan_update_page_map(base, mgr, span->first_page, span->npages,
								  (((char *) span) - base) | 1);
		else
		{
			mspan_update_page_map(base, mgr, span->first_page, 1,
								  (((char *) span) - base) | 1);
			mspan_update_page_map(base, mgr,
								  span->first_page + span->npages - 1, 1,
								  (((char *) span) - base) | 1);
		}

		return span;
	}

	/*
	 * If we're allocating a span of spans, the span descriptor will be
	 * carved out of the span itself; after all, it's intended to contain
	 * spans.  Otherwise, we prefer to allocate the span descriptor here
	 * rather than after finding storage, because it's easier to back this
	 * out if storage allocation fails than the other way around.
	 *
	 * XXX. This is completely bogus in the non-DSM case, because we might
	 * recurse back into this routine, allocate more space from the OS, and
	 * yet not know it, and thus allocate more again.
	 */
	if (span_type != MSPAN_TYPE_SPAN_OF_SPANS)
	{
		span = mspan_allocate_span_descriptor(base, mgr);
		if (span == NULL)
			return NULL;
	}

	/* Allocate storage for the new span. */
	if (base != NULL)
	{
		/* In the dynamic shared memory case, allocate from the boundary. */
		if (mgr->boundary + pages > mgr->npages)
		{
			/* Not enough pages remaining. */
			if (span != NULL)
				mspan_destroy_span(base, span);
			return NULL;
		}
		first_page = mgr->boundary;
		mgr->boundary += pages;

		/* The common code below initializes the new span. */
	}
	else
	{
		/* Allocate from the operating system. */
		if (pages > mspan_sysalloc_pages[0])
		{
			/*
			 * This is a large allocation, so ask the operating system for
			 * exactly the amount of space we need.
			 */
			syspages = pages;
			syschunk = malloc(syspages * MSPAN_PAGE_SIZE);
			if (syschunk == NULL)
			{
				if (span != NULL)
					mspan_destroy_span(base, span);
				return NULL;
			}
		}
		else
		{
			Size		i;

			/*
			 * Try to allocate a chunk of the size appropriate to the number
			 * of system chunks already allocated.  If that fails, ratchet
			 * the request back, but not below the minimum chunk size.
			 */
			i = Min(mgr->nsyschunks, lengthof(mspan_sysalloc_pages) - 1);
			for (;;)
			{
				syspages = mspan_sysalloc_pages[i];
				syschunk = malloc(syspages * MSPAN_PAGE_SIZE);
				if (syschunk != NULL)
					break;
				if (i == 0)
				{
					if (span != NULL)
						mspan_destroy_span(base, span);
					return NULL;
				}
				--i;
			}
		}

		/* Remember that we've obtained another chunk from the OS. */
		++mgr->nsyschunks;

		/*
		 * Work out the number of usable pages in the span, and the location
		 * of the first one.  If the operating system returned a page-aligned
		 * address, as we hope, then the number of usable pages is exactly
		 * equal to the number of pages we allocated.
		 * If not, then both the first and last pages are partial and
		 * therefore unusable.
		 */
		first_page = ((Size) syschunk) >> MSPAN_PAGE_BITS;
		if ((((Size) syschunk) & MSPAN_PAGE_MASK) != 0)
		{
			++first_page;
			syspages -= 2;
		}
	}

	/*
	 * If this is a span-of-spans, allocate a descriptor for the new span
	 * out of the span itself.  If it's not, span should already be
	 * non-NULL; see above.
	 */
	if (span_type == MSPAN_TYPE_SPAN_OF_SPANS)
	{
		Assert(span == NULL);
		span = (mspan *) (base + first_page * MSPAN_PAGE_SIZE);
	}
	Assert(span != NULL);

	/* Initialize the new span. */
	span->first_page = first_page;
	span->npages = pages;
	span->syschunk = syschunk;
	span->syspages = syspages;
	mspan_initialize_span(base, mgr, cxt, span, span_type);

	/*
	 * XXX. If we just allocated a new system chunk, it's probably larger
	 * than the number of pages we actually used for the span.  We need to
	 * turn the rest into another span, put it on the free list, and make
	 * page map entries for it.
	 */

	return span;
}

/*
 * Allocate new space for a new span descriptor.
 */
static mspan *
mspan_allocate_span_descriptor(char *base, mspan_manager *mgr)
{
	mspan	   *span_of_spans;

	if (!relptr_is_null(mgr->span_of_spans))
	{
		mspan	   *head;
		char	   *result;

		/* Try to allocate from the first span-of-spans. */
		head = relptr_access(base, mgr->span_of_spans);
		Assert(head->span_type == MSPAN_TYPE_SPAN_OF_SPANS);
		result = mspan_allocate_from_superblock(base, head);
		if (result != NULL)
			return (mspan *) result;

		/*
		 * Walk the rest of the circular list looking for a span-of-spans
		 * that isn't full, stopping when we arrive back at the head.
		 */
		for (span_of_spans = relptr_access(base, head->nextspan);
			 span_of_spans != head;
			 span_of_spans = relptr_access(base, span_of_spans->nextspan))
		{
			Assert(span_of_spans->span_type == MSPAN_TYPE_SPAN_OF_SPANS);
			result = mspan_allocate_from_superblock(base, span_of_spans);
			if (result != NULL)
			{
				/*
				 * Move the span from which we allocated to the head of the
				 * list in the hope of speeding up future searches.
				 */
				mspan_unlink_span(base, span_of_spans);
				mspan_link_span_to_manager(base, mgr, span_of_spans);

				/* Return a pointer to the space we allocated. */
				return (mspan *) result;
			}
		}
	}

	/* Create a new span-of-spans and allocate a descriptor from it. */
	span_of_spans = mspan_allocate_span(base, mgr, NULL,
										MSPAN_TYPE_SPAN_OF_SPANS, 0);
	if (span_of_spans == NULL)
		return NULL;
	return (mspan *) mspan_allocate_from_superblock(base, span_of_spans);
}

/*
 * Deallocate a span descriptor.
 */
static void
mspan_destroy_span(char *base, mspan *span)
{
	/*
	 * XXX. As a special case, the superblock descriptor for a span of
	 * spans is always stored within the span itself.  Return the span
	 * to be destroyed to the superblock, and then, if there's only 1
	 * remaining span outstanding, nuke the whole superblock.
	 */
}

/*
 * Ensure that there is an active superblock for the given size class.
 */
static void
mspan_ensure_active_superblock(char *base, mspan_context *cxt,
							   uint16 size_class)
{
	/*
	 * XXX. Search for an existing superblock that we can designate as the
	 * active superblock.
	 */
}

/*
 * Find a previously-allocated span that is now available for reuse.
 */
static mspan *
mspan_find_free_span(char *base, mspan_manager *mgr, Size minpages,
					 Size maxpages)
{
	Size		start_exact_search;
	Size		stop_exact_search;
	Size		i;

	Assert(minpages > 0);

	/*
	 * Every free list except the last holds spans of one particular size;
	 * if any relevant list is non-empty, we can just return the first item.
	 */
	start_exact_search = Min(minpages, MSPAN_NUM_FREE_LISTS) - 1;
	stop_exact_search = maxpages == 0 || maxpages > MSPAN_NUM_FREE_LISTS - 1 ?
		MSPAN_NUM_FREE_LISTS - 1 : maxpages;
	for (i = start_exact_search; i < stop_exact_search; ++i)
		if (!relptr_is_null(mgr->freelist[i]))
			return relptr_access(base, mgr->freelist[i]);

	/* The very last free list holds all of the remaining objects. */
	if (maxpages == 0 || maxpages > MSPAN_NUM_FREE_LISTS - 1)
	{
		mspan	   *head;
		mspan	   *span;
		mspan	   *best = NULL;

		/*
		 * This is a circular list, so walk it until we arrive back at the
		 * head, remembering the smallest span that is big enough.
		 */
		head = relptr_access(base, mgr->freelist[MSPAN_NUM_FREE_LISTS - 1]);
		span = head;
		while (span != NULL)
		{
			if (span->npages >= minpages &&
				(best == NULL || span->npages < best->npages))
				best = span;
			span = relptr_access(base, span->nextspan);
			if (span == head)
				break;
		}
		return best;
	}

	return NULL;
}

/*
 * Initialize a span descriptor.
 */
static void
mspan_initialize_span(char *base, mspan_manager *mgr, mspan_context *cxt,
					  mspan *span, uint16 span_type)
{
	/* The basics. */
	span->span_type = span_type;
	span->firstfree = MSPAN_FIRSTFREE_NONE;

	/*
	 * Normally, the span starts out empty, but a span-of-spans contains
	 * its own descriptor, so it starts out containing one allocation.
	 * A span-of-spans is different in another way as well: it's managed
	 * by the manager, not the context.
	 */
	if (span_type == MSPAN_TYPE_SPAN_OF_SPANS)
	{
		Assert(cxt == NULL);
		span->ninitialized = 1;
		span->nused = 1;
		mspan_link_span_to_manager(base, mgr, span);
	}
	else
	{
		Assert(cxt != NULL);
		span->ninitialized = 0;
		span->nused = 0;
		mspan_link_span_to_context(base, cxt, span);
	}
}

/*
 * Add a span to a linked list of spans.
 *
 * All the linked lists we use in this module are circularly-linked lists
 * of relative pointers.  The head of each list points to the first element
 * of the list.  This function inserts a new element at the head of the list
 * specified by ptr.
 */
static void
mspan_link_span_internal(char *base, void *parent, Size *ptr, mspan *span)
{
	relptr(mspan) rptr;

#ifdef USE_ASSERT_CHECKING
	Assert(relptr_is_null(span->nextspan));
	Assert(relptr_is_null(span->prevspan));
#endif

	relptr_store(base, span->parent, parent);
	if (*ptr == 0)
	{
		relptr_store(base, span->nextspan, span);
		relptr_store(base, span->prevspan, span);
	}
	else
	{
		mspan	   *head = (mspan *) (base + *ptr);
		mspan	   *tail = relptr_access(base, head->prevspan);

		span->nextspan.relptr_off = *ptr;
		span->prevspan.relptr_off = head->prevspan.relptr_off;
		relptr_store(base, head->prevspan, span);
		relptr_store(base, tail->nextspan, span);
	}
	relptr_store(base, rptr, span);
	*ptr = rptr.relptr_off;
}

/*
 * Add the span to one of the linked lists within an mspan_context.
 *
 * The mspan_context maintains lists of allocated superblocks and large
 * objects.  To put an existing span object on the appropriate list, call
 * this function.  Free spans and spans-of-spans are associated with the
 * manager, not the context; call mspan_link_span_to_manager for those.
 */
static void
mspan_link_span_to_context(char *base, mspan_context *cxt, mspan *span)
{
	Size	   *ptr;

	if (span->span_type == MSPAN_TYPE_LARGE)
		ptr = &cxt->large_allocation.relptr_off;
	else
	{
		uint16		total;
		int			fullness_class;
		int			aidx;

		Assert(span->span_type < lengthof(mspan_size_classes));
		total = MSPAN_SUPERBLOCK_SIZE / mspan_size_classes[span->span_type];
		Assert(span->nused <= total);
		if (span->nused == 0)
			fullness_class = 0;
		else
		{
			fullness_class =
				((span->nused * MSPAN_NUMBER_OF_FULLNESS_CLASSES) - 1) / total;
			Assert(fullness_class < MSPAN_NUMBER_OF_FULLNESS_CLASSES);
		}
		aidx = span->span_type * MSPAN_NUMBER_OF_FULLNESS_CLASSES +
			fullness_class;
		ptr = &cxt->small_allocation[aidx].relptr_off;
	}
	mspan_link_span_internal(base, cxt, ptr, span);
}
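
/*
 * For illustration only: a minimal sketch of how a superblock's usage count
 * maps onto a fullness class and thence onto an index into
 * mspan_context->small_allocation[], mirroring the logic in
 * mspan_link_span_to_context().  For instance, a completely full superblock
 * always lands in fullness class 3.  The MSPAN_EXAMPLES guard and function
 * name are hypothetical; this block is not part of the build.
 */
#ifdef MSPAN_EXAMPLES
static int
mspan_example_small_allocation_index(uint16 size_class, uint16 nused)
{
	uint16		total;
	int			fullness_class;

	Assert(size_class < lengthof(mspan_size_classes));
	total = MSPAN_SUPERBLOCK_SIZE / mspan_size_classes[size_class];
	Assert(nused <= total);

	if (nused == 0)
		fullness_class = 0;
	else
		fullness_class =
			((nused * MSPAN_NUMBER_OF_FULLNESS_CLASSES) - 1) / total;

	return size_class * MSPAN_NUMBER_OF_FULLNESS_CLASSES + fullness_class;
}
#endif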

/*
 * Add the span to one of the linked lists within an mspan_manager.
 *
 * The mspan_manager maintains a list of spans-of-spans, and a bunch of
 * free lists.  To put an existing span object on the appropriate list,
 * call this function.  Allocated superblocks and large objects are
 * associated with the context, not the manager; call
 * mspan_link_span_to_context for those.
 */
static void
mspan_link_span_to_manager(char *base, mspan_manager *mgr, mspan *span)
{
	Size	   *ptr;

#ifdef USE_ASSERT_CHECKING
	Assert(relptr_is_null(span->nextspan));
	Assert(relptr_is_null(span->prevspan));
#endif

	if (span->span_type == MSPAN_TYPE_SPAN_OF_SPANS)
		ptr = &mgr->span_of_spans.relptr_off;
	else
	{
		Size		fidx;

		Assert(span->span_type == MSPAN_TYPE_FREE);
		fidx = Min(span->npages, MSPAN_NUM_FREE_LISTS) - 1;
		ptr = &mgr->freelist[fidx].relptr_off;
	}
	mspan_link_span_internal(base, mgr, ptr, span);
}

/*
 * Release the memory consumed by a span, consolidating it with adjacent free
 * spans if possible.
 */
static void
mspan_release_span(char *base, mspan_manager *mgr, mspan *span)
{
	Assert(span->span_type != MSPAN_TYPE_FREE);

	/* Remove this span from the list which contains it. */
	mspan_unlink_span(base, span);

	/*
	 * Find the spans that precede and follow the span to be released within
	 * the address space; and if they are free, consolidate them with this
	 * one.  (In the page map, 0 means no entry and any odd value means that
	 * the span is allocated, so we ignore those values.)
	 */
	if (span->first_page > 0)
	{
		relptr(mspan) p;

		p.relptr_off = aspace_map_get(&mgr->page_map, span->first_page - 1,
									  base);
		if (p.relptr_off != 0 && (p.relptr_off & 1) == 0)
		{
			mspan	   *preceding_span = relptr_access(base, p);

			mspan_unlink_span(base, preceding_span);
			span->first_page = preceding_span->first_page;
			span->npages += preceding_span->npages;
			mspan_destroy_span(base, preceding_span);

			/*
			 * XXX. This is a problem.  Suppose we destroy the preceding span
			 * and it releases a superblock just preceding the current span.
			 * Now that span can't consolidate with this one, nor vice versa.
			 */
		}
	}
	if (mgr->npages == 0 || span->first_page + span->npages < mgr->boundary)
	{
		relptr(mspan) f;

		f.relptr_off = aspace_map_get(&mgr->page_map,
									  span->first_page + span->npages, base);
		if (f.relptr_off != 0 && (f.relptr_off & 1) == 0)
		{
			mspan	   *following_span = relptr_access(base, f);

			mspan_unlink_span(base, following_span);
			span->npages += following_span->npages;
			mspan_destroy_span(base, following_span);

			/*
			 * XXX. Here again, destroying the following span could create
			 * another free chunk that needs to be consolidated with this
			 * span.
			 */
		}
	}

	/*
	 * Make new page map entries for the span.
	 *
	 * Since allocated spans have page map entries with the least significant
	 * bit set, we need to make new entries regardless of whether we
	 * succeeded in consolidating with adjacent spans.  If we did
	 * consolidate, we need new entries for that reason as well: the first
	 * and last pages of the new and larger span must point to the correct
	 * object.  This coding may leave behind stale mappings between the first
	 * and last pages of the object, but it doesn't matter.  For a free span,
	 * only the first and last pages will ever be looked up in the page map;
	 * we needn't spend time fixing whatever junk entries may exist in the
	 * middle.
	 */
	mspan_update_page_map(base, mgr, span->first_page, 1,
						  ((char *) span) - base);
	if (span->npages > 1)
		mspan_update_page_map(base, mgr, span->first_page + span->npages - 1,
							  1, ((char *) span) - base);

	/* Mark the span as free and put it on the appropriate free list. */
	span->span_type = MSPAN_TYPE_FREE;
	mspan_link_span_to_manager(base, mgr, span);
}

/*
 * Update the page map.
 */
static void
mspan_update_page_map(char *base, mspan_manager *mgr, Size first_page,
					  Size npages, Size value)
{
	aspace_map_set_range(&mgr->page_map, first_page, npages, value,
						 base, NULL, NULL);	/* XXX: Last two args should not be NULL! */
}

/*
 * Remove a span from the circularly-linked list that presently contains it.
 */
static void
mspan_unlink_span(char *base, mspan *span)
{
	void	   *parent;
	mspan	   *next;
	mspan	   *prev;
	relptr(mspan) s;
	Size		newhead;

	/*
	 * If this span is the head of the containing list, then we've got to
	 * adjust the head pointer to reference the next element, or zero it
	 * out if this span is the only element.
	 */
	parent = relptr_access(base, span->parent);
	relptr_store(base, s, span);
	newhead = (span->nextspan.relptr_off == s.relptr_off ?
			   0 : span->nextspan.relptr_off);
	switch (span->span_type)
	{
		case MSPAN_TYPE_FREE:
			{
				mspan_manager *mgr = parent;
				Size		fidx = Min(span->npages, MSPAN_NUM_FREE_LISTS) - 1;

				if (mgr->freelist[fidx].relptr_off == s.relptr_off)
					mgr->freelist[fidx].relptr_off = newhead;
				break;
			}
		case MSPAN_TYPE_LARGE:
			{
				mspan_context *cxt = parent;

				if (cxt->large_allocation.relptr_off == s.relptr_off)
					cxt->large_allocation.relptr_off = newhead;
				break;
			}
		case MSPAN_TYPE_SPAN_OF_SPANS:
			{
				mspan_manager *mgr = parent;

				if (mgr->span_of_spans.relptr_off == s.relptr_off)
					mgr->span_of_spans.relptr_off = newhead;
				break;
			}
		default:
			{
				mspan_context *cxt = parent;
				int			i;
				int			aidx;

				aidx = span->span_type * MSPAN_NUMBER_OF_FULLNESS_CLASSES;
				for (i = 0; i < MSPAN_NUMBER_OF_FULLNESS_CLASSES; ++i)
				{
					if (cxt->small_allocation[aidx + i].relptr_off ==
						s.relptr_off)
					{
						cxt->small_allocation[aidx + i].relptr_off = newhead;
						break;
					}
				}
				break;
			}
	}

	/* Adjust next and previous pointers for our neighbors. */
	next = relptr_access(base, span->nextspan);
	prev = relptr_access(base, span->prevspan);
	Assert(next != NULL && prev != NULL);
	next->prevspan.relptr_off = span->prevspan.relptr_off;
	prev->nextspan.relptr_off = span->nextspan.relptr_off;

#ifdef USE_ASSERT_CHECKING
	{
		mspan	   *null = NULL;

		relptr_store(base, span->prevspan, null);
		relptr_store(base, span->nextspan, null);
	}
#endif
}
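
/*
 * For illustration only: a minimal sketch of the intended usage of a manager
 * placed inside a dynamic shared memory segment.  The MSPAN_EXAMPLES guard
 * and the function are hypothetical, and the example assumes the caller
 * passes an already-mapped segment and a byte count no larger than its
 * mapped length.
 */
#ifdef MSPAN_EXAMPLES
static void
mspan_example_dsm_usage(dsm_segment *seg, Size nbytes)
{
	mspan_manager *mgr;
	mspan_context *cxt;
	void	   *chunk;

	/* Let the manager govern the given portion of the segment. */
	mgr = mspan_initialize_dsm_manager(seg, dsm_segment_address(seg), nbytes);

	cxt = mspan_context_create(seg, mgr);
	chunk = mspan_alloc(seg, cxt, 8192, 0);		/* largest small size class */

	/* Tearing down the context returns its storage to the manager. */
	mspan_context_destroy(seg, cxt);
	(void) chunk;
}
#endif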