Subversion Repositories HelenOS

Rev

Rev 4348 | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright (c) 2006 Ondrej Palkovsky
  3.  * All rights reserved.
  4.  *
  5.  * Redistribution and use in source and binary forms, with or without
  6.  * modification, are permitted provided that the following conditions
  7.  * are met:
  8.  *
  9.  * - Redistributions of source code must retain the above copyright
  10.  *   notice, this list of conditions and the following disclaimer.
  11.  * - Redistributions in binary form must reproduce the above copyright
  12.  *   notice, this list of conditions and the following disclaimer in the
  13.  *   documentation and/or other materials provided with the distribution.
  14.  * - The name of the author may not be used to endorse or promote products
  15.  *   derived from this software without specific prior written permission.
  16.  *
  17.  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18.  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19.  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20.  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21.  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22.  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23.  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26.  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27.  */
  28.  
  29. /** @addtogroup genericmm
  30.  * @{
  31.  */
  32.  
  33. /**
  34.  * @file
  35.  * @brief   Slab allocator.
  36.  *
  37.  * The slab allocator is closely modelled after OpenSolaris slab allocator.
  38.  * @see http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick_html/
  39.  *
  40.  * with the following exceptions:
  41.  * @li empty slabs are deallocated immediately
  42.  *     (in Linux they are kept in linked list, in Solaris ???)
  43.  * @li empty magazines are deallocated when not needed
  44.  *     (in Solaris they are held in linked list in slab cache)
  45.  *
  46.  * Following features are not currently supported but would be easy to do:
  47.  * @li cache coloring
  48.  * @li dynamic magazine growing (different magazine sizes are already
  49.  *     supported, but we would need to adjust allocation strategy)
  50.  *
  51.  * The slab allocator supports per-CPU caches ('magazines') to facilitate
  52.  * good SMP scaling.
  53.  *
  54.  * When a new object is being allocated, it is first checked, if it is
  55.  * available in a CPU-bound magazine. If it is not found there, it is
  56.  * allocated from a CPU-shared slab - if a partially full one is found,
  57.  * it is used, otherwise a new one is allocated.
  58.  *
  59.  * When an object is being deallocated, it is put to a CPU-bound magazine.
  60.  * If there is no such magazine, a new one is allocated (if this fails,
  61.  * the object is deallocated into slab). If the magazine is full, it is
  62.  * put into cpu-shared list of magazines and a new one is allocated.
  63.  *
  64.  * The CPU-bound magazine is actually a pair of magazines in order to avoid
  65.  * thrashing when somebody is allocating/deallocating 1 item at the magazine
  66.  * size boundary. LIFO order is enforced, which should avoid fragmentation
  67.  * as much as possible.
  68.  *  
  69.  * Every cache contains list of full slabs and list of partially full slabs.
  70.  * Empty slabs are immediately freed (thrashing will be avoided because
  71.  * of magazines).
  72.  *
  73.  * The slab information structure is kept inside the data area, if possible.
  74.  * The cache can be marked that it should not use magazines. This is used
  75.  * only for slab related caches to avoid deadlocks and infinite recursion
  76.  * (the slab allocator uses itself for allocating all it's control structures).
  77.  *
  78.  * The slab allocator allocates a lot of space and does not free it. When
  79.  * the frame allocator fails to allocate a frame, it calls slab_reclaim().
  80.  * It tries 'light reclaim' first, then brutal reclaim. The light reclaim
  81.  * releases slabs from cpu-shared magazine-list, until at least 1 slab
  82.  * is deallocated in each cache (this algorithm should probably change).
  83.  * The brutal reclaim removes all cached objects, even from CPU-bound
  84.  * magazines.
  85.  *
  86.  * @todo
  87.  * For better CPU-scaling the magazine allocation strategy should
  88.  * be extended. Currently, if the cache does not have magazine, it asks
  89.  * for non-cpu cached magazine cache to provide one. It might be feasible
  90.  * to add cpu-cached magazine cache (which would allocate it's magazines
  91.  * from non-cpu-cached mag. cache). This would provide a nice per-cpu
  92.  * buffer. The other possibility is to use the per-cache
  93.  * 'empty-magazine-list', which decreases competing for 1 per-system
  94.  * magazine cache.
  95.  *
  96.  * @todo
  97.  * it might be good to add granularity of locks even to slab level,
  98.  * we could then try_spinlock over all partial slabs and thus improve
  99.  * scalability even on slab level
  100.  */
  101.  
  102. #include <synch/spinlock.h>
  103. #include <mm/slab.h>
  104. #include <adt/list.h>
  105. #include <memstr.h>
  106. #include <align.h>
  107. #include <mm/frame.h>
  108. #include <config.h>
  109. #include <print.h>
  110. #include <arch.h>
  111. #include <panic.h>
  112. #include <debug.h>
  113. #include <bitops.h>
  114. #include <macros.h>
  115.  
  116. SPINLOCK_INITIALIZE(slab_cache_lock);
  117. static LIST_INITIALIZE(slab_cache_list);
  118.  
  119. /** Magazine cache */
  120. static slab_cache_t mag_cache;
  121. /** Cache for cache descriptors */
  122. static slab_cache_t slab_cache_cache;
  123. /** Cache for external slab descriptors
  124.  * This time we want per-cpu cache, so do not make it static
  125.  * - using slab for internal slab structures will not deadlock,
  126.  *   as all slab structures are 'small' - control structures of
  127.  *   their caches do not require further allocation
  128.  */
  129. static slab_cache_t *slab_extern_cache;
  130. /** Caches for malloc */
  131. static slab_cache_t *malloc_caches[SLAB_MAX_MALLOC_W - SLAB_MIN_MALLOC_W + 1];
  132. static char *malloc_names[] =  {
  133.     "malloc-16",
  134.     "malloc-32",
  135.     "malloc-64",
  136.     "malloc-128",
  137.     "malloc-256",
  138.     "malloc-512",
  139.     "malloc-1K",
  140.     "malloc-2K",
  141.     "malloc-4K",
  142.     "malloc-8K",
  143.     "malloc-16K",
  144.     "malloc-32K",
  145.     "malloc-64K",
  146.     "malloc-128K",
  147.     "malloc-256K",
  148.     "malloc-512K",
  149.     "malloc-1M",
  150.     "malloc-2M",
  151.     "malloc-4M"
  152. };
  153.  
  154. /** Slab descriptor */
  155. typedef struct {
  156.     slab_cache_t *cache;    /**< Pointer to parent cache. */
  157.     link_t link;        /**< List of full/partial slabs. */
  158.     void *start;        /**< Start address of first available item. */
  159.     size_t available;   /**< Count of available items in this slab. */
  160.     size_t nextavail;   /**< The index of next available item. */
  161. } slab_t;
  162.  
  163. #ifdef CONFIG_DEBUG
  164. static int _slab_initialized = 0;
  165. #endif
  166.  
  167. /**************************************/
  168. /* Slab allocation functions          */
  169.  
  170. /**
  171.  * Allocate frames for slab space and initialize
  172.  *
  173.  */
  174. static slab_t *slab_space_alloc(slab_cache_t *cache, int flags)
  175. {
  176.     void *data;
  177.     slab_t *slab;
  178.     size_t fsize;
  179.     unsigned int i;
  180.     size_t zone = 0;
  181.    
  182.     data = frame_alloc_generic(cache->order, FRAME_KA | flags, &zone);
  183.     if (!data) {
  184.         return NULL;
  185.     }
  186.     if (!(cache->flags & SLAB_CACHE_SLINSIDE)) {
  187.         slab = slab_alloc(slab_extern_cache, flags);
  188.         if (!slab) {
  189.             frame_free(KA2PA(data));
  190.             return NULL;
  191.         }
  192.     } else {
  193.         fsize = (PAGE_SIZE << cache->order);
  194.         slab = data + fsize - sizeof(*slab);
  195.     }
  196.    
  197.     /* Fill in slab structures */
  198.     for (i = 0; i < ((unsigned int) 1 << cache->order); i++)
  199.         frame_set_parent(ADDR2PFN(KA2PA(data)) + i, slab, zone);
  200.  
  201.     slab->start = data;
  202.     slab->available = cache->objects;
  203.     slab->nextavail = 0;
  204.     slab->cache = cache;
  205.  
  206.     for (i = 0; i < cache->objects; i++)
  207.         *((int *) (slab->start + i*cache->size)) = i + 1;
  208.  
  209.     atomic_inc(&cache->allocated_slabs);
  210.     return slab;
  211. }
  212.  
  213. /**
  214.  * Deallocate space associated with slab
  215.  *
  216.  * @return number of freed frames
  217.  */
  218. static size_t slab_space_free(slab_cache_t *cache, slab_t *slab)
  219. {
  220.     frame_free(KA2PA(slab->start));
  221.     if (! (cache->flags & SLAB_CACHE_SLINSIDE))
  222.         slab_free(slab_extern_cache, slab);
  223.  
  224.     atomic_dec(&cache->allocated_slabs);
  225.    
  226.     return 1 << cache->order;
  227. }
  228.  
  229. /** Map object to slab structure */
  230. static slab_t * obj2slab(void *obj)
  231. {
  232.     return (slab_t *) frame_get_parent(ADDR2PFN(KA2PA(obj)), 0);
  233. }
  234.  
  235. /**************************************/
  236. /* Slab functions */
  237.  
  238.  
  239. /**
  240.  * Return object to slab and call a destructor
  241.  *
  242.  * @param slab If the caller knows directly slab of the object, otherwise NULL
  243.  *
  244.  * @return Number of freed pages
  245.  */
  246. static size_t slab_obj_destroy(slab_cache_t *cache, void *obj, slab_t *slab)
  247. {
  248.     int freed = 0;
  249.  
  250.     if (!slab)
  251.         slab = obj2slab(obj);
  252.  
  253.     ASSERT(slab->cache == cache);
  254.  
  255.     if (cache->destructor)
  256.         freed = cache->destructor(obj);
  257.    
  258.     spinlock_lock(&cache->slablock);
  259.     ASSERT(slab->available < cache->objects);
  260.  
  261.     *((int *)obj) = slab->nextavail;
  262.     slab->nextavail = (obj - slab->start) / cache->size;
  263.     slab->available++;
  264.  
  265.     /* Move it to correct list */
  266.     if (slab->available == cache->objects) {
  267.         /* Free associated memory */
  268.         list_remove(&slab->link);
  269.         spinlock_unlock(&cache->slablock);
  270.  
  271.         return freed + slab_space_free(cache, slab);
  272.  
  273.     } else if (slab->available == 1) {
  274.         /* It was in full, move to partial */
  275.         list_remove(&slab->link);
  276.         list_prepend(&slab->link, &cache->partial_slabs);
  277.     }
  278.     spinlock_unlock(&cache->slablock);
  279.     return freed;
  280. }
  281.  
  282. /**
  283.  * Take new object from slab or create new if needed
  284.  *
  285.  * @return Object address or null
  286.  */
  287. static void *slab_obj_create(slab_cache_t *cache, int flags)
  288. {
  289.     slab_t *slab;
  290.     void *obj;
  291.  
  292.     spinlock_lock(&cache->slablock);
  293.  
  294.     if (list_empty(&cache->partial_slabs)) {
  295.         /* Allow recursion and reclaiming
  296.          * - this should work, as the slab control structures
  297.          *   are small and do not need to allocate with anything
  298.          *   other than frame_alloc when they are allocating,
  299.          *   that's why we should get recursion at most 1-level deep
  300.          */
  301.         spinlock_unlock(&cache->slablock);
  302.         slab = slab_space_alloc(cache, flags);
  303.         if (!slab)
  304.             return NULL;
  305.         spinlock_lock(&cache->slablock);
  306.     } else {
  307.         slab = list_get_instance(cache->partial_slabs.next, slab_t,
  308.             link);
  309.         list_remove(&slab->link);
  310.     }
  311.     obj = slab->start + slab->nextavail * cache->size;
  312.     slab->nextavail = *((int *)obj);
  313.     slab->available--;
  314.  
  315.     if (!slab->available)
  316.         list_prepend(&slab->link, &cache->full_slabs);
  317.     else
  318.         list_prepend(&slab->link, &cache->partial_slabs);
  319.  
  320.     spinlock_unlock(&cache->slablock);
  321.  
  322.     if (cache->constructor && cache->constructor(obj, flags)) {
  323.         /* Bad, bad, construction failed */
  324.         slab_obj_destroy(cache, obj, slab);
  325.         return NULL;
  326.     }
  327.     return obj;
  328. }
  329.  
  330. /**************************************/
  331. /* CPU-Cache slab functions */
  332.  
  333. /**
  334.  * Finds a full magazine in cache, takes it from list
  335.  * and returns it
  336.  *
  337.  * @param first If true, return first, else last mag
  338.  */
  339. static slab_magazine_t *get_mag_from_cache(slab_cache_t *cache, int first)
  340. {
  341.     slab_magazine_t *mag = NULL;
  342.     link_t *cur;
  343.  
  344.     spinlock_lock(&cache->maglock);
  345.     if (!list_empty(&cache->magazines)) {
  346.         if (first)
  347.             cur = cache->magazines.next;
  348.         else
  349.             cur = cache->magazines.prev;
  350.         mag = list_get_instance(cur, slab_magazine_t, link);
  351.         list_remove(&mag->link);
  352.         atomic_dec(&cache->magazine_counter);
  353.     }
  354.     spinlock_unlock(&cache->maglock);
  355.     return mag;
  356. }
  357.  
  358. /** Prepend magazine to magazine list in cache */
  359. static void put_mag_to_cache(slab_cache_t *cache, slab_magazine_t *mag)
  360. {
  361.     spinlock_lock(&cache->maglock);
  362.  
  363.     list_prepend(&mag->link, &cache->magazines);
  364.     atomic_inc(&cache->magazine_counter);
  365.    
  366.     spinlock_unlock(&cache->maglock);
  367. }
  368.  
  369. /**
  370.  * Free all objects in magazine and free memory associated with magazine
  371.  *
  372.  * @return Number of freed pages
  373.  */
  374. static size_t magazine_destroy(slab_cache_t *cache, slab_magazine_t *mag)
  375. {
  376.     unsigned int i;
  377.     size_t frames = 0;
  378.  
  379.     for (i = 0; i < mag->busy; i++) {
  380.         frames += slab_obj_destroy(cache, mag->objs[i], NULL);
  381.         atomic_dec(&cache->cached_objs);
  382.     }
  383.    
  384.     slab_free(&mag_cache, mag);
  385.  
  386.     return frames;
  387. }
  388.  
  389. /**
  390.  * Find full magazine, set it as current and return it
  391.  *
  392.  * Assume cpu_magazine lock is held
  393.  */
  394. static slab_magazine_t *get_full_current_mag(slab_cache_t *cache)
  395. {
  396.     slab_magazine_t *cmag, *lastmag, *newmag;
  397.  
  398.     cmag = cache->mag_cache[CPU->id].current;
  399.     lastmag = cache->mag_cache[CPU->id].last;
  400.     if (cmag) { /* First try local CPU magazines */
  401.         if (cmag->busy)
  402.             return cmag;
  403.  
  404.         if (lastmag && lastmag->busy) {
  405.             cache->mag_cache[CPU->id].current = lastmag;
  406.             cache->mag_cache[CPU->id].last = cmag;
  407.             return lastmag;
  408.         }
  409.     }
  410.     /* Local magazines are empty, import one from magazine list */
  411.     newmag = get_mag_from_cache(cache, 1);
  412.     if (!newmag)
  413.         return NULL;
  414.  
  415.     if (lastmag)
  416.         magazine_destroy(cache, lastmag);
  417.  
  418.     cache->mag_cache[CPU->id].last = cmag;
  419.     cache->mag_cache[CPU->id].current = newmag;
  420.     return newmag;
  421. }
  422.  
  423. /**
  424.  * Try to find object in CPU-cache magazines
  425.  *
  426.  * @return Pointer to object or NULL if not available
  427.  */
  428. static void *magazine_obj_get(slab_cache_t *cache)
  429. {
  430.     slab_magazine_t *mag;
  431.     void *obj;
  432.  
  433.     if (!CPU)
  434.         return NULL;
  435.  
  436.     spinlock_lock(&cache->mag_cache[CPU->id].lock);
  437.  
  438.     mag = get_full_current_mag(cache);
  439.     if (!mag) {
  440.         spinlock_unlock(&cache->mag_cache[CPU->id].lock);
  441.         return NULL;
  442.     }
  443.     obj = mag->objs[--mag->busy];
  444.     spinlock_unlock(&cache->mag_cache[CPU->id].lock);
  445.     atomic_dec(&cache->cached_objs);
  446.    
  447.     return obj;
  448. }
  449.  
  450. /**
  451.  * Assure that the current magazine is empty, return pointer to it, or NULL if
  452.  * no empty magazine is available and cannot be allocated
  453.  *
  454.  * Assume mag_cache[CPU->id].lock is held
  455.  *
  456.  * We have 2 magazines bound to processor.
  457.  * First try the current.
  458.  *  If full, try the last.
  459.  *   If full, put to magazines list.
  460.  *   allocate new, exchange last & current
  461.  *
  462.  */
  463. static slab_magazine_t *make_empty_current_mag(slab_cache_t *cache)
  464. {
  465.     slab_magazine_t *cmag,*lastmag,*newmag;
  466.  
  467.     cmag = cache->mag_cache[CPU->id].current;
  468.     lastmag = cache->mag_cache[CPU->id].last;
  469.  
  470.     if (cmag) {
  471.         if (cmag->busy < cmag->size)
  472.             return cmag;
  473.         if (lastmag && lastmag->busy < lastmag->size) {
  474.             cache->mag_cache[CPU->id].last = cmag;
  475.             cache->mag_cache[CPU->id].current = lastmag;
  476.             return lastmag;
  477.         }
  478.     }
  479.     /* current | last are full | nonexistent, allocate new */
  480.     /* We do not want to sleep just because of caching */
  481.     /* Especially we do not want reclaiming to start, as
  482.      * this would deadlock */
  483.     newmag = slab_alloc(&mag_cache, FRAME_ATOMIC | FRAME_NO_RECLAIM);
  484.     if (!newmag)
  485.         return NULL;
  486.     newmag->size = SLAB_MAG_SIZE;
  487.     newmag->busy = 0;
  488.  
  489.     /* Flush last to magazine list */
  490.     if (lastmag)
  491.         put_mag_to_cache(cache, lastmag);
  492.  
  493.     /* Move current as last, save new as current */
  494.     cache->mag_cache[CPU->id].last = cmag; 
  495.     cache->mag_cache[CPU->id].current = newmag;
  496.  
  497.     return newmag;
  498. }
  499.  
  500. /**
  501.  * Put object into CPU-cache magazine
  502.  *
  503.  * @return 0 - success, -1 - could not get memory
  504.  */
  505. static int magazine_obj_put(slab_cache_t *cache, void *obj)
  506. {
  507.     slab_magazine_t *mag;
  508.  
  509.     if (!CPU)
  510.         return -1;
  511.  
  512.     spinlock_lock(&cache->mag_cache[CPU->id].lock);
  513.  
  514.     mag = make_empty_current_mag(cache);
  515.     if (!mag) {
  516.         spinlock_unlock(&cache->mag_cache[CPU->id].lock);
  517.         return -1;
  518.     }
  519.    
  520.     mag->objs[mag->busy++] = obj;
  521.  
  522.     spinlock_unlock(&cache->mag_cache[CPU->id].lock);
  523.     atomic_inc(&cache->cached_objs);
  524.     return 0;
  525. }
  526.  
  527.  
  528. /**************************************/
  529. /* Slab cache functions */
  530.  
  531. /** Return number of objects that fit in certain cache size */
  532. static unsigned int comp_objects(slab_cache_t *cache)
  533. {
  534.     if (cache->flags & SLAB_CACHE_SLINSIDE)
  535.         return ((PAGE_SIZE << cache->order) - sizeof(slab_t)) /
  536.             cache->size;
  537.     else
  538.         return (PAGE_SIZE << cache->order) / cache->size;
  539. }
  540.  
  541. /** Return wasted space in slab */
  542. static unsigned int badness(slab_cache_t *cache)
  543. {
  544.     unsigned int objects;
  545.     unsigned int ssize;
  546.  
  547.     objects = comp_objects(cache);
  548.     ssize = PAGE_SIZE << cache->order;
  549.     if (cache->flags & SLAB_CACHE_SLINSIDE)
  550.         ssize -= sizeof(slab_t);
  551.     return ssize - objects * cache->size;
  552. }
  553.  
  554. /**
  555.  * Initialize mag_cache structure in slab cache
  556.  */
  557. static void make_magcache(slab_cache_t *cache)
  558. {
  559.     unsigned int i;
  560.    
  561.     ASSERT(_slab_initialized >= 2);
  562.  
  563.     cache->mag_cache = malloc(sizeof(slab_mag_cache_t) * config.cpu_count,
  564.         0);
  565.     for (i = 0; i < config.cpu_count; i++) {
  566.         memsetb(&cache->mag_cache[i], sizeof(cache->mag_cache[i]), 0);
  567.         spinlock_initialize(&cache->mag_cache[i].lock,
  568.             "slab_maglock_cpu");
  569.     }
  570. }
  571.  
  572. /** Initialize allocated memory as a slab cache */
  573. static void
  574. _slab_cache_create(slab_cache_t *cache, char *name, size_t size, size_t align,
  575.     int (*constructor)(void *obj, int kmflag), int (*destructor)(void *obj),
  576.     int flags)
  577. {
  578.     int pages;
  579.     ipl_t ipl;
  580.  
  581.     memsetb(cache, sizeof(*cache), 0);
  582.     cache->name = name;
  583.  
  584.     if (align < sizeof(unative_t))
  585.         align = sizeof(unative_t);
  586.     size = ALIGN_UP(size, align);
  587.        
  588.     cache->size = size;
  589.  
  590.     cache->constructor = constructor;
  591.     cache->destructor = destructor;
  592.     cache->flags = flags;
  593.  
  594.     list_initialize(&cache->full_slabs);
  595.     list_initialize(&cache->partial_slabs);
  596.     list_initialize(&cache->magazines);
  597.     spinlock_initialize(&cache->slablock, "slab_lock");
  598.     spinlock_initialize(&cache->maglock, "slab_maglock");
  599.     if (!(cache->flags & SLAB_CACHE_NOMAGAZINE))
  600.         make_magcache(cache);
  601.  
  602.     /* Compute slab sizes, object counts in slabs etc. */
  603.     if (cache->size < SLAB_INSIDE_SIZE)
  604.         cache->flags |= SLAB_CACHE_SLINSIDE;
  605.  
  606.     /* Minimum slab order */
  607.     pages = SIZE2FRAMES(cache->size);
  608.     /* We need the 2^order >= pages */
  609.     if (pages == 1)
  610.         cache->order = 0;
  611.     else
  612.         cache->order = fnzb(pages - 1) + 1;
  613.  
  614.     while (badness(cache) > SLAB_MAX_BADNESS(cache)) {
  615.         cache->order += 1;
  616.     }
  617.     cache->objects = comp_objects(cache);
  618.     /* If info fits in, put it inside */
  619.     if (badness(cache) > sizeof(slab_t))
  620.         cache->flags |= SLAB_CACHE_SLINSIDE;
  621.  
  622.     /* Add cache to cache list */
  623.     ipl = interrupts_disable();
  624.     spinlock_lock(&slab_cache_lock);
  625.  
  626.     list_append(&cache->link, &slab_cache_list);
  627.  
  628.     spinlock_unlock(&slab_cache_lock);
  629.     interrupts_restore(ipl);
  630. }
  631.  
  632. /** Create slab cache  */
  633. slab_cache_t *
  634. slab_cache_create(char *name, size_t size, size_t align,
  635.     int (*constructor)(void *obj, int kmflag), int (*destructor)(void *obj),
  636.     int flags)
  637. {
  638.     slab_cache_t *cache;
  639.  
  640.     cache = slab_alloc(&slab_cache_cache, 0);
  641.     _slab_cache_create(cache, name, size, align, constructor, destructor,
  642.         flags);
  643.     return cache;
  644. }
  645.  
  646. /**
  647.  * Reclaim space occupied by objects that are already free
  648.  *
  649.  * @param flags If contains SLAB_RECLAIM_ALL, do aggressive freeing
  650.  * @return Number of freed pages
  651.  */
  652. static size_t _slab_reclaim(slab_cache_t *cache, int flags)
  653. {
  654.     unsigned int i;
  655.     slab_magazine_t *mag;
  656.     size_t frames = 0;
  657.     int magcount;
  658.    
  659.     if (cache->flags & SLAB_CACHE_NOMAGAZINE)
  660.         return 0; /* Nothing to do */
  661.  
  662.     /* We count up to original magazine count to avoid
  663.      * endless loop
  664.      */
  665.     magcount = atomic_get(&cache->magazine_counter);
  666.     while (magcount-- && (mag=get_mag_from_cache(cache, 0))) {
  667.         frames += magazine_destroy(cache,mag);
  668.         if (!(flags & SLAB_RECLAIM_ALL) && frames)
  669.             break;
  670.     }
  671.    
  672.     if (flags & SLAB_RECLAIM_ALL) {
  673.         /* Free cpu-bound magazines */
  674.         /* Destroy CPU magazines */
  675.         for (i = 0; i < config.cpu_count; i++) {
  676.             spinlock_lock(&cache->mag_cache[i].lock);
  677.  
  678.             mag = cache->mag_cache[i].current;
  679.             if (mag)
  680.                 frames += magazine_destroy(cache, mag);
  681.             cache->mag_cache[i].current = NULL;
  682.            
  683.             mag = cache->mag_cache[i].last;
  684.             if (mag)
  685.                 frames += magazine_destroy(cache, mag);
  686.             cache->mag_cache[i].last = NULL;
  687.  
  688.             spinlock_unlock(&cache->mag_cache[i].lock);
  689.         }
  690.     }
  691.  
  692.     return frames;
  693. }
  694.  
  695. /** Check that there are no slabs and remove cache from system  */
  696. void slab_cache_destroy(slab_cache_t *cache)
  697. {
  698.     ipl_t ipl;
  699.  
  700.     /* First remove cache from link, so that we don't need
  701.      * to disable interrupts later
  702.      */
  703.  
  704.     ipl = interrupts_disable();
  705.     spinlock_lock(&slab_cache_lock);
  706.  
  707.     list_remove(&cache->link);
  708.  
  709.     spinlock_unlock(&slab_cache_lock);
  710.     interrupts_restore(ipl);
  711.  
  712.     /* Do not lock anything, we assume the software is correct and
  713.      * does not touch the cache when it decides to destroy it */
  714.    
  715.     /* Destroy all magazines */
  716.     _slab_reclaim(cache, SLAB_RECLAIM_ALL);
  717.  
  718.     /* All slabs must be empty */
  719.     if (!list_empty(&cache->full_slabs) ||
  720.         !list_empty(&cache->partial_slabs))
  721.         panic("Destroying cache that is not empty.");
  722.  
  723.     if (!(cache->flags & SLAB_CACHE_NOMAGAZINE))
  724.         free(cache->mag_cache);
  725.     slab_free(&slab_cache_cache, cache);
  726. }
  727.  
  728. /** Allocate new object from cache - if no flags given, always returns memory */
  729. void *slab_alloc(slab_cache_t *cache, int flags)
  730. {
  731.     ipl_t ipl;
  732.     void *result = NULL;
  733.    
  734.     /* Disable interrupts to avoid deadlocks with interrupt handlers */
  735.     ipl = interrupts_disable();
  736.  
  737.     if (!(cache->flags & SLAB_CACHE_NOMAGAZINE)) {
  738.         result = magazine_obj_get(cache);
  739.     }
  740.     if (!result)
  741.         result = slab_obj_create(cache, flags);
  742.  
  743.     interrupts_restore(ipl);
  744.  
  745.     if (result)
  746.         atomic_inc(&cache->allocated_objs);
  747.  
  748.     return result;
  749. }
  750.  
  751. /** Return object to cache, use slab if known  */
  752. static void _slab_free(slab_cache_t *cache, void *obj, slab_t *slab)
  753. {
  754.     ipl_t ipl;
  755.  
  756.     ipl = interrupts_disable();
  757.  
  758.     if ((cache->flags & SLAB_CACHE_NOMAGAZINE) ||
  759.         magazine_obj_put(cache, obj)) {
  760.         slab_obj_destroy(cache, obj, slab);
  761.  
  762.     }
  763.     interrupts_restore(ipl);
  764.     atomic_dec(&cache->allocated_objs);
  765. }
  766.  
  767. /** Return slab object to cache */
  768. void slab_free(slab_cache_t *cache, void *obj)
  769. {
  770.     _slab_free(cache, obj, NULL);
  771. }
  772.  
  773. /* Go through all caches and reclaim what is possible */
  774. size_t slab_reclaim(int flags)
  775. {
  776.     slab_cache_t *cache;
  777.     link_t *cur;
  778.     size_t frames = 0;
  779.  
  780.     spinlock_lock(&slab_cache_lock);
  781.  
  782.     /* TODO: Add assert, that interrupts are disabled, otherwise
  783.      * memory allocation from interrupts can deadlock.
  784.      */
  785.  
  786.     for (cur = slab_cache_list.next; cur != &slab_cache_list;
  787.         cur = cur->next) {
  788.         cache = list_get_instance(cur, slab_cache_t, link);
  789.         frames += _slab_reclaim(cache, flags);
  790.     }
  791.  
  792.     spinlock_unlock(&slab_cache_lock);
  793.  
  794.     return frames;
  795. }
  796.  
  797.  
  798. /* Print list of slabs */
  799. void slab_print_list(void)
  800. {
  801.     int skip = 0;
  802.  
  803.     printf("slab name        size     pages  obj/pg slabs  cached allocated"
  804.         " ctl\n");
  805.     printf("---------------- -------- ------ ------ ------ ------ ---------"
  806.         " ---\n");
  807.  
  808.     while (true) {
  809.         slab_cache_t *cache;
  810.         link_t *cur;
  811.         ipl_t ipl;
  812.         int i;
  813.  
  814.         /*
  815.          * We must not hold the slab_cache_lock spinlock when printing
  816.          * the statistics. Otherwise we can easily deadlock if the print
  817.          * needs to allocate memory.
  818.          *
  819.          * Therefore, we walk through the slab cache list, skipping some
  820.          * amount of already processed caches during each iteration and
  821.          * gathering statistics about the first unprocessed cache. For
  822.          * the sake of printing the statistics, we realese the
  823.          * slab_cache_lock and reacquire it afterwards. Then the walk
  824.          * starts again.
  825.          *
  826.          * This limits both the efficiency and also accuracy of the
  827.          * obtained statistics. The efficiency is decreased because the
  828.          * time complexity of the algorithm is quadratic instead of
  829.          * linear. The accuracy is impacted because we drop the lock
  830.          * after processing one cache. If there is someone else
  831.          * manipulating the cache list, we might omit an arbitrary
  832.          * number of caches or process one cache multiple times.
  833.          * However, we don't bleed for this algorithm for it is only
  834.          * statistics.
  835.          */
  836.  
  837.         ipl = interrupts_disable();
  838.         spinlock_lock(&slab_cache_lock);
  839.  
  840.         for (i = 0, cur = slab_cache_list.next;
  841.             i < skip && cur != &slab_cache_list;
  842.             i++, cur = cur->next)
  843.             ;
  844.  
  845.         if (cur == &slab_cache_list) {
  846.             spinlock_unlock(&slab_cache_lock);
  847.             interrupts_restore(ipl);
  848.             break;
  849.         }
  850.  
  851.         skip++;
  852.  
  853.         cache = list_get_instance(cur, slab_cache_t, link);
  854.  
  855.         char *name = cache->name;
  856.         uint8_t order = cache->order;
  857.         size_t size = cache->size;
  858.         unsigned int objects = cache->objects;
  859.         long allocated_slabs = atomic_get(&cache->allocated_slabs);
  860.         long cached_objs = atomic_get(&cache->cached_objs);
  861.         long allocated_objs = atomic_get(&cache->allocated_objs);
  862.         int flags = cache->flags;
  863.        
  864.         spinlock_unlock(&slab_cache_lock);
  865.         interrupts_restore(ipl);
  866.        
  867.         printf("%-16s %8" PRIs " %6d %6u %6ld %6ld %9ld %-3s\n",
  868.             name, size, (1 << order), objects, allocated_slabs,
  869.             cached_objs, allocated_objs,
  870.             flags & SLAB_CACHE_SLINSIDE ? "in" : "out");
  871.     }
  872. }
  873.  
  874. void slab_cache_init(void)
  875. {
  876.     int i, size;
  877.  
  878.     /* Initialize magazine cache */
  879.     _slab_cache_create(&mag_cache, "slab_magazine",
  880.         sizeof(slab_magazine_t) + SLAB_MAG_SIZE * sizeof(void*),
  881.         sizeof(uintptr_t), NULL, NULL, SLAB_CACHE_NOMAGAZINE |
  882.         SLAB_CACHE_SLINSIDE);
  883.     /* Initialize slab_cache cache */
  884.     _slab_cache_create(&slab_cache_cache, "slab_cache",
  885.         sizeof(slab_cache_cache), sizeof(uintptr_t), NULL, NULL,
  886.         SLAB_CACHE_NOMAGAZINE | SLAB_CACHE_SLINSIDE);
  887.     /* Initialize external slab cache */
  888.     slab_extern_cache = slab_cache_create("slab_extern", sizeof(slab_t), 0,
  889.         NULL, NULL, SLAB_CACHE_SLINSIDE | SLAB_CACHE_MAGDEFERRED);
  890.  
  891.     /* Initialize structures for malloc */
  892.     for (i = 0, size = (1 << SLAB_MIN_MALLOC_W);
  893.         i < (SLAB_MAX_MALLOC_W - SLAB_MIN_MALLOC_W + 1);
  894.         i++, size <<= 1) {
  895.         malloc_caches[i] = slab_cache_create(malloc_names[i], size, 0,
  896.             NULL, NULL, SLAB_CACHE_MAGDEFERRED);
  897.     }
  898. #ifdef CONFIG_DEBUG      
  899.     _slab_initialized = 1;
  900. #endif
  901. }
  902.  
  903. /** Enable cpu_cache
  904.  *
  905.  * Kernel calls this function, when it knows the real number of
  906.  * processors.
  907.  * Allocate slab for cpucache and enable it on all existing
  908.  * slabs that are SLAB_CACHE_MAGDEFERRED
  909.  */
  910. void slab_enable_cpucache(void)
  911. {
  912.     link_t *cur;
  913.     slab_cache_t *s;
  914.  
  915. #ifdef CONFIG_DEBUG
  916.     _slab_initialized = 2;
  917. #endif
  918.  
  919.     spinlock_lock(&slab_cache_lock);
  920.    
  921.     for (cur = slab_cache_list.next; cur != &slab_cache_list;
  922.         cur = cur->next){
  923.         s = list_get_instance(cur, slab_cache_t, link);
  924.         if ((s->flags & SLAB_CACHE_MAGDEFERRED) !=
  925.             SLAB_CACHE_MAGDEFERRED)
  926.             continue;
  927.         make_magcache(s);
  928.         s->flags &= ~SLAB_CACHE_MAGDEFERRED;
  929.     }
  930.  
  931.     spinlock_unlock(&slab_cache_lock);
  932. }
  933.  
  934. /**************************************/
  935. /* kalloc/kfree functions             */
  936. void *malloc(unsigned int size, int flags)
  937. {
  938.     ASSERT(_slab_initialized);
  939.     ASSERT(size <= (1 << SLAB_MAX_MALLOC_W));
  940.    
  941.     if (size < (1 << SLAB_MIN_MALLOC_W))
  942.         size = (1 << SLAB_MIN_MALLOC_W);
  943.  
  944.     int idx = fnzb(size - 1) - SLAB_MIN_MALLOC_W + 1;
  945.  
  946.     return slab_alloc(malloc_caches[idx], flags);
  947. }
  948.  
  949. void *realloc(void *ptr, unsigned int size, int flags)
  950. {
  951.     ASSERT(_slab_initialized);
  952.     ASSERT(size <= (1 << SLAB_MAX_MALLOC_W));
  953.    
  954.     void *new_ptr;
  955.    
  956.     if (size > 0) {
  957.         if (size < (1 << SLAB_MIN_MALLOC_W))
  958.             size = (1 << SLAB_MIN_MALLOC_W);
  959.         int idx = fnzb(size - 1) - SLAB_MIN_MALLOC_W + 1;
  960.        
  961.         new_ptr = slab_alloc(malloc_caches[idx], flags);
  962.     } else
  963.         new_ptr = NULL;
  964.    
  965.     if ((new_ptr != NULL) && (ptr != NULL)) {
  966.         slab_t *slab = obj2slab(ptr);
  967.         memcpy(new_ptr, ptr, min(size, slab->cache->size));
  968.     }
  969.    
  970.     if (ptr != NULL)
  971.         free(ptr);
  972.    
  973.     return new_ptr;
  974. }
  975.  
  976. void free(void *ptr)
  977. {
  978.     if (!ptr)
  979.         return;
  980.  
  981.     slab_t *slab = obj2slab(ptr);
  982.     _slab_free(slab->cache, ptr, slab);
  983. }
  984.  
  985. /** @}
  986.  */
  987.