Subversion Repositories HelenOS

Rev

Rev 3207 | Rev 3972 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

/*
 * Copyright (c) 2001-2005 Jakub Jermar
 * Copyright (c) 2005 Sergey Bondari
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 * - Redistributions in binary form must reproduce the above copyright
 *   notice, this list of conditions and the following disclaimer in the
 *   documentation and/or other materials provided with the distribution.
 * - The name of the author may not be used to endorse or promote products
 *   derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/** @addtogroup genericmm
 * @{
 */

/**
 * @file
 * @brief   Physical frame allocator.
 *
 * This file contains the physical frame allocator and memory zone management.
 * The frame allocator is built on top of the buddy allocator.
 *
 * @see buddy.c
 */

/*
 * Locking order
 *
 * In order to access particular zone, the process must first lock
 * the zones.lock, then lock the zone and then unlock the zones.lock.
 * This insures, that we can fiddle with the zones in runtime without
 * affecting the processes. 
 *
 */

#include <arch/types.h>
#include <mm/frame.h>
#include <mm/as.h>
#include <panic.h>
#include <debug.h>
#include <adt/list.h>
#include <synch/spinlock.h>
#include <synch/mutex.h>
#include <synch/condvar.h>
#include <arch/asm.h>
#include <arch.h>
#include <print.h>
#include <align.h>
#include <mm/slab.h>
#include <bitops.h>
#include <macros.h>
#include <config.h>

typedef struct {
    count_t refcount;   /**< tracking of shared frames  */
    uint8_t buddy_order;    /**< buddy system block order */
    link_t buddy_link;  /**< link to the next free block inside one
                     order */
    void *parent;           /**< If allocated by slab, this points there */
} frame_t;

typedef struct {
    SPINLOCK_DECLARE(lock); /**< this lock protects everything below */
    pfn_t base;     /**< frame_no of the first frame in the frames
                     array */
    count_t count;          /**< Size of zone */

    frame_t *frames;    /**< array of frame_t structures in this
                     zone */
    count_t free_count; /**< number of free frame_t structures */
    count_t busy_count; /**< number of busy frame_t structures */
    
    buddy_system_t *buddy_system; /**< buddy system for the zone */
    int flags;
} zone_t;

/*
 * The zoneinfo.lock must be locked when accessing zoneinfo structure.
 * Some of the attributes in zone_t structures are 'read-only'
 */

typedef struct {
    SPINLOCK_DECLARE(lock);
    unsigned int count;
    zone_t *info[ZONES_MAX];
} zones_t;

static zones_t zones;

/*
 * Synchronization primitives used to sleep when there is no memory
 * available.
 */
mutex_t mem_avail_mtx;
condvar_t mem_avail_cv;
unsigned long mem_avail_frames = 0; /**< Number of available frames. */
unsigned long mem_avail_gen = 0;    /**< Generation counter. */

/********************/
/* Helper functions */
/********************/

static inline index_t frame_index(zone_t *zone, frame_t *frame)
{
    return (index_t) (frame - zone->frames);
}

static inline index_t frame_index_abs(zone_t *zone, frame_t *frame)
{
    return (index_t) (frame - zone->frames) + zone->base;
}

static inline int frame_index_valid(zone_t *zone, index_t index)
{
    return (index < zone->count);
}

/** Compute pfn_t from frame_t pointer & zone pointer */
static index_t make_frame_index(zone_t *zone, frame_t *frame)
{
    return (frame - zone->frames);
}

/** Initialize frame structure.
 *
 * @param frame     Frame structure to be initialized.
 */
static void frame_initialize(frame_t *frame)
{
    frame->refcount = 1;
    frame->buddy_order = 0;
}

/**********************/
/* Zoneinfo functions */
/**********************/

/** Insert-sort zone into zones list.
 *
 * @param newzone   New zone to be inserted into zone list.
 * @return      Zone number on success, -1 on error.
 */
static int zones_add_zone(zone_t *newzone)
{
    unsigned int i, j;
    ipl_t ipl;
    zone_t *z;

    ipl = interrupts_disable();
    spinlock_lock(&zones.lock);
    
    /* Try to merge */
    if (zones.count + 1 == ZONES_MAX) {
        printf("Maximum zone count %u exceeded!\n", ZONES_MAX);
        spinlock_unlock(&zones.lock);
        interrupts_restore(ipl);
        return -1;
    }
    
    for (i = 0; i < zones.count; i++) {
        /* Check for overflow */
        z = zones.info[i];
        if (overlaps(newzone->base, newzone->count, z->base,
            z->count)) {
            printf("Zones overlap!\n");
            return -1;
        }
        if (newzone->base < z->base)
            break;
    }
    
    /* Move other zones up */
    for (j = i; j < zones.count; j++)
        zones.info[j + 1] = zones.info[j];
    
    zones.info[i] = newzone;
    zones.count++;
    
    spinlock_unlock(&zones.lock);
    interrupts_restore(ipl);

    return i;
}

/** Try to find a zone where can we find the frame.
 *
 * Assume interrupts are disabled.
 *
 * @param frame     Frame number contained in zone.
 * @param pzone     If not null, it is used as zone hint. Zone index is
 *          filled into the variable on success. 
 * @return      Pointer to locked zone containing frame.
 */
static zone_t *find_zone_and_lock(pfn_t frame, unsigned int *pzone)
{
    unsigned int i;
    unsigned int hint = pzone ? *pzone : 0;
    zone_t *z;
    
    spinlock_lock(&zones.lock);

    if (hint >= zones.count)
        hint = 0;
    
    i = hint;
    do {
        z = zones.info[i];
        spinlock_lock(&z->lock);
        if (z->base <= frame && z->base + z->count > frame) {
            /* Unlock the global lock */
            spinlock_unlock(&zones.lock); 
            if (pzone)
                *pzone = i;
            return z;
        }
        spinlock_unlock(&z->lock);

        i++;
        if (i >= zones.count)
            i = 0;
    } while (i != hint);

    spinlock_unlock(&zones.lock);
    return NULL;
}

/** @return True if zone can allocate specified order */
static int zone_can_alloc(zone_t *z, uint8_t order)
{
    return buddy_system_can_alloc(z->buddy_system, order);
}

/** Find and lock zone that can allocate order frames.
 *
 * Assume interrupts are disabled.
 *
 * @param order     Size (2^order) of free space we are trying to find.
 * @param flags     Required flags of the target zone.
 * @param pzone     Pointer to preferred zone or NULL, on return contains
 *          zone number.
 */
static zone_t *
find_free_zone_and_lock(uint8_t order, int flags, unsigned int *pzone)
{
    unsigned int i;
    zone_t *z;
    unsigned int hint = pzone ? *pzone : 0;
    
    /* Mask off flags that are not applicable. */
    flags &= FRAME_LOW_4_GiB;

    spinlock_lock(&zones.lock);
    if (hint >= zones.count)
        hint = 0;
    i = hint;
    do {
        z = zones.info[i];
        
        spinlock_lock(&z->lock);

        /*
         * Check whether the zone meets the search criteria.
         */
        if ((z->flags & flags) == flags) {
            /*
             * Check if the zone has 2^order frames area available.
             */
            if (zone_can_alloc(z, order)) {
                spinlock_unlock(&zones.lock);
                if (pzone)
                    *pzone = i;
                return z;
            }
        }
        spinlock_unlock(&z->lock);
        if (++i >= zones.count)
            i = 0;
    } while (i != hint);
    spinlock_unlock(&zones.lock);
    return NULL;
}

/**************************/
/* Buddy system functions */
/**************************/

/** Buddy system find_block implementation.
 *
 * Find block that is parent of current list.
 * That means go to lower addresses, until such block is found
 *
 * @param order     Order of parent must be different then this
 *          parameter!!
 */
static link_t *zone_buddy_find_block(buddy_system_t *b, link_t *child,
    uint8_t order)
{
    frame_t *frame;
    zone_t *zone;
    index_t index;
    
    frame = list_get_instance(child, frame_t, buddy_link);
    zone = (zone_t *) b->data;

    index = frame_index(zone, frame);
    do {
        if (zone->frames[index].buddy_order != order) {
            return &zone->frames[index].buddy_link;
        }
    } while(index-- > 0);
    return NULL;
}

/** Buddy system find_buddy implementation.
 *
 * @param b     Buddy system.
 * @param block     Block for which buddy should be found.
 *
 * @return      Buddy for given block if found.
 */
static link_t *zone_buddy_find_buddy(buddy_system_t *b, link_t *block) 
{
    frame_t *frame;
    zone_t *zone;
    index_t index;
    bool is_left, is_right;

    frame = list_get_instance(block, frame_t, buddy_link);
    zone = (zone_t *) b->data;
    ASSERT(IS_BUDDY_ORDER_OK(frame_index_abs(zone, frame),
        frame->buddy_order));
    
    is_left = IS_BUDDY_LEFT_BLOCK_ABS(zone, frame);
    is_right = IS_BUDDY_RIGHT_BLOCK_ABS(zone, frame);

    ASSERT(is_left ^ is_right);
    if (is_left) {
        index = (frame_index(zone, frame)) +
            (1 << frame->buddy_order);
    } else {    /* if (is_right) */
        index = (frame_index(zone, frame)) -
            (1 << frame->buddy_order);
    }
    
    if (frame_index_valid(zone, index)) {
        if (zone->frames[index].buddy_order == frame->buddy_order && 
            zone->frames[index].refcount == 0) {
            return &zone->frames[index].buddy_link;
        }
    }

    return NULL;    
}

/** Buddy system bisect implementation.
 *
 * @param b     Buddy system.
 * @param block     Block to bisect.
 *
 * @return      Right block.
 */
static link_t *zone_buddy_bisect(buddy_system_t *b, link_t *block)
{
    frame_t *frame_l, *frame_r;

    frame_l = list_get_instance(block, frame_t, buddy_link);
    frame_r = (frame_l + (1 << (frame_l->buddy_order - 1)));
    
    return &frame_r->buddy_link;
}

/** Buddy system coalesce implementation.
 *
 * @param b     Buddy system.
 * @param block_1   First block.
 * @param block_2   First block's buddy.
 *
 * @return      Coalesced block (actually block that represents lower
 *          address).
 */
static link_t *zone_buddy_coalesce(buddy_system_t *b, link_t *block_1, 
    link_t *block_2) 
{
    frame_t *frame1, *frame2;
    
    frame1 = list_get_instance(block_1, frame_t, buddy_link);
    frame2 = list_get_instance(block_2, frame_t, buddy_link);
    
    return frame1 < frame2 ? block_1 : block_2;
}

/** Buddy system set_order implementation.
 *
 * @param b     Buddy system.
 * @param block     Buddy system block.
 * @param order     Order to set.
 */
static void zone_buddy_set_order(buddy_system_t *b, link_t *block,
    uint8_t order)
{
    frame_t *frame;
    frame = list_get_instance(block, frame_t, buddy_link);
    frame->buddy_order = order;
}

/** Buddy system get_order implementation.
 *
 * @param b     Buddy system.
 * @param block     Buddy system block.
 *
 * @return      Order of block.
 */
static uint8_t zone_buddy_get_order(buddy_system_t *b, link_t *block)
{
    frame_t *frame;
    frame = list_get_instance(block, frame_t, buddy_link);
    return frame->buddy_order;
}

/** Buddy system mark_busy implementation.
 *
 * @param b     Buddy system.
 * @param block     Buddy system block.
 */
static void zone_buddy_mark_busy(buddy_system_t *b, link_t * block)
{
    frame_t * frame;

    frame = list_get_instance(block, frame_t, buddy_link);
    frame->refcount = 1;
}

/** Buddy system mark_available implementation.
 *
 * @param b     Buddy system.
 * @param block     Buddy system block.
 */
static void zone_buddy_mark_available(buddy_system_t *b, link_t *block)
{
    frame_t *frame;
    frame = list_get_instance(block, frame_t, buddy_link);
    frame->refcount = 0;
}

static buddy_system_operations_t zone_buddy_system_operations = {
    .find_buddy = zone_buddy_find_buddy,
    .bisect = zone_buddy_bisect,
    .coalesce = zone_buddy_coalesce,
    .set_order = zone_buddy_set_order,
    .get_order = zone_buddy_get_order,
    .mark_busy = zone_buddy_mark_busy,
    .mark_available = zone_buddy_mark_available,
    .find_block = zone_buddy_find_block
};

/******************/
/* Zone functions */
/******************/

/** Allocate frame in particular zone.
 *
 * Assume zone is locked.
 * Panics if allocation is impossible.
 *
 * @param zone      Zone to allocate from.
 * @param order     Allocate exactly 2^order frames.
 *
 * @return      Frame index in zone.
 *
 */
static pfn_t zone_frame_alloc(zone_t *zone, uint8_t order)
{
    pfn_t v;
    link_t *tmp;
    frame_t *frame;

    /* Allocate frames from zone buddy system */
    tmp = buddy_system_alloc(zone->buddy_system, order);
    
    ASSERT(tmp);
    
    /* Update zone information. */
    zone->free_count -= (1 << order);
    zone->busy_count += (1 << order);

    /* Frame will be actually a first frame of the block. */
    frame = list_get_instance(tmp, frame_t, buddy_link);
    
    /* get frame address */
    v = make_frame_index(zone, frame);
    return v;
}

/** Free frame from zone.
 *
 * Assume zone is locked.
 *
 * @param zone      Pointer to zone from which the frame is to be freed.
 * @param frame_idx Frame index relative to zone.
 */
static void zone_frame_free(zone_t *zone, index_t frame_idx)
{
    frame_t *frame;
    uint8_t order;

    frame = &zone->frames[frame_idx];
    
    /* remember frame order */
    order = frame->buddy_order;

    ASSERT(frame->refcount);

    if (!--frame->refcount) {
        buddy_system_free(zone->buddy_system, &frame->buddy_link);
    
        /* Update zone information. */
        zone->free_count += (1 << order);
        zone->busy_count -= (1 << order);
    }
}

/** Return frame from zone. */
static frame_t *zone_get_frame(zone_t *zone, index_t frame_idx)
{
    ASSERT(frame_idx < zone->count);
    return &zone->frames[frame_idx];
}

/** Mark frame in zone unavailable to allocation. */
static void zone_mark_unavailable(zone_t *zone, index_t frame_idx)
{
    frame_t *frame;
    link_t *link;

    frame = zone_get_frame(zone, frame_idx);
    if (frame->refcount)
        return;
    link = buddy_system_alloc_block(zone->buddy_system,
        &frame->buddy_link);
    ASSERT(link);
    zone->free_count--;

    mutex_lock(&mem_avail_mtx);
    mem_avail_frames--;
    mutex_unlock(&mem_avail_mtx);
}

/** Join two zones.
 *
 * Expect zone_t *z to point to space at least zone_conf_size large.
 *
 * Assume z1 & z2 are locked.
 *
 * @param z     Target zone structure pointer.
 * @param z1        Zone to merge.
 * @param z2        Zone to merge.
 */
static void _zone_merge(zone_t *z, zone_t *z1, zone_t *z2)
{
    uint8_t max_order;
    unsigned int i;
    int z2idx;
    pfn_t frame_idx;
    frame_t *frame;

    ASSERT(!overlaps(z1->base, z1->count, z2->base, z2->count));
    ASSERT(z1->base < z2->base);

    spinlock_initialize(&z->lock, "zone_lock");
    z->base = z1->base;
    z->count = z2->base + z2->count - z1->base;
    z->flags = z1->flags & z2->flags;

    z->free_count = z1->free_count + z2->free_count;
    z->busy_count = z1->busy_count + z2->busy_count;
    
    max_order = fnzb(z->count);

    z->buddy_system = (buddy_system_t *) &z[1];
    buddy_system_create(z->buddy_system, max_order,
        &zone_buddy_system_operations, (void *) z);

    z->frames = (frame_t *)((uint8_t *) z->buddy_system +
        buddy_conf_size(max_order));
    for (i = 0; i < z->count; i++) {
        /* This marks all frames busy */
        frame_initialize(&z->frames[i]);
    }
    /* Copy frames from both zones to preserve full frame orders,
     * parents etc. Set all free frames with refcount=0 to 1, because
     * we add all free frames to buddy allocator later again, clear
     * order to 0. Don't set busy frames with refcount=0, as they
     * will not be reallocated during merge and it would make later
     * problems with allocation/free.
     */
    for (i = 0; i < z1->count; i++)
        z->frames[i] = z1->frames[i];
    for (i = 0; i < z2->count; i++) {
        z2idx = i + (z2->base - z1->base);
        z->frames[z2idx] = z2->frames[i];
    }
    i = 0;
    while (i < z->count) {
        if (z->frames[i].refcount) {
            /* skip busy frames */
            i += 1 << z->frames[i].buddy_order;
        } else { /* Free frames, set refcount=1 */
            /* All free frames have refcount=0, we need not
             * to check the order */
            z->frames[i].refcount = 1;
            z->frames[i].buddy_order = 0;
            i++;
        }
    }
    /* Add free blocks from the 2 original zones */
    while (zone_can_alloc(z1, 0)) {
        frame_idx = zone_frame_alloc(z1, 0);
        frame = &z->frames[frame_idx];
        frame->refcount = 0;
        buddy_system_free(z->buddy_system, &frame->buddy_link);
    }
    while (zone_can_alloc(z2, 0)) {
        frame_idx = zone_frame_alloc(z2, 0);
        frame = &z->frames[frame_idx + (z2->base - z1->base)];
        frame->refcount = 0;
        buddy_system_free(z->buddy_system, &frame->buddy_link);
    }
}

/** Return old configuration frames into the zone.
 *
 * We have several cases
 * - the conf. data is outside of zone -> exit, shall we call frame_free??
 * - the conf. data was created by zone_create or 
 *   updated with reduce_region -> free every frame
 *
 * @param newzone   The actual zone where freeing should occur.
 * @param oldzone   Pointer to old zone configuration data that should
 *          be freed from new zone.
 */
static void return_config_frames(zone_t *newzone, zone_t *oldzone)
{
    pfn_t pfn;
    frame_t *frame;
    count_t cframes;
    unsigned int i;

    pfn = ADDR2PFN((uintptr_t)KA2PA(oldzone));
    cframes = SIZE2FRAMES(zone_conf_size(oldzone->count));
    
    if (pfn < newzone->base || pfn >= newzone->base + newzone->count)
        return;

    frame = &newzone->frames[pfn - newzone->base];
    ASSERT(!frame->buddy_order);

    for (i = 0; i < cframes; i++) {
        newzone->busy_count++;
        zone_frame_free(newzone, pfn+i-newzone->base);
    }
}

/** Reduce allocated block to count of order 0 frames.
 *
 * The allocated block need 2^order frames of space. Reduce all frames
 * in block to order 0 and free the unneeded frames. This means, that 
 * when freeing the previously allocated block starting with frame_idx, 
 * you have to free every frame.
 *
 * @param zone 
 * @param frame_idx     Index to block.
 * @param count         Allocated space in block.
 */
static void zone_reduce_region(zone_t *zone, pfn_t frame_idx, count_t count)
{
    count_t i;
    uint8_t order;
    frame_t *frame;
    
    ASSERT(frame_idx + count < zone->count);

    order = zone->frames[frame_idx].buddy_order;
    ASSERT((count_t) (1 << order) >= count);

    /* Reduce all blocks to order 0 */
    for (i = 0; i < (count_t) (1 << order); i++) {
        frame = &zone->frames[i + frame_idx];
        frame->buddy_order = 0;
        if (!frame->refcount)
            frame->refcount = 1;
        ASSERT(frame->refcount == 1);
    }
    /* Free unneeded frames */
    for (i = count; i < (count_t) (1 << order); i++) {
        zone_frame_free(zone, i + frame_idx);
    }
}

/** Merge zones z1 and z2.
 *
 * - the zones must be 2 zones with no zone existing in between,
 *   which means that z2 = z1+1
 *
 * - When you create a new zone, the frame allocator configuration does
 *   not to be 2^order size. Once the allocator is running it is no longer
 *   possible, merged configuration data occupies more space :-/
 */
void zone_merge(unsigned int z1, unsigned int z2)
{
    ipl_t ipl;
    zone_t *zone1, *zone2, *newzone;
    unsigned int cframes;
    uint8_t order;
    unsigned int i;
    pfn_t pfn;

    ipl = interrupts_disable();
    spinlock_lock(&zones.lock);

    if ((z1 >= zones.count) || (z2 >= zones.count))
        goto errout;
    /* We can join only 2 zones with none existing inbetween */
    if (z2 - z1 != 1)
        goto errout;

    zone1 = zones.info[z1];
    zone2 = zones.info[z2];
    spinlock_lock(&zone1->lock);
    spinlock_lock(&zone2->lock);

    cframes = SIZE2FRAMES(zone_conf_size(zone2->base + zone2->count -
        zone1->base));
    if (cframes == 1)
        order = 0;
    else 
        order = fnzb(cframes - 1) + 1;

    /* Allocate zonedata inside one of the zones */
    if (zone_can_alloc(zone1, order))
        pfn = zone1->base + zone_frame_alloc(zone1, order);
    else if (zone_can_alloc(zone2, order))
        pfn = zone2->base + zone_frame_alloc(zone2, order);
    else
        goto errout2;

    newzone = (zone_t *) PA2KA(PFN2ADDR(pfn));

    _zone_merge(newzone, zone1, zone2);

    /* Free unneeded config frames */
    zone_reduce_region(newzone, pfn - newzone->base,  cframes);
    /* Subtract zone information from busy frames */
    newzone->busy_count -= cframes;

    /* Replace existing zones in zoneinfo list */
    zones.info[z1] = newzone;
    for (i = z2 + 1; i < zones.count; i++)
        zones.info[i - 1] = zones.info[i];
    zones.count--;

    /* Free old zone information */
    return_config_frames(newzone, zone1);
    return_config_frames(newzone, zone2);
errout2:
    /* Nobody is allowed to enter to zone, so we are safe
     * to touch the spinlocks last time */
    spinlock_unlock(&zone1->lock);
    spinlock_unlock(&zone2->lock);
errout:
    spinlock_unlock(&zones.lock);
    interrupts_restore(ipl);
}

/** Merge all zones into one big zone.
 *
 * It is reasonable to do this on systems whose bios reports parts in chunks,
 * so that we could have 1 zone (it's faster).
 */
void zone_merge_all(void)
{
    int count = zones.count;

    while (zones.count > 1 && --count) {
        zone_merge(0, 1);
        break;
    }
}

/** Create new frame zone.
 *
 * @param start     Physical address of the first frame within the zone.
 * @param count     Count of frames in zone.
 * @param z     Address of configuration information of zone.
 * @param flags     Zone flags.
 *
 * @return      Initialized zone.
 */
static void zone_construct(pfn_t start, count_t count, zone_t *z, int flags)
{
    unsigned int i;
    uint8_t max_order;

    spinlock_initialize(&z->lock, "zone_lock");
    z->base = start;
    z->count = count;

    /* Mask off flags that are calculated automatically. */
    flags &= ~FRAME_LOW_4_GiB;
    /* Determine calculated flags. */
    if (z->base + count < (1ULL << (32 - FRAME_WIDTH))) /* 4 GiB */
        flags |= FRAME_LOW_4_GiB;

    z->flags = flags;

    z->free_count = count;
    z->busy_count = 0;

    /*
     * Compute order for buddy system, initialize
     */
    max_order = fnzb(count);
    z->buddy_system = (buddy_system_t *)&z[1];
    
    buddy_system_create(z->buddy_system, max_order, 
        &zone_buddy_system_operations, (void *) z);
    
    /* Allocate frames _after_ the conframe */
    /* Check sizes */
    z->frames = (frame_t *)((uint8_t *) z->buddy_system +
        buddy_conf_size(max_order));
    for (i = 0; i < count; i++) {
        frame_initialize(&z->frames[i]);
    }
    
    /* Stuffing frames */
    for (i = 0; i < count; i++) {
        z->frames[i].refcount = 0;
        buddy_system_free(z->buddy_system, &z->frames[i].buddy_link);
    }
}

/** Compute configuration data size for zone.
 *
 * @param count     Size of zone in frames.
 * @return      Size of zone configuration info (in bytes).
 */
uintptr_t zone_conf_size(count_t count)
{
    int size = sizeof(zone_t) + count * sizeof(frame_t);
    int max_order;

    max_order = fnzb(count);
    size += buddy_conf_size(max_order);
    return size;
}

/** Create and add zone to system.
 *
 * @param start     First frame number (absolute).
 * @param count     Size of zone in frames.
 * @param confframe Where configuration frames are supposed to be.
 *          Automatically checks, that we will not disturb the 
 *          kernel and possibly init.  If confframe is given
 *          _outside_ this zone, it is expected, that the area is
 *          already marked BUSY and big enough to contain
 *          zone_conf_size() amount of data.  If the confframe is
 *          inside the area, the zone free frame information is
 *          modified not to include it.
 *
 * @return      Zone number or -1 on error.
 */
int zone_create(pfn_t start, count_t count, pfn_t confframe, int flags)
{
    zone_t *z;
    uintptr_t addr;
    count_t confcount;
    unsigned int i;
    int znum;

    /* Theoretically we could have here 0, practically make sure
     * nobody tries to do that. If some platform requires, remove
     * the assert
     */
    ASSERT(confframe);
    /* If conframe is supposed to be inside our zone, then make sure
     * it does not span kernel & init
     */
    confcount = SIZE2FRAMES(zone_conf_size(count));
    if (confframe >= start && confframe < start + count) {
        for (; confframe < start + count; confframe++) {
            addr = PFN2ADDR(confframe);
            if (overlaps(addr, PFN2ADDR(confcount),
                KA2PA(config.base), config.kernel_size))
                continue;
            
            if (overlaps(addr, PFN2ADDR(confcount),
                KA2PA(config.stack_base), config.stack_size))
                continue;
            
            bool overlap = false;
            count_t i;
            for (i = 0; i < init.cnt; i++)
                if (overlaps(addr, PFN2ADDR(confcount),
                    KA2PA(init.tasks[i].addr),
                    init.tasks[i].size)) {
                    overlap = true;
                    break;
                }
            if (overlap)
                continue;
            
            break;
        }
        if (confframe >= start + count)
            panic("Cannot find configuration data for zone.");
    }

    z = (zone_t *) PA2KA(PFN2ADDR(confframe));
    zone_construct(start, count, z, flags);
    znum = zones_add_zone(z);
    if (znum == -1)
        return -1;

    mutex_lock(&mem_avail_mtx);
    mem_avail_frames += count;
    mutex_unlock(&mem_avail_mtx);

    /* If confdata in zone, mark as unavailable */
    if (confframe >= start && confframe < start + count)
        for (i = confframe; i < confframe + confcount; i++) {
            zone_mark_unavailable(z, i - z->base);
        }
    
    return znum;
}

/***************************************/
/* Frame functions */

/** Set parent of frame. */
void frame_set_parent(pfn_t pfn, void *data, unsigned int hint)
{
    zone_t *zone = find_zone_and_lock(pfn, &hint);

    ASSERT(zone);

    zone_get_frame(zone, pfn - zone->base)->parent = data;
    spinlock_unlock(&zone->lock);
}

void *frame_get_parent(pfn_t pfn, unsigned int hint)
{
    zone_t *zone = find_zone_and_lock(pfn, &hint);
    void *res;

    ASSERT(zone);
    res = zone_get_frame(zone, pfn - zone->base)->parent;
    
    spinlock_unlock(&zone->lock);
    return res;
}

/** Allocate power-of-two frames of physical memory.
 *
 * @param order     Allocate exactly 2^order frames.
 * @param flags     Flags for host zone selection and address processing.
 * @param pzone     Preferred zone.
 *
 * @return      Physical address of the allocated frame.
 *
 */
void *frame_alloc_generic(uint8_t order, int flags, unsigned int *pzone)
{
    ipl_t ipl;
    int freed;
    pfn_t v;
    zone_t *zone;
    unsigned long gen = 0;
    
loop:
    ipl = interrupts_disable();
    
    /*
     * First, find suitable frame zone.
     */
    zone = find_free_zone_and_lock(order, flags, pzone);
    
    /* If no memory, reclaim some slab memory,
       if it does not help, reclaim all */
    if (!zone && !(flags & FRAME_NO_RECLAIM)) {
        freed = slab_reclaim(0);
        if (freed)
            zone = find_free_zone_and_lock(order, flags, pzone);
        if (!zone) {
            freed = slab_reclaim(SLAB_RECLAIM_ALL);
            if (freed)
                zone = find_free_zone_and_lock(order, flags,
                    pzone);
        }
    }
    if (!zone) {
        /*
         * Sleep until some frames are available again.
         */
        if (flags & FRAME_ATOMIC) {
            interrupts_restore(ipl);
            return 0;
        }
        
#ifdef CONFIG_DEBUG
        unsigned long avail;

        mutex_lock(&mem_avail_mtx);
        avail = mem_avail_frames;
        mutex_unlock(&mem_avail_mtx);

        printf("Thread %" PRIu64 " waiting for %u frames, "
            "%u available.\n", THREAD->tid, 1ULL << order, avail);
#endif

        mutex_lock(&mem_avail_mtx);
        while ((mem_avail_frames < (1ULL << order)) ||
            gen == mem_avail_gen)
            condvar_wait(&mem_avail_cv, &mem_avail_mtx);
        gen = mem_avail_gen;
        mutex_unlock(&mem_avail_mtx);

#ifdef CONFIG_DEBUG
        mutex_lock(&mem_avail_mtx);
        avail = mem_avail_frames;
        mutex_unlock(&mem_avail_mtx);

        printf("Thread %" PRIu64 " woken up, %u frames available.\n",
            THREAD->tid, avail);
#endif

        interrupts_restore(ipl);
        goto loop;
    }
    
    v = zone_frame_alloc(zone, order);
    v += zone->base;

    spinlock_unlock(&zone->lock);
    
    mutex_lock(&mem_avail_mtx);
    mem_avail_frames -= (1ULL << order);
    mutex_unlock(&mem_avail_mtx);

    interrupts_restore(ipl);

    if (flags & FRAME_KA)
        return (void *)PA2KA(PFN2ADDR(v));
    return (void *)PFN2ADDR(v);
}

/** Free a frame.
 *
 * Find respective frame structure for supplied physical frame address.
 * Decrement frame reference count.
 * If it drops to zero, move the frame structure to free list.
 *
 * @param frame     Physical Address of of the frame to be freed.
 */
void frame_free(uintptr_t frame)
{
    ipl_t ipl;
    zone_t *zone;
    pfn_t pfn = ADDR2PFN(frame);

    ipl = interrupts_disable();

    /*
     * First, find host frame zone for addr.
     */
    zone = find_zone_and_lock(pfn, NULL);
    ASSERT(zone);
    
    zone_frame_free(zone, pfn - zone->base);
    
    spinlock_unlock(&zone->lock);
    
    /*
     * Signal that some memory has been freed.
     */
    mutex_lock(&mem_avail_mtx);
    mem_avail_frames++;
    mem_avail_gen++;
    condvar_broadcast(&mem_avail_cv);
    mutex_unlock(&mem_avail_mtx);

    interrupts_restore(ipl);
}

/** Add reference to frame.
 *
 * Find respective frame structure for supplied PFN and
 * increment frame reference count.
 *
 * @param pfn       Frame number of the frame to be freed.
 */
void frame_reference_add(pfn_t pfn)
{
    ipl_t ipl;
    zone_t *zone;
    frame_t *frame;

    ipl = interrupts_disable();
    
    /*
     * First, find host frame zone for addr.
     */
    zone = find_zone_and_lock(pfn, NULL);
    ASSERT(zone);
    
    frame = &zone->frames[pfn - zone->base];
    frame->refcount++;
    
    spinlock_unlock(&zone->lock);
    interrupts_restore(ipl);
}

/** Mark given range unavailable in frame zones. */
void frame_mark_unavailable(pfn_t start, count_t count)
{
    unsigned int i;
    zone_t *zone;
    unsigned int prefzone = 0;
    
    for (i = 0; i < count; i++) {
        zone = find_zone_and_lock(start + i, &prefzone);
        if (!zone) /* PFN not found */
            continue;
        zone_mark_unavailable(zone, start + i - zone->base);

        spinlock_unlock(&zone->lock);
    }
}

/** Initialize physical memory management. */
void frame_init(void)
{
    if (config.cpu_active == 1) {
        zones.count = 0;
        spinlock_initialize(&zones.lock, "zones.lock");
        mutex_initialize(&mem_avail_mtx, MUTEX_ACTIVE);
        condvar_initialize(&mem_avail_cv);
    }
    /* Tell the architecture to create some memory */
    frame_arch_init();
    if (config.cpu_active == 1) {
        frame_mark_unavailable(ADDR2PFN(KA2PA(config.base)),
            SIZE2FRAMES(config.kernel_size));
        frame_mark_unavailable(ADDR2PFN(KA2PA(config.stack_base)),
            SIZE2FRAMES(config.stack_size));
        
        count_t i;
        for (i = 0; i < init.cnt; i++) {
            pfn_t pfn = ADDR2PFN(KA2PA(init.tasks[i].addr));
            frame_mark_unavailable(pfn,
                SIZE2FRAMES(init.tasks[i].size));
        }

        if (ballocs.size)
            frame_mark_unavailable(ADDR2PFN(KA2PA(ballocs.base)),
                SIZE2FRAMES(ballocs.size));

        /* Black list first frame, as allocating NULL would
         * fail in some places */
        frame_mark_unavailable(0, 1);
    }
}


/** Return total size of all zones. */
uint64_t zone_total_size(void)
{
    zone_t *zone = NULL;
    unsigned int i;
    ipl_t ipl;
    uint64_t total = 0;

    ipl = interrupts_disable();
    spinlock_lock(&zones.lock);
    
    for (i = 0; i < zones.count; i++) {
        zone = zones.info[i];
        spinlock_lock(&zone->lock);
        total += (uint64_t) FRAMES2SIZE(zone->count);
        spinlock_unlock(&zone->lock);
    }
    
    spinlock_unlock(&zones.lock);
    interrupts_restore(ipl);
    
    return total;
}

/** Prints list of zones. */
void zone_print_list(void)
{
    zone_t *zone = NULL;
    unsigned int i;
    ipl_t ipl;

#ifdef __32_BITS__  
    printf("#  base address free frames  busy frames\n");
    printf("-- ------------ ------------ ------------\n");
#endif

#ifdef __64_BITS__
    printf("#  base address         free frames  busy frames\n");
    printf("-- -------------------- ------------ ------------\n");
#endif
    
    /*
     * Because printing may require allocation of memory, we may not hold
     * the frame allocator locks when printing zone statistics.  Therefore,
     * we simply gather the statistics under the protection of the locks and
     * print the statistics when the locks have been released.
     *
     * When someone adds/removes zones while we are printing the statistics,
     * we may end up with inaccurate output (e.g. a zone being skipped from
     * the listing).
     */

    for (i = 0; ; i++) {
        uintptr_t base;
        count_t free_count;
        count_t busy_count;

        ipl = interrupts_disable();
        spinlock_lock(&zones.lock);
        
        if (i >= zones.count) {
            spinlock_unlock(&zones.lock);
            interrupts_restore(ipl);
            break;
        }

        zone = zones.info[i];
        spinlock_lock(&zone->lock);

        base = PFN2ADDR(zone->base);
        free_count = zone->free_count;
        busy_count = zone->busy_count;

        spinlock_unlock(&zone->lock);
        
        spinlock_unlock(&zones.lock);
        interrupts_restore(ipl);

#ifdef __32_BITS__
        printf("%-2u   %10p %12" PRIc " %12" PRIc "\n", i, base,
            free_count, busy_count);
#endif

#ifdef __64_BITS__
        printf("%-2u   %18p %12" PRIc " %12" PRIc "\n", i, base,
            free_count, busy_count);
#endif
        
    }
}

/** Prints zone details.
 *
 * @param num       Zone base address or zone number.
 */
void zone_print_one(unsigned int num)
{
    zone_t *zone = NULL;
    ipl_t ipl;
    unsigned int i;
    uintptr_t base;
    count_t count;
    count_t busy_count;
    count_t free_count;

    ipl = interrupts_disable();
    spinlock_lock(&zones.lock);

    for (i = 0; i < zones.count; i++) {
        if ((i == num) || (PFN2ADDR(zones.info[i]->base) == num)) {
            zone = zones.info[i];
            break;
        }
    }
    if (!zone) {
        spinlock_unlock(&zones.lock);
        interrupts_restore(ipl);
        printf("Zone not found.\n");
        return;
    }
    
    spinlock_lock(&zone->lock);
    base = PFN2ADDR(zone->base);
    count = zone->count;
    busy_count = zone->busy_count;
    free_count = zone->free_count;
    spinlock_unlock(&zone->lock);
    spinlock_unlock(&zones.lock);
    interrupts_restore(ipl);

    printf("Zone base address: %p\n", base);
    printf("Zone size: %" PRIc " frames (%" PRIs " KiB)\n", count,
        SIZE2KB(FRAMES2SIZE(count)));
    printf("Allocated space: %" PRIc " frames (%" PRIs " KiB)\n",
        busy_count, SIZE2KB(FRAMES2SIZE(busy_count)));
    printf("Available space: %" PRIc " frames (%" PRIs " KiB)\n",
        free_count, SIZE2KB(FRAMES2SIZE(free_count)));
}

/** @}
 */