/branches/arm/kernel/generic/src/mm/as.c
/* |
* Copyright (c) 2001-2006 Jakub Jermar |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Address space related functions. |
* |
* This file contains address space manipulation functions. |
* Roughly speaking, this is a higher-level client of |
* Virtual Address Translation (VAT) subsystem. |
* |
* Functionality provided by this file allows one to |
* create address spaces and create, resize and share |
* address space areas. |
* |
* @see page.c |
* |
*/ |
#include <mm/as.h> |
#include <arch/mm/as.h> |
#include <mm/page.h> |
#include <mm/frame.h> |
#include <mm/slab.h> |
#include <mm/tlb.h> |
#include <arch/mm/page.h> |
#include <genarch/mm/page_pt.h> |
#include <genarch/mm/page_ht.h> |
#include <mm/asid.h> |
#include <arch/mm/asid.h> |
#include <preemption.h> |
#include <synch/spinlock.h> |
#include <synch/mutex.h> |
#include <adt/list.h> |
#include <adt/btree.h> |
#include <proc/task.h> |
#include <proc/thread.h> |
#include <arch/asm.h> |
#include <panic.h> |
#include <debug.h> |
#include <print.h> |
#include <memstr.h> |
#include <macros.h> |
#include <arch.h> |
#include <errno.h> |
#include <config.h> |
#include <align.h> |
#include <arch/types.h> |
#include <syscall/copy.h> |
#include <arch/interrupt.h> |
#ifdef CONFIG_VIRT_IDX_DCACHE |
#include <arch/mm/cache.h> |
#endif /* CONFIG_VIRT_IDX_DCACHE */ |
/** |
* Each architecture decides what functions will be used to carry out |
* address space operations such as creating or locking page tables. |
*/ |
as_operations_t *as_operations = NULL; |
/** |
* Slab for as_t objects. |
*/ |
static slab_cache_t *as_slab; |
/** |
* This lock serializes access to the ASID subsystem. |
* It protects: |
* - inactive_as_with_asid_head list |
* - the as->asid member of each as_t
* - asids_allocated counter |
*/ |
SPINLOCK_INITIALIZE(asidlock); |
/** |
* This list contains address spaces that are not active on any |
* processor and that have a valid ASID.
*/ |
LIST_INITIALIZE(inactive_as_with_asid_head); |
/** Kernel address space. */ |
as_t *AS_KERNEL = NULL; |
static int area_flags_to_page_flags(int); |
static as_area_t *find_area_and_lock(as_t *, uintptr_t); |
static bool check_area_conflicts(as_t *, uintptr_t, size_t, as_area_t *); |
static void sh_info_remove_reference(share_info_t *); |
static int as_constructor(void *obj, int flags) |
{ |
as_t *as = (as_t *) obj; |
int rc; |
link_initialize(&as->inactive_as_with_asid_link); |
mutex_initialize(&as->lock, MUTEX_PASSIVE); |
rc = as_constructor_arch(as, flags); |
return rc; |
} |
static int as_destructor(void *obj) |
{ |
as_t *as = (as_t *) obj; |
return as_destructor_arch(as); |
} |
/** Initialize address space subsystem. */ |
void as_init(void) |
{ |
as_arch_init(); |
as_slab = slab_cache_create("as_slab", sizeof(as_t), 0, |
as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED); |
AS_KERNEL = as_create(FLAG_AS_KERNEL); |
if (!AS_KERNEL) |
panic("can't create kernel address space\n"); |
} |
/** Create address space. |
* |
* @param flags Flags that influence the way in which the address space
* is created.
*
* @return Newly created address space.
*/
as_t *as_create(int flags) |
{ |
as_t *as; |
as = (as_t *) slab_alloc(as_slab, 0); |
(void) as_create_arch(as, 0); |
btree_create(&as->as_area_btree); |
if (flags & FLAG_AS_KERNEL) |
as->asid = ASID_KERNEL; |
else |
as->asid = ASID_INVALID; |
atomic_set(&as->refcount, 0); |
as->cpu_refcount = 0; |
#ifdef AS_PAGE_TABLE |
as->genarch.page_table = page_table_create(flags); |
#else |
page_table_create(flags); |
#endif |
return as; |
} |
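/*
* Illustrative sketch only (hypothetical caller; real callers, such as the
* task and program creation code, may differ):
*
*	as_t *as = as_create(0);
*	... populate the address space with as_area_create() calls ...
*	as_destroy(as);		(once its refcount has dropped back to zero)
*/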
/** Destroy address space.
* |
* When there are no tasks referencing this address space (i.e. its refcount is |
* zero), the address space can be destroyed. |
* |
* We know that we don't hold any spinlock. |
* |
* @param as Address space to be destroyed. |
*/ |
void as_destroy(as_t *as) |
{ |
ipl_t ipl; |
bool cond; |
DEADLOCK_PROBE_INIT(p_asidlock); |
ASSERT(atomic_get(&as->refcount) == 0); |
/* |
* Since there is no reference to this area, |
* it is safe not to lock its mutex. |
*/ |
/* |
* We need to avoid deadlock between TLB shootdown and asidlock. |
* We therefore try to take asid conditionally and if we don't succeed, |
* we enable interrupts and try again. This is done while preemption is |
* disabled to prevent nested context switches. We also depend on the |
* fact that so far no spinlocks are held. |
*/ |
preemption_disable(); |
ipl = interrupts_read(); |
retry: |
interrupts_disable(); |
if (!spinlock_trylock(&asidlock)) { |
interrupts_enable(); |
DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD); |
goto retry; |
} |
preemption_enable(); /* Interrupts disabled, enable preemption */ |
if (as->asid != ASID_INVALID && as != AS_KERNEL) { |
if (as != AS && as->cpu_refcount == 0) |
list_remove(&as->inactive_as_with_asid_link); |
asid_put(as->asid); |
} |
spinlock_unlock(&asidlock); |
/* |
* Destroy address space areas of the address space. |
* The B+tree must be walked carefully because it is |
* also being destroyed. |
*/ |
for (cond = true; cond; ) { |
btree_node_t *node; |
ASSERT(!list_empty(&as->as_area_btree.leaf_head)); |
node = list_get_instance(as->as_area_btree.leaf_head.next, |
btree_node_t, leaf_link); |
if ((cond = node->keys)) { |
as_area_destroy(as, node->key[0]); |
} |
} |
btree_destroy(&as->as_area_btree); |
#ifdef AS_PAGE_TABLE |
page_table_destroy(as->genarch.page_table); |
#else |
page_table_destroy(NULL); |
#endif |
interrupts_restore(ipl); |
slab_free(as_slab, as); |
} |
/** Create address space area of common attributes. |
* |
* The created address space area is added to the target address space. |
* |
* @param as Target address space. |
* @param flags Flags of the area memory. |
* @param size Size of area. |
* @param base Base address of area. |
* @param attrs Attributes of the area. |
* @param backend Address space area backend. NULL if no backend is used. |
* @param backend_data NULL or a pointer to an array holding two void *. |
* |
* @return Address space area on success or NULL on failure. |
*/ |
as_area_t * |
as_area_create(as_t *as, int flags, size_t size, uintptr_t base, int attrs, |
mem_backend_t *backend, mem_backend_data_t *backend_data) |
{ |
ipl_t ipl; |
as_area_t *a; |
if (base % PAGE_SIZE) |
return NULL; |
if (!size) |
return NULL; |
/* Writeable executable areas are not supported. */ |
if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE)) |
return NULL; |
ipl = interrupts_disable(); |
mutex_lock(&as->lock); |
if (!check_area_conflicts(as, base, size, NULL)) { |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return NULL; |
} |
a = (as_area_t *) malloc(sizeof(as_area_t), 0); |
mutex_initialize(&a->lock, MUTEX_PASSIVE); |
a->as = as; |
a->flags = flags; |
a->attributes = attrs; |
a->pages = SIZE2FRAMES(size); |
a->base = base; |
a->sh_info = NULL; |
a->backend = backend; |
if (backend_data) |
a->backend_data = *backend_data; |
else |
memsetb(&a->backend_data, sizeof(a->backend_data), 0); |
btree_create(&a->used_space); |
btree_insert(&as->as_area_btree, base, (void *) a, NULL); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return a; |
} |
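/*
* Illustrative sketch only (hypothetical base address): creating a one-page,
* anonymous, read-write, cacheable area:
*
*	as_area_t *a = as_area_create(as,
*	    AS_AREA_READ | AS_AREA_WRITE | AS_AREA_CACHEABLE, PAGE_SIZE,
*	    0x40000000, AS_AREA_ATTR_NONE, &anon_backend, NULL);
*
* A NULL result indicates an unaligned base, a zero size, a write+execute
* combination or a conflict with an existing area.
*/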
/** Find address space area and change it. |
* |
* @param as Address space. |
* @param address Virtual address belonging to the area to be changed. |
* Must be page-aligned. |
* @param size New size of the virtual memory block starting at |
* address. |
* @param flags Flags influencing the remap operation. Currently unused. |
* |
* @return Zero on success or a value from @ref errno.h otherwise. |
*/ |
int as_area_resize(as_t *as, uintptr_t address, size_t size, int flags) |
{ |
as_area_t *area; |
ipl_t ipl; |
size_t pages; |
ipl = interrupts_disable(); |
mutex_lock(&as->lock); |
/* |
* Locate the area. |
*/ |
area = find_area_and_lock(as, address); |
if (!area) { |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return ENOENT; |
} |
if (area->backend == &phys_backend) { |
/* |
* Remapping of address space areas associated |
* with memory mapped devices is not supported. |
*/ |
mutex_unlock(&area->lock); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return ENOTSUP; |
} |
if (area->sh_info) { |
/* |
* Remapping of shared address space areas |
* is not supported. |
*/ |
mutex_unlock(&area->lock); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return ENOTSUP; |
} |
pages = SIZE2FRAMES((address - area->base) + size); |
if (!pages) { |
/* |
* Zero size address space areas are not allowed. |
*/ |
mutex_unlock(&area->lock); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return EPERM; |
} |
if (pages < area->pages) { |
bool cond; |
uintptr_t start_free = area->base + pages * PAGE_SIZE; |
/* |
* Shrinking the area. |
* No need to check for overlaps. |
*/ |
/* |
* Start TLB shootdown sequence. |
*/ |
tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base + |
pages * PAGE_SIZE, area->pages - pages); |
/* |
* Remove frames belonging to used space starting from |
* the highest addresses downwards until an overlap with |
* the resized address space area is found. Note that this |
* is also the right way to remove part of the used_space |
* B+tree leaf list. |
*/ |
for (cond = true; cond;) { |
btree_node_t *node; |
ASSERT(!list_empty(&area->used_space.leaf_head)); |
node = |
list_get_instance(area->used_space.leaf_head.prev, |
btree_node_t, leaf_link); |
if ((cond = (bool) node->keys)) { |
uintptr_t b = node->key[node->keys - 1]; |
count_t c = |
(count_t) node->value[node->keys - 1]; |
unsigned int i = 0; |
if (overlaps(b, c * PAGE_SIZE, area->base, |
pages * PAGE_SIZE)) { |
if (b + c * PAGE_SIZE <= start_free) { |
/* |
* The whole interval fits |
* completely in the resized |
* address space area. |
*/ |
break; |
} |
/* |
* Part of the interval corresponding |
* to b and c overlaps with the resized |
* address space area. |
*/ |
cond = false; /* we are almost done */ |
i = (start_free - b) >> PAGE_WIDTH; |
if (!used_space_remove(area, start_free, |
c - i)) |
panic("Could not remove used " |
"space.\n"); |
} else { |
/* |
* The interval of used space can be |
* completely removed. |
*/ |
if (!used_space_remove(area, b, c)) |
panic("Could not remove used " |
"space.\n"); |
} |
for (; i < c; i++) { |
pte_t *pte; |
page_table_lock(as, false); |
pte = page_mapping_find(as, b + |
i * PAGE_SIZE); |
ASSERT(pte && PTE_VALID(pte) && |
PTE_PRESENT(pte)); |
if (area->backend && |
area->backend->frame_free) { |
area->backend->frame_free(area, |
b + i * PAGE_SIZE, |
PTE_GET_FRAME(pte)); |
} |
page_mapping_remove(as, b + |
i * PAGE_SIZE); |
page_table_unlock(as, false); |
} |
} |
} |
/* |
* Finish TLB shootdown sequence. |
*/ |
tlb_invalidate_pages(as->asid, area->base + pages * PAGE_SIZE, |
area->pages - pages); |
/* |
* Invalidate software translation caches (e.g. TSB on sparc64). |
*/ |
as_invalidate_translation_cache(as, area->base + |
pages * PAGE_SIZE, area->pages - pages); |
tlb_shootdown_finalize(); |
} else { |
/* |
* Growing the area. |
* Check for overlaps with other address space areas. |
*/ |
if (!check_area_conflicts(as, address, pages * PAGE_SIZE, |
area)) { |
mutex_unlock(&area->lock); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return EADDRNOTAVAIL; |
} |
} |
area->pages = pages; |
mutex_unlock(&area->lock); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return 0; |
} |
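/*
* Worked example (assuming 4 KiB pages): calling as_area_resize() with
* address == area->base and size == 0x6800 on a 16-page area yields
* pages = SIZE2FRAMES(0x6800) = 7, so the area shrinks and the mappings from
* area->base + 7 * PAGE_SIZE upwards are torn down by the shrink path above.
*/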
/** Destroy address space area. |
* |
* @param as Address space. |
* @param address Address within the area to be deleted. |
* |
* @return Zero on success or a value from @ref errno.h on failure. |
*/ |
int as_area_destroy(as_t *as, uintptr_t address) |
{ |
as_area_t *area; |
uintptr_t base; |
link_t *cur; |
ipl_t ipl; |
ipl = interrupts_disable(); |
mutex_lock(&as->lock); |
area = find_area_and_lock(as, address); |
if (!area) { |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return ENOENT; |
} |
base = area->base; |
/* |
* Start TLB shootdown sequence. |
*/ |
tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base, area->pages); |
/* |
* Visit only the pages mapped by used_space B+tree. |
*/ |
for (cur = area->used_space.leaf_head.next; |
cur != &area->used_space.leaf_head; cur = cur->next) { |
btree_node_t *node; |
unsigned int i; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
for (i = 0; i < node->keys; i++) { |
uintptr_t b = node->key[i]; |
count_t j; |
pte_t *pte; |
for (j = 0; j < (count_t) node->value[i]; j++) { |
page_table_lock(as, false); |
pte = page_mapping_find(as, b + j * PAGE_SIZE); |
ASSERT(pte && PTE_VALID(pte) && |
PTE_PRESENT(pte)); |
if (area->backend && |
area->backend->frame_free) { |
area->backend->frame_free(area, b + |
j * PAGE_SIZE, PTE_GET_FRAME(pte)); |
} |
page_mapping_remove(as, b + j * PAGE_SIZE); |
page_table_unlock(as, false); |
} |
} |
} |
/* |
* Finish TLB shootdown sequence. |
*/ |
tlb_invalidate_pages(as->asid, area->base, area->pages); |
/* |
* Invalidate potential software translation caches (e.g. TSB on |
* sparc64). |
*/ |
as_invalidate_translation_cache(as, area->base, area->pages); |
tlb_shootdown_finalize(); |
btree_destroy(&area->used_space); |
area->attributes |= AS_AREA_ATTR_PARTIAL; |
if (area->sh_info) |
sh_info_remove_reference(area->sh_info); |
mutex_unlock(&area->lock); |
/* |
* Remove the empty area from address space. |
*/ |
btree_remove(&as->as_area_btree, base, NULL); |
free(area); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return 0; |
} |
/** Share address space area with another or the same address space. |
* |
* Address space area mapping is shared with a new address space area. |
* If the source address space area has not been shared so far, |
* a new sh_info is created. The new address space area simply gets the |
* sh_info of the source area. The process of duplicating the |
* mapping is done through the backend share function. |
* |
* @param src_as Pointer to source address space. |
* @param src_base Base address of the source address space area. |
* @param acc_size Expected size of the source area. |
* @param dst_as Pointer to destination address space. |
* @param dst_base Target base address. |
* @param dst_flags_mask Destination address space area flags mask. |
* |
* @return Zero on success or ENOENT if there is no such task or if |
* there is no such address space area, EPERM if there was |
* a problem in accepting the area or ENOMEM if there was a |
* problem in allocating destination address space area. |
* ENOTSUP is returned if the address space area backend |
* does not support sharing. |
*/ |
int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size, |
as_t *dst_as, uintptr_t dst_base, int dst_flags_mask) |
{ |
ipl_t ipl; |
int src_flags; |
size_t src_size; |
as_area_t *src_area, *dst_area; |
share_info_t *sh_info; |
mem_backend_t *src_backend; |
mem_backend_data_t src_backend_data; |
ipl = interrupts_disable(); |
mutex_lock(&src_as->lock); |
src_area = find_area_and_lock(src_as, src_base); |
if (!src_area) { |
/* |
* Could not find the source address space area. |
*/ |
mutex_unlock(&src_as->lock); |
interrupts_restore(ipl); |
return ENOENT; |
} |
if (!src_area->backend || !src_area->backend->share) { |
/* |
* There is no backend or the backend does not |
* know how to share the area. |
*/ |
mutex_unlock(&src_area->lock); |
mutex_unlock(&src_as->lock); |
interrupts_restore(ipl); |
return ENOTSUP; |
} |
src_size = src_area->pages * PAGE_SIZE; |
src_flags = src_area->flags; |
src_backend = src_area->backend; |
src_backend_data = src_area->backend_data; |
/* Share the cacheable flag from the original mapping */ |
if (src_flags & AS_AREA_CACHEABLE) |
dst_flags_mask |= AS_AREA_CACHEABLE; |
if (src_size != acc_size || |
(src_flags & dst_flags_mask) != dst_flags_mask) { |
mutex_unlock(&src_area->lock); |
mutex_unlock(&src_as->lock); |
interrupts_restore(ipl); |
return EPERM; |
} |
/* |
* Now we are committed to sharing the area. |
* First, prepare the area for sharing. |
* Then it will be safe to unlock it. |
*/ |
sh_info = src_area->sh_info; |
if (!sh_info) { |
sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0); |
mutex_initialize(&sh_info->lock, MUTEX_PASSIVE); |
sh_info->refcount = 2; |
btree_create(&sh_info->pagemap); |
src_area->sh_info = sh_info; |
/* |
* Call the backend to setup sharing. |
*/ |
src_area->backend->share(src_area); |
} else { |
mutex_lock(&sh_info->lock); |
sh_info->refcount++; |
mutex_unlock(&sh_info->lock); |
} |
mutex_unlock(&src_area->lock); |
mutex_unlock(&src_as->lock); |
/* |
* Create copy of the source address space area. |
* The destination area is created with AS_AREA_ATTR_PARTIAL |
* attribute set which prevents race condition with |
* preliminary as_page_fault() calls. |
* The flags of the source area are masked against dst_flags_mask |
* to support sharing in less privileged mode. |
*/ |
dst_area = as_area_create(dst_as, dst_flags_mask, src_size, dst_base, |
AS_AREA_ATTR_PARTIAL, src_backend, &src_backend_data); |
if (!dst_area) { |
/* |
* Destination address space area could not be created. |
*/ |
sh_info_remove_reference(sh_info); |
interrupts_restore(ipl); |
return ENOMEM; |
} |
/* |
* Now the destination address space area has been |
* fully initialized. Clear the AS_AREA_ATTR_PARTIAL |
* attribute and set the sh_info. |
*/ |
mutex_lock(&dst_as->lock); |
mutex_lock(&dst_area->lock); |
dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL; |
dst_area->sh_info = sh_info; |
mutex_unlock(&dst_area->lock); |
mutex_unlock(&dst_as->lock); |
interrupts_restore(ipl); |
return 0; |
} |
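/*
* Illustrative sketch only (hypothetical addresses): sharing a one-page
* anonymous area read-only into another address space:
*
*	int rc = as_area_share(src_as, 0x40000000, PAGE_SIZE,
*	    dst_as, 0x50000000, AS_AREA_READ);
*/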
/** Check access mode for address space area. |
* |
* The address space area must be locked prior to this call. |
* |
* @param area Address space area. |
* @param access Access mode. |
* |
* @return False if access violates area's permissions, true |
* otherwise. |
*/ |
bool as_area_check_access(as_area_t *area, pf_access_t access) |
{ |
int flagmap[] = { |
[PF_ACCESS_READ] = AS_AREA_READ, |
[PF_ACCESS_WRITE] = AS_AREA_WRITE, |
[PF_ACCESS_EXEC] = AS_AREA_EXEC |
}; |
if (!(area->flags & flagmap[access])) |
return false; |
return true; |
} |
/** Change address space area flags.
* |
* The idea is to have the same data, but with a different access mode. |
* This is needed e.g. for writing code into memory and then executing it. |
* In order for this to work properly, this may copy the data |
* into private anonymous memory (unless it's already there). |
* |
* @param as Address space. |
* @param flags Flags of the area memory. |
* @param address Address within the area to be changed.
* |
* @return Zero on success or a value from @ref errno.h on failure. |
*/ |
int as_area_change_flags(as_t *as, int flags, uintptr_t address) |
{ |
as_area_t *area; |
uintptr_t base; |
link_t *cur; |
ipl_t ipl; |
int page_flags; |
uintptr_t *old_frame; |
index_t frame_idx; |
count_t used_pages; |
/* Flags for the new memory mapping */ |
page_flags = area_flags_to_page_flags(flags); |
ipl = interrupts_disable(); |
mutex_lock(&as->lock); |
area = find_area_and_lock(as, address); |
if (!area) { |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return ENOENT; |
} |
if (area->sh_info || area->backend != &anon_backend) { |
/* Copying shared areas not supported yet */ |
/* Copying non-anonymous memory not supported yet */ |
mutex_unlock(&area->lock); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return ENOTSUP; |
} |
base = area->base; |
/* |
* Compute total number of used pages in the used_space B+tree |
*/ |
used_pages = 0; |
for (cur = area->used_space.leaf_head.next; |
cur != &area->used_space.leaf_head; cur = cur->next) { |
btree_node_t *node; |
unsigned int i; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
for (i = 0; i < node->keys; i++) { |
used_pages += (count_t) node->value[i]; |
} |
} |
/* An array for storing frame numbers */ |
old_frame = malloc(used_pages * sizeof(uintptr_t), 0); |
/* |
* Start TLB shootdown sequence. |
*/ |
tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base, area->pages); |
/* |
* Remove used pages from page tables and remember their frame |
* numbers. |
*/ |
frame_idx = 0; |
for (cur = area->used_space.leaf_head.next; |
cur != &area->used_space.leaf_head; cur = cur->next) { |
btree_node_t *node; |
unsigned int i; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
for (i = 0; i < node->keys; i++) { |
uintptr_t b = node->key[i]; |
count_t j; |
pte_t *pte; |
for (j = 0; j < (count_t) node->value[i]; j++) { |
page_table_lock(as, false); |
pte = page_mapping_find(as, b + j * PAGE_SIZE); |
ASSERT(pte && PTE_VALID(pte) && |
PTE_PRESENT(pte)); |
old_frame[frame_idx++] = PTE_GET_FRAME(pte); |
/* Remove old mapping */ |
page_mapping_remove(as, b + j * PAGE_SIZE); |
page_table_unlock(as, false); |
} |
} |
} |
/* |
* Finish TLB shootdown sequence. |
*/ |
tlb_invalidate_pages(as->asid, area->base, area->pages); |
/* |
* Invalidate potential software translation caches (e.g. TSB on |
* sparc64). |
*/ |
as_invalidate_translation_cache(as, area->base, area->pages); |
tlb_shootdown_finalize(); |
/* |
* Set the new flags. |
*/ |
area->flags = flags; |
/* |
* Map pages back in with new flags. This step is kept separate |
* so that the memory area cannot be accessed with both the old and
* the new flags at once. |
*/ |
frame_idx = 0; |
for (cur = area->used_space.leaf_head.next; |
cur != &area->used_space.leaf_head; cur = cur->next) { |
btree_node_t *node; |
unsigned int i; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
for (i = 0; i < node->keys; i++) { |
uintptr_t b = node->key[i]; |
count_t j; |
for (j = 0; j < (count_t) node->value[i]; j++) { |
page_table_lock(as, false); |
/* Insert the new mapping */ |
page_mapping_insert(as, b + j * PAGE_SIZE, |
old_frame[frame_idx++], page_flags); |
page_table_unlock(as, false); |
} |
} |
} |
free(old_frame); |
mutex_unlock(&area->lock); |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
return 0; |
} |
/** Handle page fault within the current address space. |
* |
* This is the high-level page fault handler. It decides whether the page fault |
* can be resolved by any backend and if so, it invokes the backend to resolve |
* the page fault. |
* |
* Interrupts are assumed disabled. |
* |
* @param page Faulting page. |
* @param access Access mode that caused the page fault (i.e. |
* read/write/exec). |
* @param istate Pointer to the interrupted state. |
* |
* @return AS_PF_FAULT on page fault, AS_PF_OK on success or |
* AS_PF_DEFER if the fault was caused by copy_to_uspace() |
* or copy_from_uspace(). |
*/ |
int as_page_fault(uintptr_t page, pf_access_t access, istate_t *istate) |
{ |
pte_t *pte; |
as_area_t *area; |
if (!THREAD) |
return AS_PF_FAULT; |
ASSERT(AS); |
mutex_lock(&AS->lock); |
area = find_area_and_lock(AS, page); |
if (!area) { |
/* |
* No area contained mapping for 'page'. |
* Signal page fault to low-level handler. |
*/ |
mutex_unlock(&AS->lock); |
goto page_fault; |
} |
if (area->attributes & AS_AREA_ATTR_PARTIAL) { |
/* |
* The address space area is not fully initialized. |
* Avoid possible race by returning error. |
*/ |
mutex_unlock(&area->lock); |
mutex_unlock(&AS->lock); |
goto page_fault; |
} |
if (!area->backend || !area->backend->page_fault) { |
/* |
* The address space area is not backed by any backend |
* or the backend cannot handle page faults. |
*/ |
mutex_unlock(&area->lock); |
mutex_unlock(&AS->lock); |
goto page_fault; |
} |
page_table_lock(AS, false); |
/* |
* To avoid race condition between two page faults on the same address, |
* we need to make sure the mapping has not been already inserted. |
*/ |
if ((pte = page_mapping_find(AS, page))) { |
if (PTE_PRESENT(pte)) { |
if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) || |
(access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) || |
(access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) { |
page_table_unlock(AS, false); |
mutex_unlock(&area->lock); |
mutex_unlock(&AS->lock); |
return AS_PF_OK; |
} |
} |
} |
/* |
* Resort to the backend page fault handler. |
*/ |
if (area->backend->page_fault(area, page, access) != AS_PF_OK) { |
page_table_unlock(AS, false); |
mutex_unlock(&area->lock); |
mutex_unlock(&AS->lock); |
goto page_fault; |
} |
page_table_unlock(AS, false); |
mutex_unlock(&area->lock); |
mutex_unlock(&AS->lock); |
return AS_PF_OK; |
page_fault: |
if (THREAD->in_copy_from_uspace) { |
THREAD->in_copy_from_uspace = false; |
istate_set_retaddr(istate, |
(uintptr_t) &memcpy_from_uspace_failover_address); |
} else if (THREAD->in_copy_to_uspace) { |
THREAD->in_copy_to_uspace = false; |
istate_set_retaddr(istate, |
(uintptr_t) &memcpy_to_uspace_failover_address); |
} else { |
return AS_PF_FAULT; |
} |
return AS_PF_DEFER; |
} |
/** Switch address spaces. |
* |
* Note that this function cannot sleep as it is essentially a part of |
* scheduling. Sleeping here would lead to deadlock on wakeup. Another |
* thing which is forbidden in this context is locking the address space. |
* |
* When this function is entered, no spinlocks may be held.
* |
* @param old_as Old address space or NULL.
* @param new_as New address space.
*/ |
void as_switch(as_t *old_as, as_t *new_as) |
{ |
DEADLOCK_PROBE_INIT(p_asidlock); |
preemption_disable(); |
retry: |
(void) interrupts_disable(); |
if (!spinlock_trylock(&asidlock)) { |
/* |
* Avoid deadlock with TLB shootdown. |
* We can enable interrupts here because |
* preemption is disabled. We should not be |
* holding any other lock. |
*/ |
(void) interrupts_enable(); |
DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD); |
goto retry; |
} |
preemption_enable(); |
/* |
* First, take care of the old address space. |
*/ |
if (old_as) { |
ASSERT(old_as->cpu_refcount); |
if((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) { |
/* |
* The old address space is no longer active on |
* any processor. It can be appended to the |
* list of inactive address spaces with assigned |
* ASID. |
*/ |
ASSERT(old_as->asid != ASID_INVALID); |
list_append(&old_as->inactive_as_with_asid_link, |
&inactive_as_with_asid_head); |
} |
/* |
* Perform architecture-specific tasks when the address space |
* is being removed from the CPU. |
*/ |
as_deinstall_arch(old_as); |
} |
/* |
* Second, prepare the new address space. |
*/ |
if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) { |
if (new_as->asid != ASID_INVALID) |
list_remove(&new_as->inactive_as_with_asid_link); |
else |
new_as->asid = asid_get(); |
} |
#ifdef AS_PAGE_TABLE |
SET_PTL0_ADDRESS(new_as->genarch.page_table); |
#endif |
/* |
* Perform architecture-specific steps. |
* (e.g. write ASID to hardware register etc.) |
*/ |
as_install_arch(new_as); |
spinlock_unlock(&asidlock); |
AS = new_as; |
} |
/** Convert address space area flags to page flags. |
* |
* @param aflags Flags of some address space area. |
* |
* @return Flags to be passed to page_mapping_insert(). |
*/ |
int area_flags_to_page_flags(int aflags) |
{ |
int flags; |
flags = PAGE_USER | PAGE_PRESENT; |
if (aflags & AS_AREA_READ) |
flags |= PAGE_READ; |
if (aflags & AS_AREA_WRITE) |
flags |= PAGE_WRITE; |
if (aflags & AS_AREA_EXEC) |
flags |= PAGE_EXEC; |
if (aflags & AS_AREA_CACHEABLE) |
flags |= PAGE_CACHEABLE; |
return flags; |
} |
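/*
* For example, an area created with AS_AREA_READ | AS_AREA_WRITE |
* AS_AREA_CACHEABLE translates to
* PAGE_USER | PAGE_PRESENT | PAGE_READ | PAGE_WRITE | PAGE_CACHEABLE.
*/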
/** Compute flags for virtual address translation subsystem.
* |
* The address space area must be locked. |
* Interrupts must be disabled. |
* |
* @param a Address space area. |
* |
* @return Flags to be used in page_mapping_insert(). |
*/ |
int as_area_get_flags(as_area_t *a) |
{ |
return area_flags_to_page_flags(a->flags); |
} |
/** Create page table. |
* |
* Depending on the architecture, create either an address-space-private or a
* global page table.
* |
* @param flags Flags saying whether the page table is for the kernel |
* address space. |
* |
* @return First entry of the page table. |
*/ |
pte_t *page_table_create(int flags) |
{ |
ASSERT(as_operations); |
ASSERT(as_operations->page_table_create); |
return as_operations->page_table_create(flags); |
} |
/** Destroy page table. |
* |
* Destroy page table in architecture specific way. |
* |
* @param page_table Physical address of PTL0. |
*/ |
void page_table_destroy(pte_t *page_table) |
{ |
ASSERT(as_operations); |
ASSERT(as_operations->page_table_destroy); |
as_operations->page_table_destroy(page_table); |
} |
/** Lock page table. |
* |
* This function should be called before any page_mapping_insert(), |
* page_mapping_remove() and page_mapping_find(). |
* |
* Locking order is such that address space areas must be locked |
* prior to this call. Address space can be locked prior to this |
* call in which case the lock argument is false. |
* |
* @param as Address space. |
* @param lock If false, do not attempt to lock as->lock. |
*/ |
void page_table_lock(as_t *as, bool lock) |
{ |
ASSERT(as_operations); |
ASSERT(as_operations->page_table_lock); |
as_operations->page_table_lock(as, lock); |
} |
/** Unlock page table. |
* |
* @param as Address space. |
* @param unlock If false, do not attempt to unlock as->lock. |
*/ |
void page_table_unlock(as_t *as, bool unlock) |
{ |
ASSERT(as_operations); |
ASSERT(as_operations->page_table_unlock); |
as_operations->page_table_unlock(as, unlock); |
} |
/** Find address space area and lock it. |
* |
* The address space must be locked and interrupts must be disabled. |
* |
* @param as Address space. |
* @param va Virtual address. |
* |
* @return Locked address space area containing va on success or |
* NULL on failure. |
*/ |
as_area_t *find_area_and_lock(as_t *as, uintptr_t va) |
{ |
as_area_t *a; |
btree_node_t *leaf, *lnode; |
unsigned int i; |
a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf); |
if (a) { |
/* va is the base address of an address space area */ |
mutex_lock(&a->lock); |
return a; |
} |
/* |
* Search the leaf node and the rightmost record of its left neighbour
* to find out whether this is a miss or va belongs to an address |
* space area found there. |
*/ |
/* First, search the leaf node itself. */ |
for (i = 0; i < leaf->keys; i++) { |
a = (as_area_t *) leaf->value[i]; |
mutex_lock(&a->lock); |
if ((a->base <= va) && (va < a->base + a->pages * PAGE_SIZE)) { |
return a; |
} |
mutex_unlock(&a->lock); |
} |
/* |
* Second, locate the left neighbour and test its last record. |
* Because of its position in the B+tree, it must have base < va. |
*/ |
lnode = btree_leaf_node_left_neighbour(&as->as_area_btree, leaf); |
if (lnode) { |
a = (as_area_t *) lnode->value[lnode->keys - 1]; |
mutex_lock(&a->lock); |
if (va < a->base + a->pages * PAGE_SIZE) { |
return a; |
} |
mutex_unlock(&a->lock); |
} |
return NULL; |
} |
/** Check area conflicts with other areas. |
* |
* The address space must be locked and interrupts must be disabled. |
* |
* @param as Address space. |
* @param va Starting virtual address of the area being tested. |
* @param size Size of the area being tested. |
* @param avoid_area Do not touch this area. |
* |
* @return True if there is no conflict, false otherwise. |
*/ |
bool |
check_area_conflicts(as_t *as, uintptr_t va, size_t size, as_area_t *avoid_area) |
{ |
as_area_t *a; |
btree_node_t *leaf, *node; |
unsigned int i; |
/* |
* We don't want any area to have conflicts with NULL page. |
*/ |
if (overlaps(va, size, NULL, PAGE_SIZE)) |
return false; |
/* |
* The leaf node is found in O(log n) time, where n is the number of
* address space areas belonging to as.
* The check for conflicts is then attempted on the rightmost |
* record in the left neighbour, the leftmost record in the right |
* neighbour and all records in the leaf node itself. |
*/ |
if ((a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf))) { |
if (a != avoid_area) |
return false; |
} |
/* First, check the two border cases. */ |
if ((node = btree_leaf_node_left_neighbour(&as->as_area_btree, leaf))) { |
a = (as_area_t *) node->value[node->keys - 1]; |
mutex_lock(&a->lock); |
if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) { |
mutex_unlock(&a->lock); |
return false; |
} |
mutex_unlock(&a->lock); |
} |
node = btree_leaf_node_right_neighbour(&as->as_area_btree, leaf); |
if (node) { |
a = (as_area_t *) node->value[0]; |
mutex_lock(&a->lock); |
if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) { |
mutex_unlock(&a->lock); |
return false; |
} |
mutex_unlock(&a->lock); |
} |
/* Second, check the leaf node. */ |
for (i = 0; i < leaf->keys; i++) { |
a = (as_area_t *) leaf->value[i]; |
if (a == avoid_area) |
continue; |
mutex_lock(&a->lock); |
if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) { |
mutex_unlock(&a->lock); |
return false; |
} |
mutex_unlock(&a->lock); |
} |
/* |
* So far, the area does not conflict with other areas. |
* Check if it doesn't conflict with kernel address space. |
*/ |
if (!KERNEL_ADDRESS_SPACE_SHADOWED) { |
return !overlaps(va, size, |
KERNEL_ADDRESS_SPACE_START, |
KERNEL_ADDRESS_SPACE_END - KERNEL_ADDRESS_SPACE_START); |
} |
return true; |
} |
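/*
* Worked example (assuming 4 KiB pages): with an existing area covering
* [0x10000, 0x13FFF] (4 pages), a candidate area at va = 0x13000 with
* size = 0x2000 is rejected because of the overlap, whereas va = 0x14000
* with the same size passes, provided it also avoids the NULL page and,
* where applicable, the kernel address space.
*/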
/** Return size of the address space area containing the given address.
*
* @param base Arbitrary address inside the address space area.
* |
* @return Size of the address space area in bytes or zero if it |
* does not exist. |
*/ |
size_t as_area_get_size(uintptr_t base) |
{ |
ipl_t ipl; |
as_area_t *src_area; |
size_t size; |
ipl = interrupts_disable(); |
src_area = find_area_and_lock(AS, base); |
if (src_area) { |
size = src_area->pages * PAGE_SIZE; |
mutex_unlock(&src_area->lock); |
} else { |
size = 0; |
} |
interrupts_restore(ipl); |
return size; |
} |
/** Mark portion of address space area as used. |
* |
* The address space area must be already locked. |
* |
* @param a Address space area. |
* @param page First page to be marked. |
* @param count Number of pages to be marked.
* |
* @return Zero on failure and non-zero on success. |
*/ |
int used_space_insert(as_area_t *a, uintptr_t page, count_t count) |
{ |
btree_node_t *leaf, *node; |
count_t pages; |
unsigned int i; |
ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE)); |
ASSERT(count); |
pages = (count_t) btree_search(&a->used_space, page, &leaf); |
if (pages) { |
/* |
* We hit the beginning of some used space. |
*/ |
return 0; |
} |
if (!leaf->keys) { |
btree_insert(&a->used_space, page, (void *) count, leaf); |
return 1; |
} |
node = btree_leaf_node_left_neighbour(&a->used_space, leaf); |
if (node) { |
uintptr_t left_pg = node->key[node->keys - 1]; |
uintptr_t right_pg = leaf->key[0]; |
count_t left_cnt = (count_t) node->value[node->keys - 1]; |
count_t right_cnt = (count_t) leaf->value[0]; |
/* |
* Examine the possibility that the interval fits |
* somewhere between the rightmost interval of |
* the left neighbour and the first interval of the leaf.
*/ |
if (page >= right_pg) { |
/* Do nothing. */ |
} else if (overlaps(page, count * PAGE_SIZE, left_pg, |
left_cnt * PAGE_SIZE)) { |
/* The interval intersects with the left interval. */ |
return 0; |
} else if (overlaps(page, count * PAGE_SIZE, right_pg, |
right_cnt * PAGE_SIZE)) { |
/* The interval intersects with the right interval. */ |
return 0; |
} else if ((page == left_pg + left_cnt * PAGE_SIZE) && |
(page + count * PAGE_SIZE == right_pg)) { |
/* |
* The interval can be added by merging the two already |
* present intervals. |
*/ |
node->value[node->keys - 1] += count + right_cnt; |
btree_remove(&a->used_space, right_pg, leaf); |
return 1; |
} else if (page == left_pg + left_cnt * PAGE_SIZE) { |
/* |
* The interval can be added by simply growing the left |
* interval. |
*/ |
node->value[node->keys - 1] += count; |
return 1; |
} else if (page + count * PAGE_SIZE == right_pg) { |
/* |
* The interval can be added by simply moving the base of
* the right interval down and increasing its size |
* accordingly. |
*/ |
leaf->value[0] += count; |
leaf->key[0] = page; |
return 1; |
} else { |
/* |
* The interval is between both neighbouring intervals,
* but cannot be merged with any of them. |
*/ |
btree_insert(&a->used_space, page, (void *) count, |
leaf); |
return 1; |
} |
} else if (page < leaf->key[0]) { |
uintptr_t right_pg = leaf->key[0]; |
count_t right_cnt = (count_t) leaf->value[0]; |
/* |
* Investigate the border case in which the left neighbour does |
* not exist but the interval fits from the left. |
*/ |
if (overlaps(page, count * PAGE_SIZE, right_pg, |
right_cnt * PAGE_SIZE)) { |
/* The interval intersects with the right interval. */ |
return 0; |
} else if (page + count * PAGE_SIZE == right_pg) { |
/* |
* The interval can be added by moving the base of the |
* right interval down and increasing its size |
* accordingly. |
*/ |
leaf->key[0] = page; |
leaf->value[0] += count; |
return 1; |
} else { |
/* |
* The interval doesn't adjoin with the right interval. |
* It must be added individually. |
*/ |
btree_insert(&a->used_space, page, (void *) count, |
leaf); |
return 1; |
} |
} |
node = btree_leaf_node_right_neighbour(&a->used_space, leaf); |
if (node) { |
uintptr_t left_pg = leaf->key[leaf->keys - 1]; |
uintptr_t right_pg = node->key[0]; |
count_t left_cnt = (count_t) leaf->value[leaf->keys - 1]; |
count_t right_cnt = (count_t) node->value[0]; |
/* |
* Examine the possibility that the interval fits |
* somewhere between the leftmost interval of |
* the right neighbour and the last interval of the leaf.
*/ |
if (page < left_pg) { |
/* Do nothing. */ |
} else if (overlaps(page, count * PAGE_SIZE, left_pg, |
left_cnt * PAGE_SIZE)) { |
/* The interval intersects with the left interval. */ |
return 0; |
} else if (overlaps(page, count * PAGE_SIZE, right_pg, |
right_cnt * PAGE_SIZE)) { |
/* The interval intersects with the right interval. */ |
return 0; |
} else if ((page == left_pg + left_cnt * PAGE_SIZE) && |
(page + count * PAGE_SIZE == right_pg)) { |
/* |
* The interval can be added by merging the two already |
* present intervals. |
*/
leaf->value[leaf->keys - 1] += count + right_cnt; |
btree_remove(&a->used_space, right_pg, node); |
return 1; |
} else if (page == left_pg + left_cnt * PAGE_SIZE) { |
/* |
* The interval can be added by simply growing the left |
* interval. |
*/
leaf->value[leaf->keys - 1] += count; |
return 1; |
} else if (page + count * PAGE_SIZE == right_pg) { |
/* |
* The interval can be added by simply moving the base of
* the right interval down and increasing its size |
* accordingly. |
*/ |
node->value[0] += count; |
node->key[0] = page; |
return 1; |
} else { |
/* |
* The interval is between both neighbouring intervals,
* but cannot be merged with any of them. |
*/ |
btree_insert(&a->used_space, page, (void *) count, |
leaf); |
return 1; |
} |
} else if (page >= leaf->key[leaf->keys - 1]) { |
uintptr_t left_pg = leaf->key[leaf->keys - 1]; |
count_t left_cnt = (count_t) leaf->value[leaf->keys - 1]; |
/* |
* Investigate the border case in which the right neighbour |
* does not exist but the interval fits from the right. |
*/ |
if (overlaps(page, count * PAGE_SIZE, left_pg, |
left_cnt * PAGE_SIZE)) { |
/* The interval intersects with the left interval. */ |
return 0; |
} else if (left_pg + left_cnt * PAGE_SIZE == page) { |
/* |
* The interval can be added by growing the left |
* interval. |
*/ |
leaf->value[leaf->keys - 1] += count; |
return 1; |
} else { |
/* |
* The interval doesn't adjoin with the left interval. |
* It must be added individually. |
*/ |
btree_insert(&a->used_space, page, (void *) count, |
leaf); |
return 1; |
} |
} |
/* |
* Note that if the algorithm made it thus far, the interval can fit |
* only between two other intervals of the leaf. The two border cases |
* were already resolved. |
*/ |
for (i = 1; i < leaf->keys; i++) { |
if (page < leaf->key[i]) { |
uintptr_t left_pg = leaf->key[i - 1]; |
uintptr_t right_pg = leaf->key[i]; |
count_t left_cnt = (count_t) leaf->value[i - 1]; |
count_t right_cnt = (count_t) leaf->value[i]; |
/* |
* The interval fits between left_pg and right_pg. |
*/ |
if (overlaps(page, count * PAGE_SIZE, left_pg, |
left_cnt * PAGE_SIZE)) { |
/* |
* The interval intersects with the left |
* interval. |
*/ |
return 0; |
} else if (overlaps(page, count * PAGE_SIZE, right_pg, |
right_cnt * PAGE_SIZE)) { |
/* |
* The interval intersects with the right |
* interval. |
*/ |
return 0; |
} else if ((page == left_pg + left_cnt * PAGE_SIZE) && |
(page + count * PAGE_SIZE == right_pg)) { |
/* |
* The interval can be added by merging the two |
* already present intervals. |
*/ |
leaf->value[i - 1] += count + right_cnt; |
btree_remove(&a->used_space, right_pg, leaf); |
return 1; |
} else if (page == left_pg + left_cnt * PAGE_SIZE) { |
/* |
* The interval can be added by simply growing |
* the left interval. |
*/ |
leaf->value[i - 1] += count; |
return 1; |
} else if (page + count * PAGE_SIZE == right_pg) { |
/* |
* The interval can be added by simply moving
* the base of the right interval down and
* increasing its size accordingly. |
*/ |
leaf->value[i] += count; |
leaf->key[i] = page; |
return 1; |
} else { |
/* |
* The interval is between both neighbouring
* intervals, but cannot be merged with any of |
* them. |
*/ |
btree_insert(&a->used_space, page, |
(void *) count, leaf); |
return 1; |
} |
} |
} |
panic("Inconsistency detected while adding %" PRIc " pages of used " |
"space at %p.\n", count, page); |
} |
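/*
* Worked example (assuming 4 KiB pages): if used_space already records the
* intervals [0x1000, 2 pages] and [0x4000, 3 pages], inserting
* (page = 0x3000, count = 1) merges everything into a single
* [0x1000, 6 pages] interval, while inserting (page = 0x3000, count = 2)
* returns 0 because it overlaps the right interval.
*/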
/** Mark portion of address space area as unused. |
* |
* The address space area must be already locked. |
* |
* @param a Address space area. |
* @param page First page to be marked. |
* @param count Number of pages to be marked.
* |
* @return Zero on failure and non-zero on success. |
*/ |
int used_space_remove(as_area_t *a, uintptr_t page, count_t count) |
{ |
btree_node_t *leaf, *node; |
count_t pages; |
unsigned int i; |
ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE)); |
ASSERT(count); |
pages = (count_t) btree_search(&a->used_space, page, &leaf); |
if (pages) { |
/* |
* We are lucky, page is the beginning of some interval. |
*/ |
if (count > pages) { |
return 0; |
} else if (count == pages) { |
btree_remove(&a->used_space, page, leaf); |
return 1; |
} else { |
/* |
* Find the respective interval. |
* Decrease its size and relocate its start address. |
*/ |
for (i = 0; i < leaf->keys; i++) { |
if (leaf->key[i] == page) { |
leaf->key[i] += count * PAGE_SIZE; |
leaf->value[i] -= count; |
return 1; |
} |
} |
goto error; |
} |
} |
node = btree_leaf_node_left_neighbour(&a->used_space, leaf); |
if (node && page < leaf->key[0]) { |
uintptr_t left_pg = node->key[node->keys - 1]; |
count_t left_cnt = (count_t) node->value[node->keys - 1]; |
if (overlaps(left_pg, left_cnt * PAGE_SIZE, page, |
count * PAGE_SIZE)) { |
if (page + count * PAGE_SIZE == |
left_pg + left_cnt * PAGE_SIZE) { |
/* |
* The interval is contained in the rightmost |
* interval of the left neighbour and can be |
* removed by updating the size of the bigger |
* interval. |
*/ |
node->value[node->keys - 1] -= count; |
return 1; |
} else if (page + count * PAGE_SIZE < |
left_pg + left_cnt*PAGE_SIZE) { |
count_t new_cnt; |
/* |
* The interval is contained in the rightmost |
* interval of the left neighbour but its |
* removal requires both updating the size of |
* the original interval and also inserting a |
* new interval. |
*/ |
new_cnt = ((left_pg + left_cnt * PAGE_SIZE) - |
(page + count*PAGE_SIZE)) >> PAGE_WIDTH; |
node->value[node->keys - 1] -= count + new_cnt; |
btree_insert(&a->used_space, page + |
count * PAGE_SIZE, (void *) new_cnt, leaf); |
return 1; |
} |
} |
return 0; |
} else if (page < leaf->key[0]) { |
return 0; |
} |
if (page > leaf->key[leaf->keys - 1]) { |
uintptr_t left_pg = leaf->key[leaf->keys - 1]; |
count_t left_cnt = (count_t) leaf->value[leaf->keys - 1]; |
if (overlaps(left_pg, left_cnt * PAGE_SIZE, page, |
count * PAGE_SIZE)) { |
if (page + count * PAGE_SIZE == |
left_pg + left_cnt * PAGE_SIZE) { |
/* |
* The interval is contained in the rightmost |
* interval of the leaf and can be removed by |
* updating the size of the bigger interval. |
*/ |
leaf->value[leaf->keys - 1] -= count; |
return 1; |
} else if (page + count * PAGE_SIZE < left_pg + |
left_cnt * PAGE_SIZE) { |
count_t new_cnt; |
/* |
* The interval is contained in the rightmost |
* interval of the leaf but its removal |
* requires both updating the size of the |
* original interval and also inserting a new |
* interval. |
*/ |
new_cnt = ((left_pg + left_cnt * PAGE_SIZE) - |
(page + count * PAGE_SIZE)) >> PAGE_WIDTH; |
leaf->value[leaf->keys - 1] -= count + new_cnt; |
btree_insert(&a->used_space, page + |
count * PAGE_SIZE, (void *) new_cnt, leaf); |
return 1; |
} |
} |
return 0; |
} |
/* |
* The border cases have been already resolved. |
* Now the interval can be only between intervals of the leaf. |
*/ |
for (i = 1; i < leaf->keys - 1; i++) { |
if (page < leaf->key[i]) { |
uintptr_t left_pg = leaf->key[i - 1]; |
count_t left_cnt = (count_t) leaf->value[i - 1]; |
/* |
* Now the interval is between intervals corresponding |
* to (i - 1) and i. |
*/ |
if (overlaps(left_pg, left_cnt * PAGE_SIZE, page, |
count * PAGE_SIZE)) { |
if (page + count * PAGE_SIZE == |
left_pg + left_cnt*PAGE_SIZE) { |
/* |
* The interval is contained in the |
* interval (i - 1) of the leaf and can |
* be removed by updating the size of |
* the bigger interval. |
*/ |
leaf->value[i - 1] -= count; |
return 1; |
} else if (page + count * PAGE_SIZE < |
left_pg + left_cnt * PAGE_SIZE) { |
count_t new_cnt; |
/* |
* The interval is contained in the |
* interval (i - 1) of the leaf but its |
* removal requires both updating the |
* size of the original interval and |
* also inserting a new interval. |
*/ |
new_cnt = ((left_pg + |
left_cnt * PAGE_SIZE) - |
(page + count * PAGE_SIZE)) >> |
PAGE_WIDTH; |
leaf->value[i - 1] -= count + new_cnt; |
btree_insert(&a->used_space, page + |
count * PAGE_SIZE, (void *) new_cnt, |
leaf); |
return 1; |
} |
} |
return 0; |
} |
} |
error: |
panic("Inconsistency detected while removing %" PRIc " pages of used " |
"space from %p.\n", count, page); |
} |
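/*
* Worked example (assuming 4 KiB pages): with the single interval
* [0x1000, 6 pages] recorded, removing (page = 0x2000, count = 2) splits it
* into [0x1000, 1 page] and [0x4000, 3 pages], whereas removing
* (page = 0x1000, count = 6) deletes the interval altogether.
*/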
/** Remove reference to address space area share info. |
* |
* If the reference count drops to 0, the sh_info is deallocated. |
* |
* @param sh_info Pointer to address space area share info. |
*/ |
void sh_info_remove_reference(share_info_t *sh_info) |
{ |
bool dealloc = false; |
mutex_lock(&sh_info->lock); |
ASSERT(sh_info->refcount); |
if (--sh_info->refcount == 0) { |
dealloc = true; |
link_t *cur; |
/* |
* Now walk carefully the pagemap B+tree and free/remove |
* reference from all frames found there. |
*/ |
for (cur = sh_info->pagemap.leaf_head.next; |
cur != &sh_info->pagemap.leaf_head; cur = cur->next) { |
btree_node_t *node; |
unsigned int i; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
for (i = 0; i < node->keys; i++) |
frame_free((uintptr_t) node->value[i]); |
} |
} |
mutex_unlock(&sh_info->lock); |
if (dealloc) { |
btree_destroy(&sh_info->pagemap); |
free(sh_info); |
} |
} |
/* |
* Address space related syscalls. |
*/ |
/** Wrapper for as_area_create(). */ |
unative_t sys_as_area_create(uintptr_t address, size_t size, int flags) |
{ |
if (as_area_create(AS, flags | AS_AREA_CACHEABLE, size, address, |
AS_AREA_ATTR_NONE, &anon_backend, NULL)) |
return (unative_t) address; |
else |
return (unative_t) -1; |
} |
/** Wrapper for as_area_resize(). */ |
unative_t sys_as_area_resize(uintptr_t address, size_t size, int flags) |
{ |
return (unative_t) as_area_resize(AS, address, size, 0); |
} |
/** Wrapper for as_area_change_flags(). */ |
unative_t sys_as_area_change_flags(uintptr_t address, int flags) |
{ |
return (unative_t) as_area_change_flags(AS, flags, address); |
} |
/** Wrapper for as_area_destroy(). */ |
unative_t sys_as_area_destroy(uintptr_t address) |
{ |
return (unative_t) as_area_destroy(AS, address); |
} |
/** Print out information about address space. |
* |
* @param as Address space. |
*/ |
void as_print(as_t *as) |
{ |
ipl_t ipl; |
ipl = interrupts_disable(); |
mutex_lock(&as->lock); |
/* print out info about address space areas */ |
link_t *cur; |
for (cur = as->as_area_btree.leaf_head.next; |
cur != &as->as_area_btree.leaf_head; cur = cur->next) { |
btree_node_t *node; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
unsigned int i; |
for (i = 0; i < node->keys; i++) { |
as_area_t *area = node->value[i]; |
mutex_lock(&area->lock); |
printf("as_area: %p, base=%p, pages=%" PRIc |
" (%p - %p)\n", area, area->base, area->pages, |
area->base, area->base + FRAMES2SIZE(area->pages)); |
mutex_unlock(&area->lock); |
} |
} |
mutex_unlock(&as->lock); |
interrupts_restore(ipl); |
} |
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/backend_anon.c
/* |
* Copyright (c) 2006 Jakub Jermar |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Backend for anonymous memory address space areas. |
* |
*/ |
#include <mm/as.h> |
#include <mm/page.h> |
#include <genarch/mm/page_pt.h> |
#include <genarch/mm/page_ht.h> |
#include <mm/frame.h> |
#include <mm/slab.h> |
#include <synch/mutex.h> |
#include <adt/list.h> |
#include <adt/btree.h> |
#include <errno.h> |
#include <arch/types.h> |
#include <align.h> |
#include <arch.h> |
#ifdef CONFIG_VIRT_IDX_DCACHE |
#include <arch/mm/cache.h> |
#endif |
static int anon_page_fault(as_area_t *area, uintptr_t addr, pf_access_t access); |
static void anon_frame_free(as_area_t *area, uintptr_t page, uintptr_t frame); |
static void anon_share(as_area_t *area); |
mem_backend_t anon_backend = { |
.page_fault = anon_page_fault, |
.frame_free = anon_frame_free, |
.share = anon_share |
}; |
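/*
* These hooks are not called directly by clients: as_page_fault() invokes
* page_fault(), as_area_destroy() and the shrinking path of as_area_resize()
* invoke frame_free(), and as_area_share() calls share() when an area is
* shared for the first time.
*/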
/** Service a page fault in the anonymous memory address space area. |
* |
* The address space area and page tables must be already locked. |
* |
* @param area Pointer to the address space area. |
* @param addr Faulting virtual address. |
* @param access Access mode that caused the fault (i.e. read/write/exec). |
* |
* @return AS_PF_FAULT on failure (i.e. page fault) or AS_PF_OK on success (i.e. |
* serviced). |
*/ |
int anon_page_fault(as_area_t *area, uintptr_t addr, pf_access_t access) |
{ |
uintptr_t frame; |
if (!as_area_check_access(area, access)) |
return AS_PF_FAULT; |
if (area->sh_info) { |
btree_node_t *leaf; |
/* |
* The area is shared, chances are that the mapping can be found |
* in the pagemap of the address space area share info |
* structure. |
* In the case that the pagemap does not contain the respective |
* mapping, a new frame is allocated and the mapping is created. |
*/ |
mutex_lock(&area->sh_info->lock); |
frame = (uintptr_t) btree_search(&area->sh_info->pagemap, |
ALIGN_DOWN(addr, PAGE_SIZE) - area->base, &leaf); |
if (!frame) { |
bool allocate = true; |
unsigned int i; |
/* |
* Zero can also be a valid frame address, so a miss cannot be told
* apart from a stored zero; check the leaf keys explicitly instead.
*/ |
for (i = 0; i < leaf->keys; i++) { |
if (leaf->key[i] == |
ALIGN_DOWN(addr, PAGE_SIZE) - area->base) { |
allocate = false; |
break; |
} |
} |
if (allocate) { |
frame = (uintptr_t) frame_alloc(ONE_FRAME, 0); |
memsetb((void *) PA2KA(frame), FRAME_SIZE, 0); |
/* |
* Insert the address of the newly allocated |
* frame to the pagemap. |
*/ |
btree_insert(&area->sh_info->pagemap, |
ALIGN_DOWN(addr, PAGE_SIZE) - area->base, |
(void *) frame, leaf); |
} |
} |
frame_reference_add(ADDR2PFN(frame)); |
mutex_unlock(&area->sh_info->lock); |
} else { |
/* |
* In general, there can be several reasons that |
* can have caused this fault. |
* |
* - non-existent mapping: the area is an anonymous |
* area (e.g. heap or stack) and so far has not been |
* allocated a frame for the faulting page |
* |
* - non-present mapping: another possibility, |
* currently not implemented, would be frame |
* reuse; when this becomes a possibility, |
* do not forget to distinguish between |
* the different causes |
*/ |
frame = (uintptr_t) frame_alloc(ONE_FRAME, 0); |
memsetb((void *) PA2KA(frame), FRAME_SIZE, 0); |
} |
/* |
* Map 'page' to 'frame'. |
* Note that TLB shootdown is not attempted as only new information is |
* being inserted into page tables. |
*/ |
page_mapping_insert(AS, addr, frame, as_area_get_flags(area)); |
if (!used_space_insert(area, ALIGN_DOWN(addr, PAGE_SIZE), 1)) |
panic("Could not insert used space.\n"); |
return AS_PF_OK; |
} |
/** Free a frame that is backed by the anonymous memory backend. |
* |
* The address space area and page tables must be already locked. |
* |
* @param area Ignored. |
* @param page Virtual address of the page corresponding to the frame. |
* @param frame Frame to be released. |
*/ |
void anon_frame_free(as_area_t *area, uintptr_t page, uintptr_t frame) |
{ |
frame_free(frame); |
} |
/** Share the anonymous address space area. |
* |
* Sharing of anonymous area is done by duplicating its entire mapping |
* to the pagemap. Page faults will primarily search for frames there. |
* |
* The address space and address space area must be already locked. |
* |
* @param area Address space area to be shared. |
*/ |
void anon_share(as_area_t *area) |
{ |
link_t *cur; |
/* |
* Copy used portions of the area to sh_info's page map. |
*/ |
mutex_lock(&area->sh_info->lock); |
for (cur = area->used_space.leaf_head.next; |
cur != &area->used_space.leaf_head; cur = cur->next) { |
btree_node_t *node; |
unsigned int i; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
for (i = 0; i < node->keys; i++) { |
uintptr_t base = node->key[i]; |
count_t count = (count_t) node->value[i]; |
unsigned int j; |
for (j = 0; j < count; j++) { |
pte_t *pte; |
page_table_lock(area->as, false); |
pte = page_mapping_find(area->as, |
base + j * PAGE_SIZE); |
ASSERT(pte && PTE_VALID(pte) && |
PTE_PRESENT(pte)); |
btree_insert(&area->sh_info->pagemap, |
(base + j * PAGE_SIZE) - area->base, |
(void *) PTE_GET_FRAME(pte), NULL); |
page_table_unlock(area->as, false); |
pfn_t pfn = ADDR2PFN(PTE_GET_FRAME(pte)); |
frame_reference_add(pfn); |
} |
} |
} |
mutex_unlock(&area->sh_info->lock); |
} |
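/*
* For example, a page mapped at area->base + 0x3000 is recorded in the
* pagemap under key 0x3000 with the physical address of its frame as the
* value; subsequent anon_page_fault() calls in either address space then
* find the frame there instead of allocating a new one.
*/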
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/backend_elf.c
/* |
* Copyright (c) 2006 Jakub Jermar |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Backend for address space areas backed by an ELF image. |
*/ |
#include <lib/elf.h> |
#include <debug.h> |
#include <arch/types.h> |
#include <mm/as.h> |
#include <mm/frame.h> |
#include <mm/slab.h> |
#include <mm/page.h> |
#include <genarch/mm/page_pt.h> |
#include <genarch/mm/page_ht.h> |
#include <align.h> |
#include <memstr.h> |
#include <macros.h> |
#include <arch.h> |
#include <arch/barrier.h> |
#ifdef CONFIG_VIRT_IDX_DCACHE |
#include <arch/mm/cache.h> |
#endif |
static int elf_page_fault(as_area_t *area, uintptr_t addr, pf_access_t access); |
static void elf_frame_free(as_area_t *area, uintptr_t page, uintptr_t frame); |
static void elf_share(as_area_t *area); |
mem_backend_t elf_backend = { |
.page_fault = elf_page_fault, |
.frame_free = elf_frame_free, |
.share = elf_share |
}; |
/** Service a page fault in the ELF backend address space area. |
* |
* The address space area and page tables must be already locked. |
* |
* @param area Pointer to the address space area. |
* @param addr Faulting virtual address. |
* @param access Access mode that caused the fault (i.e. |
* read/write/exec). |
* |
* @return AS_PF_FAULT on failure (i.e. page fault) or AS_PF_OK |
* on success (i.e. serviced). |
*/ |
int elf_page_fault(as_area_t *area, uintptr_t addr, pf_access_t access) |
{ |
elf_header_t *elf = area->backend_data.elf; |
elf_segment_header_t *entry = area->backend_data.segment; |
btree_node_t *leaf; |
uintptr_t base, frame, page, start_anon; |
index_t i; |
bool dirty = false; |
if (!as_area_check_access(area, access)) |
return AS_PF_FAULT; |
ASSERT((addr >= ALIGN_DOWN(entry->p_vaddr, PAGE_SIZE)) && |
(addr < entry->p_vaddr + entry->p_memsz)); |
i = (addr - ALIGN_DOWN(entry->p_vaddr, PAGE_SIZE)) >> PAGE_WIDTH; |
base = (uintptr_t) |
(((void *) elf) + ALIGN_DOWN(entry->p_offset, PAGE_SIZE)); |
/* Virtual address of the faulting page */
page = ALIGN_DOWN(addr, PAGE_SIZE);
/* Virtual address of the end of the initialized part of the segment */
start_anon = entry->p_vaddr + entry->p_filesz; |
if (area->sh_info) { |
bool found = false; |
/* |
* The address space area is shared. |
*/ |
mutex_lock(&area->sh_info->lock); |
frame = (uintptr_t) btree_search(&area->sh_info->pagemap, |
page - area->base, &leaf); |
if (!frame) { |
unsigned int i; |
/* |
* Workaround for valid NULL address. |
*/ |
for (i = 0; i < leaf->keys; i++) { |
if (leaf->key[i] == page - area->base) { |
found = true; |
break; |
} |
} |
} |
if (frame || found) { |
frame_reference_add(ADDR2PFN(frame)); |
page_mapping_insert(AS, addr, frame, |
as_area_get_flags(area)); |
if (!used_space_insert(area, page, 1)) |
panic("Could not insert used space.\n"); |
mutex_unlock(&area->sh_info->lock); |
return AS_PF_OK; |
} |
} |
/* |
* The area is either not shared or the pagemap does not contain the |
* mapping. |
*/ |
if (page >= entry->p_vaddr && page + PAGE_SIZE <= start_anon) { |
/* |
* Initialized portion of the segment. The memory is backed |
* directly by the content of the ELF image. Pages are |
* only copied if the segment is writable so that there
* can be more instantiations of the same in-memory ELF image
* used at a time. Note that this could later be done
* as COW. |
*/ |
if (entry->p_flags & PF_W) { |
frame = (uintptr_t)frame_alloc(ONE_FRAME, 0); |
memcpy((void *) PA2KA(frame), |
(void *) (base + i * FRAME_SIZE), FRAME_SIZE); |
if (entry->p_flags & PF_X) { |
smc_coherence_block((void *) PA2KA(frame), |
FRAME_SIZE); |
} |
dirty = true; |
} else { |
frame = KA2PA(base + i * FRAME_SIZE); |
} |
} else if (page >= start_anon) { |
/* |
* This is the uninitialized portion of the segment. |
* It is not physically present in the ELF image. |
* To resolve the situation, a frame must be allocated |
* and cleared. |
*/ |
frame = (uintptr_t)frame_alloc(ONE_FRAME, 0); |
memsetb((void *) PA2KA(frame), FRAME_SIZE, 0); |
dirty = true; |
} else { |
size_t pad_lo, pad_hi; |
/* |
* The mixed case. |
* |
* The middle part is backed by the ELF image and |
* the lower and upper parts are anonymous memory. |
* (The segment can be and often is shorter than 1 page). |
*/ |
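/*
 * Worked example (added for illustration, not part of the original
 * source): with PAGE_SIZE == 0x1000, p_vaddr == 0x1100 and
 * p_filesz == 0x200, the faulting page 0x1000 gives pad_lo == 0x100
 * (bytes below p_vaddr) and, since start_anon == 0x1300 < 0x2000,
 * pad_hi == 0xd00 (bytes above the initialized data). Only the middle
 * 0x200 bytes are copied from the image; the padding is zeroed below.
 */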
if (page < entry->p_vaddr) |
pad_lo = entry->p_vaddr - page; |
else |
pad_lo = 0; |
if (start_anon < page + PAGE_SIZE) |
pad_hi = page + PAGE_SIZE - start_anon; |
else |
pad_hi = 0; |
frame = (uintptr_t)frame_alloc(ONE_FRAME, 0); |
memcpy((void *) (PA2KA(frame) + pad_lo), |
(void *) (base + i * FRAME_SIZE + pad_lo), |
FRAME_SIZE - pad_lo - pad_hi); |
if (entry->p_flags & PF_X) { |
smc_coherence_block((void *) (PA2KA(frame) + pad_lo), |
FRAME_SIZE - pad_lo - pad_hi); |
} |
memsetb((void *) PA2KA(frame), pad_lo, 0); |
memsetb((void *) (PA2KA(frame) + FRAME_SIZE - pad_hi), pad_hi, |
0); |
dirty = true; |
} |
if (dirty && area->sh_info) { |
frame_reference_add(ADDR2PFN(frame)); |
btree_insert(&area->sh_info->pagemap, page - area->base, |
(void *) frame, leaf); |
} |
if (area->sh_info) |
mutex_unlock(&area->sh_info->lock); |
page_mapping_insert(AS, addr, frame, as_area_get_flags(area)); |
if (!used_space_insert(area, page, 1)) |
panic("Could not insert used space.\n"); |
return AS_PF_OK; |
} |
/** Free a frame that is backed by the ELF backend. |
* |
* The address space area and page tables must be already locked. |
* |
* @param area Pointer to the address space area. |
* @param page Page that is mapped to frame. Must be aligned to |
* PAGE_SIZE. |
* @param frame Frame to be released. |
* |
*/ |
void elf_frame_free(as_area_t *area, uintptr_t page, uintptr_t frame) |
{ |
elf_header_t *elf = area->backend_data.elf; |
elf_segment_header_t *entry = area->backend_data.segment; |
uintptr_t base, start_anon; |
index_t i; |
ASSERT((page >= ALIGN_DOWN(entry->p_vaddr, PAGE_SIZE)) && |
(page < entry->p_vaddr + entry->p_memsz)); |
i = (page - ALIGN_DOWN(entry->p_vaddr, PAGE_SIZE)) >> PAGE_WIDTH; |
base = (uintptr_t) (((void *) elf) + |
ALIGN_DOWN(entry->p_offset, FRAME_SIZE)); |
start_anon = entry->p_vaddr + entry->p_filesz; |
if (page >= entry->p_vaddr && page + PAGE_SIZE <= start_anon) { |
if (entry->p_flags & PF_W) { |
/* |
* Free the frame with the copy of writable segment |
* data. |
*/ |
frame_free(frame); |
} |
} else { |
/* |
* The frame is either anonymous memory or the mixed case (i.e. |
* lower part is backed by the ELF image and the upper is |
* anonymous). In any case, a frame needs to be freed. |
*/ |
frame_free(frame); |
} |
} |
/** Share ELF image backed address space area. |
* |
* If the area is writable, then all mapped pages are duplicated in the pagemap. |
* Otherwise only portions of the area that are not backed by the ELF image |
* are put into the pagemap. |
* |
* The address space and address space area must be locked prior to the call. |
* |
* @param area Address space area. |
*/ |
void elf_share(as_area_t *area) |
{ |
elf_segment_header_t *entry = area->backend_data.segment; |
link_t *cur; |
btree_node_t *leaf, *node; |
uintptr_t start_anon = entry->p_vaddr + entry->p_filesz; |
/* |
* Find the node in which to start linear search. |
*/ |
if (area->flags & AS_AREA_WRITE) { |
node = list_get_instance(area->used_space.leaf_head.next, |
btree_node_t, leaf_link); |
} else { |
(void) btree_search(&area->sh_info->pagemap, start_anon, &leaf); |
node = btree_leaf_node_left_neighbour(&area->sh_info->pagemap, |
leaf); |
if (!node) |
node = leaf; |
} |
/* |
* Copy used anonymous portions of the area to sh_info's page map. |
*/ |
mutex_lock(&area->sh_info->lock); |
for (cur = &node->leaf_link; cur != &area->used_space.leaf_head; |
cur = cur->next) { |
unsigned int i; |
node = list_get_instance(cur, btree_node_t, leaf_link); |
for (i = 0; i < node->keys; i++) { |
uintptr_t base = node->key[i]; |
count_t count = (count_t) node->value[i]; |
unsigned int j; |
/* |
* Skip read-only areas of used space that are backed |
* by the ELF image. |
*/ |
if (!(area->flags & AS_AREA_WRITE)) |
if (base >= entry->p_vaddr && |
base + count * PAGE_SIZE <= start_anon) |
continue; |
for (j = 0; j < count; j++) { |
pte_t *pte; |
/* |
* Skip read-only pages that are backed by the |
* ELF image. |
*/ |
if (!(area->flags & AS_AREA_WRITE)) |
if (base >= entry->p_vaddr && |
base + (j + 1) * PAGE_SIZE <= |
start_anon) |
continue; |
page_table_lock(area->as, false); |
pte = page_mapping_find(area->as, |
base + j * PAGE_SIZE); |
ASSERT(pte && PTE_VALID(pte) && |
PTE_PRESENT(pte)); |
btree_insert(&area->sh_info->pagemap, |
(base + j * PAGE_SIZE) - area->base, |
(void *) PTE_GET_FRAME(pte), NULL); |
page_table_unlock(area->as, false); |
pfn_t pfn = ADDR2PFN(PTE_GET_FRAME(pte)); |
frame_reference_add(pfn); |
} |
} |
} |
mutex_unlock(&area->sh_info->lock); |
} |
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/frame.c |
---|
0,0 → 1,1334 |
/* |
* Copyright (c) 2001-2005 Jakub Jermar |
* Copyright (c) 2005 Sergey Bondari |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Physical frame allocator. |
* |
* This file contains the physical frame allocator and memory zone management. |
* The frame allocator is built on top of the buddy allocator. |
* |
* @see buddy.c |
*/ |
/* |
* Locking order |
* |
* In order to access a particular zone, the process must first lock
* the zones.lock, then lock the zone and then unlock the zones.lock.
* This ensures that we can fiddle with the zones at runtime without
* affecting the processes.
* |
*/ |
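/*
 * Illustrative sketch of the locking order described above; it is not
 * part of the original source and is compiled out. It refers to the
 * zones structure and zone_t type defined further below in this file.
 */
#if 0
static void zone_locking_example(unsigned int znum)
{
	ipl_t ipl = interrupts_disable();

	/* First take the global lock protecting the list of zones. */
	spinlock_lock(&zones.lock);
	zone_t *zone = zones.info[znum];

	/* Then take the per-zone lock and drop the global lock. */
	spinlock_lock(&zone->lock);
	spinlock_unlock(&zones.lock);

	/* ... work with the zone here ... */

	spinlock_unlock(&zone->lock);
	interrupts_restore(ipl);
}
#endif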
#include <arch/types.h> |
#include <mm/frame.h> |
#include <mm/as.h> |
#include <panic.h> |
#include <debug.h> |
#include <adt/list.h> |
#include <synch/spinlock.h> |
#include <synch/mutex.h> |
#include <synch/condvar.h> |
#include <arch/asm.h> |
#include <arch.h> |
#include <print.h> |
#include <align.h> |
#include <mm/slab.h> |
#include <bitops.h> |
#include <macros.h> |
#include <config.h> |
typedef struct { |
count_t refcount; /**< tracking of shared frames */ |
uint8_t buddy_order; /**< buddy system block order */ |
link_t buddy_link; /**< link to the next free block inside one |
order */ |
void *parent; /**< If allocated by slab, this points there */ |
} frame_t; |
typedef struct { |
SPINLOCK_DECLARE(lock); /**< this lock protects everything below */ |
pfn_t base; /**< frame_no of the first frame in the frames |
array */ |
count_t count; /**< Size of zone */ |
frame_t *frames; /**< array of frame_t structures in this |
zone */ |
count_t free_count; /**< number of free frame_t structures */ |
count_t busy_count; /**< number of busy frame_t structures */ |
buddy_system_t *buddy_system; /**< buddy system for the zone */ |
int flags; |
} zone_t; |
/* |
* The zones.lock must be held when accessing the zones structure.
* Some of the attributes in zone_t structures are 'read-only' |
*/ |
typedef struct { |
SPINLOCK_DECLARE(lock); |
unsigned int count; |
zone_t *info[ZONES_MAX]; |
} zones_t; |
static zones_t zones; |
/* |
* Synchronization primitives used to sleep when there is no memory |
* available. |
*/ |
mutex_t mem_avail_mtx; |
condvar_t mem_avail_cv; |
unsigned long mem_avail_frames = 0; /**< Number of available frames. */ |
unsigned long mem_avail_gen = 0; /**< Generation counter. */ |
/********************/ |
/* Helper functions */ |
/********************/ |
static inline index_t frame_index(zone_t *zone, frame_t *frame) |
{ |
return (index_t) (frame - zone->frames); |
} |
static inline index_t frame_index_abs(zone_t *zone, frame_t *frame) |
{ |
return (index_t) (frame - zone->frames) + zone->base; |
} |
static inline int frame_index_valid(zone_t *zone, index_t index) |
{ |
return (index < zone->count); |
} |
/** Compute zone-relative frame index from frame_t pointer & zone pointer. */
static index_t make_frame_index(zone_t *zone, frame_t *frame) |
{ |
return (frame - zone->frames); |
} |
/** Initialize frame structure. |
* |
* @param frame Frame structure to be initialized. |
*/ |
static void frame_initialize(frame_t *frame) |
{ |
frame->refcount = 1; |
frame->buddy_order = 0; |
} |
/**********************/ |
/* Zoneinfo functions */ |
/**********************/ |
/** Insert-sort zone into zones list. |
* |
* @param newzone New zone to be inserted into zone list. |
* @return Zone number on success, -1 on error. |
*/ |
static int zones_add_zone(zone_t *newzone) |
{ |
unsigned int i, j; |
ipl_t ipl; |
zone_t *z; |
ipl = interrupts_disable(); |
spinlock_lock(&zones.lock); |
/* Make sure the maximum zone count is not exceeded */
if (zones.count + 1 == ZONES_MAX) { |
printf("Maximum zone count %u exceeded!\n", ZONES_MAX); |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
return -1; |
} |
for (i = 0; i < zones.count; i++) { |
/* Check for overlap */
z = zones.info[i];
if (overlaps(newzone->base, newzone->count, z->base,
    z->count)) {
printf("Zones overlap!\n");
spinlock_unlock(&zones.lock);
interrupts_restore(ipl);
return -1;
} |
if (newzone->base < z->base) |
break; |
} |
/* Move other zones up */ |
for (j = i; j < zones.count; j++) |
zones.info[j + 1] = zones.info[j]; |
zones.info[i] = newzone; |
zones.count++; |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
return i; |
} |
/** Try to find the zone that contains the given frame.
* |
* Assume interrupts are disabled. |
* |
* @param frame Frame number contained in zone. |
* @param pzone If not null, it is used as zone hint. Zone index is |
* filled into the variable on success. |
* @return Pointer to locked zone containing frame. |
*/ |
static zone_t *find_zone_and_lock(pfn_t frame, unsigned int *pzone) |
{ |
unsigned int i; |
unsigned int hint = pzone ? *pzone : 0; |
zone_t *z; |
spinlock_lock(&zones.lock); |
if (hint >= zones.count) |
hint = 0; |
i = hint; |
do { |
z = zones.info[i]; |
spinlock_lock(&z->lock); |
if (z->base <= frame && z->base + z->count > frame) { |
/* Unlock the global lock */ |
spinlock_unlock(&zones.lock); |
if (pzone) |
*pzone = i; |
return z; |
} |
spinlock_unlock(&z->lock); |
i++; |
if (i >= zones.count) |
i = 0; |
} while (i != hint); |
spinlock_unlock(&zones.lock); |
return NULL; |
} |
/** @return True if zone can allocate specified order */ |
static int zone_can_alloc(zone_t *z, uint8_t order) |
{ |
return buddy_system_can_alloc(z->buddy_system, order); |
} |
/** Find and lock zone that can allocate order frames. |
* |
* Assume interrupts are disabled. |
* |
* @param order Size (2^order) of free space we are trying to find. |
* @param flags Required flags of the target zone. |
* @param pzone Pointer to preferred zone or NULL, on return contains |
* zone number. |
*/ |
static zone_t * |
find_free_zone_and_lock(uint8_t order, int flags, unsigned int *pzone) |
{ |
unsigned int i; |
zone_t *z; |
unsigned int hint = pzone ? *pzone : 0; |
/* Mask off flags that are not applicable. */ |
flags &= FRAME_LOW_4_GiB; |
spinlock_lock(&zones.lock); |
if (hint >= zones.count) |
hint = 0; |
i = hint; |
do { |
z = zones.info[i]; |
spinlock_lock(&z->lock); |
/* |
* Check whether the zone meets the search criteria. |
*/ |
if ((z->flags & flags) == flags) { |
/* |
* Check if the zone has 2^order frames area available. |
*/ |
if (zone_can_alloc(z, order)) { |
spinlock_unlock(&zones.lock); |
if (pzone) |
*pzone = i; |
return z; |
} |
} |
spinlock_unlock(&z->lock); |
if (++i >= zones.count) |
i = 0; |
} while (i != hint); |
spinlock_unlock(&zones.lock); |
return NULL; |
} |
/**************************/ |
/* Buddy system functions */ |
/**************************/ |
/** Buddy system find_block implementation. |
* |
* Find the block that is the parent of the given child block.
* That means going towards lower addresses until such a block is found.
*
* @param order The order of the parent must be different from this
* parameter!!
*/ |
static link_t *zone_buddy_find_block(buddy_system_t *b, link_t *child, |
uint8_t order) |
{ |
frame_t *frame; |
zone_t *zone; |
index_t index; |
frame = list_get_instance(child, frame_t, buddy_link); |
zone = (zone_t *) b->data; |
index = frame_index(zone, frame); |
do { |
if (zone->frames[index].buddy_order != order) { |
return &zone->frames[index].buddy_link; |
} |
} while(index-- > 0); |
return NULL; |
} |
/** Buddy system find_buddy implementation. |
* |
* @param b Buddy system. |
* @param block Block for which buddy should be found. |
* |
* @return Buddy for given block if found. |
*/ |
static link_t *zone_buddy_find_buddy(buddy_system_t *b, link_t *block) |
{ |
frame_t *frame; |
zone_t *zone; |
index_t index; |
bool is_left, is_right; |
frame = list_get_instance(block, frame_t, buddy_link); |
zone = (zone_t *) b->data; |
ASSERT(IS_BUDDY_ORDER_OK(frame_index_abs(zone, frame), |
frame->buddy_order)); |
is_left = IS_BUDDY_LEFT_BLOCK_ABS(zone, frame); |
is_right = IS_BUDDY_RIGHT_BLOCK_ABS(zone, frame); |
ASSERT(is_left ^ is_right); |
if (is_left) { |
index = (frame_index(zone, frame)) + |
(1 << frame->buddy_order); |
} else { /* if (is_right) */ |
index = (frame_index(zone, frame)) - |
(1 << frame->buddy_order); |
} |
if (frame_index_valid(zone, index)) { |
if (zone->frames[index].buddy_order == frame->buddy_order && |
zone->frames[index].refcount == 0) { |
return &zone->frames[index].buddy_link; |
} |
} |
return NULL; |
} |
/** Buddy system bisect implementation. |
* |
* @param b Buddy system. |
* @param block Block to bisect. |
* |
* @return Right block. |
*/ |
static link_t *zone_buddy_bisect(buddy_system_t *b, link_t *block) |
{ |
frame_t *frame_l, *frame_r; |
frame_l = list_get_instance(block, frame_t, buddy_link); |
frame_r = (frame_l + (1 << (frame_l->buddy_order - 1))); |
return &frame_r->buddy_link; |
} |
/** Buddy system coalesce implementation. |
* |
* @param b Buddy system. |
* @param block_1 First block. |
* @param block_2 First block's buddy. |
* |
* @return Coalesced block (actually block that represents lower |
* address). |
*/ |
static link_t *zone_buddy_coalesce(buddy_system_t *b, link_t *block_1, |
link_t *block_2) |
{ |
frame_t *frame1, *frame2; |
frame1 = list_get_instance(block_1, frame_t, buddy_link); |
frame2 = list_get_instance(block_2, frame_t, buddy_link); |
return frame1 < frame2 ? block_1 : block_2; |
} |
/** Buddy system set_order implementation. |
* |
* @param b Buddy system. |
* @param block Buddy system block. |
* @param order Order to set. |
*/ |
static void zone_buddy_set_order(buddy_system_t *b, link_t *block, |
uint8_t order) |
{ |
frame_t *frame; |
frame = list_get_instance(block, frame_t, buddy_link); |
frame->buddy_order = order; |
} |
/** Buddy system get_order implementation. |
* |
* @param b Buddy system. |
* @param block Buddy system block. |
* |
* @return Order of block. |
*/ |
static uint8_t zone_buddy_get_order(buddy_system_t *b, link_t *block) |
{ |
frame_t *frame; |
frame = list_get_instance(block, frame_t, buddy_link); |
return frame->buddy_order; |
} |
/** Buddy system mark_busy implementation. |
* |
* @param b Buddy system. |
* @param block Buddy system block. |
*/ |
static void zone_buddy_mark_busy(buddy_system_t *b, link_t * block) |
{ |
frame_t * frame; |
frame = list_get_instance(block, frame_t, buddy_link); |
frame->refcount = 1; |
} |
/** Buddy system mark_available implementation. |
* |
* @param b Buddy system. |
* @param block Buddy system block. |
*/ |
static void zone_buddy_mark_available(buddy_system_t *b, link_t *block) |
{ |
frame_t *frame; |
frame = list_get_instance(block, frame_t, buddy_link); |
frame->refcount = 0; |
} |
static buddy_system_operations_t zone_buddy_system_operations = { |
.find_buddy = zone_buddy_find_buddy, |
.bisect = zone_buddy_bisect, |
.coalesce = zone_buddy_coalesce, |
.set_order = zone_buddy_set_order, |
.get_order = zone_buddy_get_order, |
.mark_busy = zone_buddy_mark_busy, |
.mark_available = zone_buddy_mark_available, |
.find_block = zone_buddy_find_block |
}; |
/******************/ |
/* Zone functions */ |
/******************/ |
/** Allocate frame in particular zone. |
* |
* Assume zone is locked. |
* Panics if allocation is impossible. |
* |
* @param zone Zone to allocate from. |
* @param order Allocate exactly 2^order frames. |
* |
* @return Frame index in zone. |
* |
*/ |
static pfn_t zone_frame_alloc(zone_t *zone, uint8_t order) |
{ |
pfn_t v; |
link_t *tmp; |
frame_t *frame; |
/* Allocate frames from zone buddy system */ |
tmp = buddy_system_alloc(zone->buddy_system, order); |
ASSERT(tmp); |
/* Update zone information. */ |
zone->free_count -= (1 << order); |
zone->busy_count += (1 << order); |
/* Frame will actually be the first frame of the block. */
frame = list_get_instance(tmp, frame_t, buddy_link);
/* Get the frame index within the zone. */
v = make_frame_index(zone, frame);
return v; |
} |
/** Free frame from zone. |
* |
* Assume zone is locked. |
* |
* @param zone Pointer to zone from which the frame is to be freed. |
* @param frame_idx Frame index relative to zone. |
*/ |
static void zone_frame_free(zone_t *zone, index_t frame_idx) |
{ |
frame_t *frame; |
uint8_t order; |
frame = &zone->frames[frame_idx]; |
/* remember frame order */ |
order = frame->buddy_order; |
ASSERT(frame->refcount); |
if (!--frame->refcount) { |
buddy_system_free(zone->buddy_system, &frame->buddy_link); |
/* Update zone information. */ |
zone->free_count += (1 << order); |
zone->busy_count -= (1 << order); |
} |
} |
/** Return frame from zone. */ |
static frame_t *zone_get_frame(zone_t *zone, index_t frame_idx) |
{ |
ASSERT(frame_idx < zone->count); |
return &zone->frames[frame_idx]; |
} |
/** Mark frame in zone unavailable for allocation. */
static void zone_mark_unavailable(zone_t *zone, index_t frame_idx) |
{ |
frame_t *frame; |
link_t *link; |
frame = zone_get_frame(zone, frame_idx); |
if (frame->refcount) |
return; |
link = buddy_system_alloc_block(zone->buddy_system, |
&frame->buddy_link); |
ASSERT(link); |
zone->free_count--; |
mutex_lock(&mem_avail_mtx); |
mem_avail_frames--; |
mutex_unlock(&mem_avail_mtx); |
} |
/** Join two zones. |
* |
* Expect zone_t *z to point to space at least zone_conf_size() bytes large.
*
* Assume z1 & z2 are locked.
*
* @param z Target zone structure pointer.
* @param z1 First zone to merge.
* @param z2 Second zone to merge.
*/ |
static void _zone_merge(zone_t *z, zone_t *z1, zone_t *z2) |
{ |
uint8_t max_order; |
unsigned int i; |
int z2idx; |
pfn_t frame_idx; |
frame_t *frame; |
ASSERT(!overlaps(z1->base, z1->count, z2->base, z2->count)); |
ASSERT(z1->base < z2->base); |
spinlock_initialize(&z->lock, "zone_lock"); |
z->base = z1->base; |
z->count = z2->base + z2->count - z1->base; |
z->flags = z1->flags & z2->flags; |
z->free_count = z1->free_count + z2->free_count; |
z->busy_count = z1->busy_count + z2->busy_count; |
max_order = fnzb(z->count); |
z->buddy_system = (buddy_system_t *) &z[1]; |
buddy_system_create(z->buddy_system, max_order, |
&zone_buddy_system_operations, (void *) z); |
z->frames = (frame_t *)((uint8_t *) z->buddy_system + |
buddy_conf_size(max_order)); |
for (i = 0; i < z->count; i++) { |
/* This marks all frames busy */ |
frame_initialize(&z->frames[i]); |
} |
/* Copy frames from both zones to preserve full frame orders,
* parents etc. Set all free frames (refcount == 0) to refcount 1 and
* clear their order, because all free frames will be re-added to the
* buddy allocator later. Leave busy frames untouched, as they will not
* be reallocated during the merge and changing them would cause
* problems with later allocation/freeing.
*/
for (i = 0; i < z1->count; i++) |
z->frames[i] = z1->frames[i]; |
for (i = 0; i < z2->count; i++) { |
z2idx = i + (z2->base - z1->base); |
z->frames[z2idx] = z2->frames[i]; |
} |
i = 0; |
while (i < z->count) { |
if (z->frames[i].refcount) { |
/* skip busy frames */ |
i += 1 << z->frames[i].buddy_order; |
} else { /* Free frames, set refcount=1 */ |
/* All free frames have refcount=0, we need not
* check the order */
z->frames[i].refcount = 1; |
z->frames[i].buddy_order = 0; |
i++; |
} |
} |
/* Add free blocks from the 2 original zones */ |
while (zone_can_alloc(z1, 0)) { |
frame_idx = zone_frame_alloc(z1, 0); |
frame = &z->frames[frame_idx]; |
frame->refcount = 0; |
buddy_system_free(z->buddy_system, &frame->buddy_link); |
} |
while (zone_can_alloc(z2, 0)) { |
frame_idx = zone_frame_alloc(z2, 0); |
frame = &z->frames[frame_idx + (z2->base - z1->base)]; |
frame->refcount = 0; |
buddy_system_free(z->buddy_system, &frame->buddy_link); |
} |
} |
/** Return old configuration frames into the zone. |
* |
* There are several cases:
* - the conf. data is outside the zone -> exit; shall we call frame_free??
* - the conf. data was created by zone_create or
* updated with reduce_region -> free every frame
* |
* @param newzone The actual zone where freeing should occur. |
* @param oldzone Pointer to old zone configuration data that should |
* be freed from new zone. |
*/ |
static void return_config_frames(zone_t *newzone, zone_t *oldzone) |
{ |
pfn_t pfn; |
frame_t *frame; |
count_t cframes; |
unsigned int i; |
pfn = ADDR2PFN((uintptr_t)KA2PA(oldzone)); |
cframes = SIZE2FRAMES(zone_conf_size(oldzone->count)); |
if (pfn < newzone->base || pfn >= newzone->base + newzone->count) |
return; |
frame = &newzone->frames[pfn - newzone->base]; |
ASSERT(!frame->buddy_order); |
for (i = 0; i < cframes; i++) { |
newzone->busy_count++; |
zone_frame_free(newzone, pfn+i-newzone->base); |
} |
} |
/** Reduce allocated block to count of order 0 frames. |
* |
* The allocated block needs 2^order frames of space. Reduce all frames
* in the block to order 0 and free the unneeded frames. This means that
* when freeing the previously allocated block starting with frame_idx,
* you have to free every frame.
*
* @param zone Zone containing the block.
* @param frame_idx Index to block.
* @param count Allocated space in block.
*/ |
static void zone_reduce_region(zone_t *zone, pfn_t frame_idx, count_t count) |
{ |
count_t i; |
uint8_t order; |
frame_t *frame; |
ASSERT(frame_idx + count < zone->count); |
order = zone->frames[frame_idx].buddy_order; |
ASSERT((count_t) (1 << order) >= count); |
/* Reduce all blocks to order 0 */ |
for (i = 0; i < (count_t) (1 << order); i++) { |
frame = &zone->frames[i + frame_idx]; |
frame->buddy_order = 0; |
if (!frame->refcount) |
frame->refcount = 1; |
ASSERT(frame->refcount == 1); |
} |
/* Free unneeded frames */ |
for (i = count; i < (count_t) (1 << order); i++) { |
zone_frame_free(zone, i + frame_idx); |
} |
} |
/** Merge zones z1 and z2. |
* |
* - the zones must be adjacent, with no other zone existing in between,
* which means that z2 == z1 + 1
*
* - When you create a new zone, the frame allocator configuration does
* not need to be of 2^order size. Once the allocator is running this is
* no longer possible, because the merged configuration data occupies more space :-/
*/ |
void zone_merge(unsigned int z1, unsigned int z2) |
{ |
ipl_t ipl; |
zone_t *zone1, *zone2, *newzone; |
unsigned int cframes; |
uint8_t order; |
unsigned int i; |
pfn_t pfn; |
ipl = interrupts_disable(); |
spinlock_lock(&zones.lock); |
if ((z1 >= zones.count) || (z2 >= zones.count)) |
goto errout; |
/* We can join only 2 zones with none existing in between */
if (z2 - z1 != 1) |
goto errout; |
zone1 = zones.info[z1]; |
zone2 = zones.info[z2]; |
spinlock_lock(&zone1->lock); |
spinlock_lock(&zone2->lock); |
cframes = SIZE2FRAMES(zone_conf_size(zone2->base + zone2->count - |
zone1->base)); |
if (cframes == 1) |
order = 0; |
else |
order = fnzb(cframes - 1) + 1; |
/* Allocate zonedata inside one of the zones */ |
if (zone_can_alloc(zone1, order)) |
pfn = zone1->base + zone_frame_alloc(zone1, order); |
else if (zone_can_alloc(zone2, order)) |
pfn = zone2->base + zone_frame_alloc(zone2, order); |
else |
goto errout2; |
newzone = (zone_t *) PA2KA(PFN2ADDR(pfn)); |
_zone_merge(newzone, zone1, zone2); |
/* Free unneeded config frames */ |
zone_reduce_region(newzone, pfn - newzone->base, cframes); |
/* Subtract zone information from busy frames */ |
newzone->busy_count -= cframes; |
/* Replace existing zones in zoneinfo list */ |
zones.info[z1] = newzone; |
for (i = z2 + 1; i < zones.count; i++) |
zones.info[i - 1] = zones.info[i]; |
zones.count--; |
/* Free old zone information */ |
return_config_frames(newzone, zone1); |
return_config_frames(newzone, zone2); |
errout2: |
/* Nobody is allowed to enter the zone, so it is safe
* to touch the spinlocks one last time */
spinlock_unlock(&zone1->lock); |
spinlock_unlock(&zone2->lock); |
errout: |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
} |
/** Merge all zones into one big zone. |
* |
* It is reasonable to do this on systems whose BIOS reports memory in
* several chunks, so that we could have one zone (it is faster).
*/ |
void zone_merge_all(void) |
{ |
int count = zones.count; |
while (zones.count > 1 && --count) { |
zone_merge(0, 1); |
break; |
} |
} |
/** Create new frame zone. |
* |
* @param start Frame number of the first frame within the zone.
* @param count Count of frames in zone.
* @param z Address of configuration information of zone.
* @param flags Zone flags.
*/ |
static void zone_construct(pfn_t start, count_t count, zone_t *z, int flags) |
{ |
unsigned int i; |
uint8_t max_order; |
spinlock_initialize(&z->lock, "zone_lock"); |
z->base = start; |
z->count = count; |
/* Mask off flags that are calculated automatically. */ |
flags &= ~FRAME_LOW_4_GiB; |
/* Determine calculated flags. */ |
if (z->base + count < (1ULL << (32 - FRAME_WIDTH))) /* 4 GiB */ |
flags |= FRAME_LOW_4_GiB; |
z->flags = flags; |
z->free_count = count; |
z->busy_count = 0; |
/* |
* Compute order for buddy system, initialize |
*/ |
max_order = fnzb(count); |
z->buddy_system = (buddy_system_t *)&z[1]; |
buddy_system_create(z->buddy_system, max_order, |
&zone_buddy_system_operations, (void *) z); |
/* Allocate frames _after_ the confframe */
/* Check sizes */ |
z->frames = (frame_t *)((uint8_t *) z->buddy_system + |
buddy_conf_size(max_order)); |
for (i = 0; i < count; i++) { |
frame_initialize(&z->frames[i]); |
} |
/* Stuffing frames */ |
for (i = 0; i < count; i++) { |
z->frames[i].refcount = 0; |
buddy_system_free(z->buddy_system, &z->frames[i].buddy_link); |
} |
} |
/** Compute configuration data size for zone. |
* |
* @param count Size of zone in frames. |
* @return Size of zone configuration info (in bytes). |
*/ |
uintptr_t zone_conf_size(count_t count) |
{ |
int size = sizeof(zone_t) + count * sizeof(frame_t); |
int max_order; |
max_order = fnzb(count); |
size += buddy_conf_size(max_order); |
return size; |
} |
/** Create and add zone to system. |
* |
* @param start First frame number (absolute). |
* @param count Size of zone in frames. |
* @param confframe Where configuration frames are supposed to be.
* It is automatically checked that we will not disturb the
* kernel and possibly init. If confframe is given
* _outside_ this zone, it is expected that the area is
* already marked BUSY and big enough to contain
* zone_conf_size() amount of data. If the confframe is
* inside the area, the zone free frame information is
* modified not to include it.
* @param flags Zone flags.
* |
* @return Zone number or -1 on error. |
*/ |
int zone_create(pfn_t start, count_t count, pfn_t confframe, int flags) |
{ |
zone_t *z; |
uintptr_t addr; |
count_t confcount; |
unsigned int i; |
int znum; |
/* Theoretically confframe could be 0; in practice make sure
* nobody tries to do that. If some platform requires it, remove
* the assert
*/ |
ASSERT(confframe); |
/* If confframe is supposed to be inside our zone, then make sure
* it does not span kernel & init |
*/ |
confcount = SIZE2FRAMES(zone_conf_size(count)); |
if (confframe >= start && confframe < start + count) { |
for (; confframe < start + count; confframe++) { |
addr = PFN2ADDR(confframe); |
if (overlaps(addr, PFN2ADDR(confcount), |
KA2PA(config.base), config.kernel_size)) |
continue; |
if (overlaps(addr, PFN2ADDR(confcount), |
KA2PA(config.stack_base), config.stack_size)) |
continue; |
bool overlap = false; |
count_t i; |
for (i = 0; i < init.cnt; i++) |
if (overlaps(addr, PFN2ADDR(confcount), |
KA2PA(init.tasks[i].addr), |
init.tasks[i].size)) { |
overlap = true; |
break; |
} |
if (overlap) |
continue; |
break; |
} |
if (confframe >= start + count) |
panic("Cannot find configuration data for zone."); |
} |
z = (zone_t *) PA2KA(PFN2ADDR(confframe)); |
zone_construct(start, count, z, flags); |
znum = zones_add_zone(z); |
if (znum == -1) |
return -1; |
mutex_lock(&mem_avail_mtx); |
mem_avail_frames += count; |
mutex_unlock(&mem_avail_mtx); |
/* If confdata in zone, mark as unavailable */ |
if (confframe >= start && confframe < start + count) |
for (i = confframe; i < confframe + confcount; i++) { |
zone_mark_unavailable(z, i - z->base); |
} |
return znum; |
} |
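/*
 * Illustrative sketch (not part of the original source) of how an
 * architecture's frame_arch_init() might register a memory zone with
 * zone_create(). The base address and size below are hypothetical
 * placeholders.
 */
#if 0
static void example_frame_arch_init(void)
{
	/* A hypothetical 16 MiB RAM region starting at 1 MiB. */
	uintptr_t base = 0x100000;
	size_t size = 16 * 1024 * 1024;

	/*
	 * Place the configuration data at the start of the zone;
	 * zone_create() then marks the configuration frames as
	 * unavailable automatically.
	 */
	zone_create(ADDR2PFN(base), SIZE2FRAMES(size), ADDR2PFN(base), 0);
}
#endif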
/***************************************/
/* Frame functions */
/***************************************/
/** Set parent of frame. */ |
void frame_set_parent(pfn_t pfn, void *data, unsigned int hint) |
{ |
zone_t *zone = find_zone_and_lock(pfn, &hint); |
ASSERT(zone); |
zone_get_frame(zone, pfn - zone->base)->parent = data; |
spinlock_unlock(&zone->lock); |
} |
void *frame_get_parent(pfn_t pfn, unsigned int hint) |
{ |
zone_t *zone = find_zone_and_lock(pfn, &hint); |
void *res; |
ASSERT(zone); |
res = zone_get_frame(zone, pfn - zone->base)->parent; |
spinlock_unlock(&zone->lock); |
return res; |
} |
/** Allocate power-of-two frames of physical memory. |
* |
* @param order Allocate exactly 2^order frames. |
* @param flags Flags for host zone selection and address processing. |
* @param pzone Preferred zone. |
* |
* @return Physical address of the allocated block, or the corresponding
* kernel virtual address if FRAME_KA was specified.
* |
*/ |
void *frame_alloc_generic(uint8_t order, int flags, unsigned int *pzone) |
{ |
ipl_t ipl; |
int freed; |
pfn_t v; |
zone_t *zone; |
unsigned long gen = 0; |
loop: |
ipl = interrupts_disable(); |
/* |
* First, find suitable frame zone. |
*/ |
zone = find_free_zone_and_lock(order, flags, pzone); |
/* If there is no memory, reclaim some slab memory;
if that does not help, reclaim all of it */
if (!zone && !(flags & FRAME_NO_RECLAIM)) { |
freed = slab_reclaim(0); |
if (freed) |
zone = find_free_zone_and_lock(order, flags, pzone); |
if (!zone) { |
freed = slab_reclaim(SLAB_RECLAIM_ALL); |
if (freed) |
zone = find_free_zone_and_lock(order, flags, |
pzone); |
} |
} |
if (!zone) { |
/* |
* Sleep until some frames are available again. |
*/ |
if (flags & FRAME_ATOMIC) { |
interrupts_restore(ipl); |
return 0; |
} |
#ifdef CONFIG_DEBUG |
unsigned long avail; |
mutex_lock(&mem_avail_mtx); |
avail = mem_avail_frames; |
mutex_unlock(&mem_avail_mtx); |
printf("Thread %" PRIu64 " waiting for %u frames, " |
"%u available.\n", THREAD->tid, 1ULL << order, avail); |
#endif |
mutex_lock(&mem_avail_mtx); |
while ((mem_avail_frames < (1ULL << order)) || |
gen == mem_avail_gen) |
condvar_wait(&mem_avail_cv, &mem_avail_mtx); |
gen = mem_avail_gen; |
mutex_unlock(&mem_avail_mtx); |
#ifdef CONFIG_DEBUG |
mutex_lock(&mem_avail_mtx); |
avail = mem_avail_frames; |
mutex_unlock(&mem_avail_mtx); |
printf("Thread %" PRIu64 " woken up, %u frames available.\n", |
THREAD->tid, avail); |
#endif |
interrupts_restore(ipl); |
goto loop; |
} |
v = zone_frame_alloc(zone, order); |
v += zone->base; |
spinlock_unlock(&zone->lock); |
mutex_lock(&mem_avail_mtx); |
mem_avail_frames -= (1ULL << order); |
mutex_unlock(&mem_avail_mtx); |
interrupts_restore(ipl); |
if (flags & FRAME_KA) |
return (void *)PA2KA(PFN2ADDR(v)); |
return (void *)PFN2ADDR(v); |
} |
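/*
 * Illustrative sketch (not part of the original source): allocating a
 * single frame mapped into the kernel address space and freeing it
 * again, mirroring the way the slab allocator uses this interface.
 */
#if 0
static void example_frame_usage(void)
{
	unsigned int zone = 0;

	/* Allocate 2^0 == 1 frame and get its kernel virtual address. */
	void *data = frame_alloc_generic(0, FRAME_KA | FRAME_ATOMIC, &zone);
	if (!data)
		return;

	/* Use the frame. */
	memsetb(data, FRAME_SIZE, 0);

	/* frame_free() expects the physical address of the frame. */
	frame_free(KA2PA(data));
}
#endif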
/** Free a frame. |
* |
* Find the respective frame structure for the supplied physical frame
* address. Decrement the frame reference count. If it drops to zero,
* move the frame structure to the free list.
*
* @param frame Physical address of the frame to be freed.
*/ |
void frame_free(uintptr_t frame) |
{ |
ipl_t ipl; |
zone_t *zone; |
pfn_t pfn = ADDR2PFN(frame); |
ipl = interrupts_disable(); |
/* |
* First, find host frame zone for addr. |
*/ |
zone = find_zone_and_lock(pfn, NULL); |
ASSERT(zone); |
zone_frame_free(zone, pfn - zone->base); |
spinlock_unlock(&zone->lock); |
/* |
* Signal that some memory has been freed. |
*/ |
mutex_lock(&mem_avail_mtx); |
mem_avail_frames++; |
mem_avail_gen++; |
condvar_broadcast(&mem_avail_cv); |
mutex_unlock(&mem_avail_mtx); |
interrupts_restore(ipl); |
} |
/** Add reference to frame. |
* |
* Find respective frame structure for supplied PFN and |
* increment frame reference count. |
* |
* @param pfn Frame number of the frame whose reference count is
* to be incremented.
*/ |
void frame_reference_add(pfn_t pfn) |
{ |
ipl_t ipl; |
zone_t *zone; |
frame_t *frame; |
ipl = interrupts_disable(); |
/* |
* First, find host frame zone for addr. |
*/ |
zone = find_zone_and_lock(pfn, NULL); |
ASSERT(zone); |
frame = &zone->frames[pfn - zone->base]; |
frame->refcount++; |
spinlock_unlock(&zone->lock); |
interrupts_restore(ipl); |
} |
/** Mark given range unavailable in frame zones. */ |
void frame_mark_unavailable(pfn_t start, count_t count) |
{ |
unsigned int i; |
zone_t *zone; |
unsigned int prefzone = 0; |
for (i = 0; i < count; i++) { |
zone = find_zone_and_lock(start + i, &prefzone); |
if (!zone) /* PFN not found */ |
continue; |
zone_mark_unavailable(zone, start + i - zone->base); |
spinlock_unlock(&zone->lock); |
} |
} |
/** Initialize physical memory management. */ |
void frame_init(void) |
{ |
if (config.cpu_active == 1) { |
zones.count = 0; |
spinlock_initialize(&zones.lock, "zones.lock"); |
mutex_initialize(&mem_avail_mtx, MUTEX_ACTIVE); |
condvar_initialize(&mem_avail_cv); |
} |
/* Tell the architecture to create some memory */ |
frame_arch_init(); |
if (config.cpu_active == 1) { |
frame_mark_unavailable(ADDR2PFN(KA2PA(config.base)), |
SIZE2FRAMES(config.kernel_size)); |
frame_mark_unavailable(ADDR2PFN(KA2PA(config.stack_base)), |
SIZE2FRAMES(config.stack_size)); |
count_t i; |
for (i = 0; i < init.cnt; i++) { |
pfn_t pfn = ADDR2PFN(KA2PA(init.tasks[i].addr)); |
frame_mark_unavailable(pfn, |
SIZE2FRAMES(init.tasks[i].size)); |
} |
if (ballocs.size) |
frame_mark_unavailable(ADDR2PFN(KA2PA(ballocs.base)), |
SIZE2FRAMES(ballocs.size)); |
/* Blacklist the first frame, as allocating NULL would
* fail in some places */ |
frame_mark_unavailable(0, 1); |
} |
} |
/** Return total size of all zones. */ |
uint64_t zone_total_size(void) |
{ |
zone_t *zone = NULL; |
unsigned int i; |
ipl_t ipl; |
uint64_t total = 0; |
ipl = interrupts_disable(); |
spinlock_lock(&zones.lock); |
for (i = 0; i < zones.count; i++) { |
zone = zones.info[i]; |
spinlock_lock(&zone->lock); |
total += (uint64_t) FRAMES2SIZE(zone->count); |
spinlock_unlock(&zone->lock); |
} |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
return total; |
} |
/** Prints list of zones. */ |
void zone_print_list(void) |
{ |
zone_t *zone = NULL; |
unsigned int i; |
ipl_t ipl; |
#ifdef __32_BITS__ |
printf("# base address free frames busy frames\n"); |
printf("-- ------------ ------------ ------------\n"); |
#endif |
#ifdef __64_BITS__ |
printf("# base address free frames busy frames\n"); |
printf("-- -------------------- ------------ ------------\n"); |
#endif |
/* |
* Because printing may require allocation of memory, we may not hold |
* the frame allocator locks when printing zone statistics. Therefore, |
* we simply gather the statistics under the protection of the locks and |
* print the statistics when the locks have been released. |
* |
* When someone adds/removes zones while we are printing the statistics, |
* we may end up with inaccurate output (e.g. a zone being skipped from |
* the listing). |
*/ |
for (i = 0; ; i++) { |
uintptr_t base; |
count_t free_count; |
count_t busy_count; |
ipl = interrupts_disable(); |
spinlock_lock(&zones.lock); |
if (i >= zones.count) { |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
break; |
} |
zone = zones.info[i]; |
spinlock_lock(&zone->lock); |
base = PFN2ADDR(zone->base); |
free_count = zone->free_count; |
busy_count = zone->busy_count; |
spinlock_unlock(&zone->lock); |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
#ifdef __32_BITS__ |
printf("%-2u %10p %12" PRIc " %12" PRIc "\n", i, base, |
free_count, busy_count); |
#endif |
#ifdef __64_BITS__ |
printf("%-2u %18p %12" PRIc " %12" PRIc "\n", i, base, |
free_count, busy_count); |
#endif |
} |
} |
/** Prints zone details. |
* |
* @param num Zone base address or zone number. |
*/ |
void zone_print_one(unsigned int num) |
{ |
zone_t *zone = NULL; |
ipl_t ipl; |
unsigned int i; |
uintptr_t base; |
count_t count; |
count_t busy_count; |
count_t free_count; |
ipl = interrupts_disable(); |
spinlock_lock(&zones.lock); |
for (i = 0; i < zones.count; i++) { |
if ((i == num) || (PFN2ADDR(zones.info[i]->base) == num)) { |
zone = zones.info[i]; |
break; |
} |
} |
if (!zone) { |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
printf("Zone not found.\n"); |
return; |
} |
spinlock_lock(&zone->lock); |
base = PFN2ADDR(zone->base); |
count = zone->count; |
busy_count = zone->busy_count; |
free_count = zone->free_count; |
spinlock_unlock(&zone->lock); |
spinlock_unlock(&zones.lock); |
interrupts_restore(ipl); |
printf("Zone base address: %p\n", base); |
printf("Zone size: %" PRIc " frames (%" PRIs " KiB)\n", count, |
SIZE2KB(FRAMES2SIZE(count))); |
printf("Allocated space: %" PRIc " frames (%" PRIs " KiB)\n", |
busy_count, SIZE2KB(FRAMES2SIZE(busy_count))); |
printf("Available space: %" PRIc " frames (%" PRIs " KiB)\n", |
free_count, SIZE2KB(FRAMES2SIZE(free_count))); |
} |
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/buddy.c |
---|
0,0 → 1,284 |
/* |
* Copyright (c) 2005 Jakub Jermar |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Buddy allocator framework. |
* |
* This file contains buddy system allocator framework. |
* Specialized functions are needed for this abstract framework to be useful. |
*/ |
#include <mm/buddy.h> |
#include <mm/frame.h> |
#include <arch/types.h> |
#include <debug.h> |
#include <print.h> |
#include <macros.h> |
/** Return size needed for the buddy configuration data. */ |
size_t buddy_conf_size(int max_order) |
{ |
return sizeof(buddy_system_t) + (max_order + 1) * sizeof(link_t); |
} |
/** Create buddy system. |
* |
* Initialize a new buddy system using the preallocated control data.
*
* @param b Preallocated buddy system control data.
* @param max_order The biggest allocable size will be 2^max_order.
* @param op Operations for the new buddy system.
* @param data Pointer to be used by the implementation.
*/ |
void |
buddy_system_create(buddy_system_t *b, uint8_t max_order, |
buddy_system_operations_t *op, void *data) |
{ |
int i; |
ASSERT(max_order < BUDDY_SYSTEM_INNER_BLOCK); |
ASSERT(op->find_buddy); |
ASSERT(op->set_order); |
ASSERT(op->get_order); |
ASSERT(op->bisect); |
ASSERT(op->coalesce); |
ASSERT(op->mark_busy); |
/* |
* Use memory after our own structure. |
*/ |
b->order = (link_t *) (&b[1]); |
for (i = 0; i <= max_order; i++) |
list_initialize(&b->order[i]); |
b->max_order = max_order; |
b->op = op; |
b->data = data; |
} |
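/*
 * Illustrative sketch (not part of the original source) of how a client
 * such as the frame allocator's zone code lays out and initializes a
 * buddy system inside preallocated configuration memory (compare with
 * zone_construct() in frame.c).
 */
#if 0
static void example_buddy_setup(void *conf, uint8_t max_order,
    buddy_system_operations_t *ops, void *client_data)
{
	/* The buddy control data lives at the start of 'conf'... */
	buddy_system_t *b = (buddy_system_t *) conf;

	/* ...and buddy_conf_size() tells how much space it occupies. */
	void *rest = (uint8_t *) conf + buddy_conf_size(max_order);
	(void) rest;	/* client-specific structures would follow here */

	buddy_system_create(b, max_order, ops, client_data);
}
#endif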
/** Check if buddy system can allocate block. |
* |
* @param b Buddy system pointer. |
* @param i Size of the block (2^i). |
* |
* @return True if block can be allocated. |
*/ |
bool buddy_system_can_alloc(buddy_system_t *b, uint8_t i) |
{ |
uint8_t k; |
/* |
* If the requested block is greater than the maximal block,
* we know immediately that we cannot satisfy the request.
*/ |
if (i > b->max_order) |
return false; |
/* |
* Check if any bigger or equal order has free elements |
*/ |
for (k = i; k <= b->max_order; k++) { |
if (!list_empty(&b->order[k])) { |
return true; |
} |
} |
return false; |
} |
/** Allocate PARTICULAR block from buddy system. |
* |
* @return Block of data or NULL if no such block was found. |
*/ |
link_t *buddy_system_alloc_block(buddy_system_t *b, link_t *block) |
{ |
link_t *left,*right, *tmp; |
uint8_t order; |
left = b->op->find_block(b, block, BUDDY_SYSTEM_INNER_BLOCK); |
ASSERT(left); |
list_remove(left); |
while (1) { |
if (!b->op->get_order(b, left)) { |
b->op->mark_busy(b, left); |
return left; |
} |
order = b->op->get_order(b, left); |
right = b->op->bisect(b, left); |
b->op->set_order(b, left, order - 1); |
b->op->set_order(b, right, order - 1); |
tmp = b->op->find_block(b, block, BUDDY_SYSTEM_INNER_BLOCK); |
if (tmp == right) { |
right = left; |
left = tmp; |
} |
ASSERT(tmp == left); |
b->op->mark_busy(b, left); |
buddy_system_free(b, right); |
b->op->mark_available(b, left); |
} |
} |
/** Allocate block from buddy system. |
* |
* @param b Buddy system pointer. |
* @param i Returned block will be 2^i big. |
* |
* @return Block of data represented by link_t. |
*/ |
link_t *buddy_system_alloc(buddy_system_t *b, uint8_t i) |
{ |
link_t *res, *hlp; |
ASSERT(i <= b->max_order); |
/* |
* If the list of order i is not empty, |
* the request can be immediately satisfied.
*/ |
if (!list_empty(&b->order[i])) { |
res = b->order[i].next; |
list_remove(res); |
b->op->mark_busy(b, res); |
return res; |
} |
/* |
* If order i is already the maximal order, |
* the request cannot be satisfied. |
*/ |
if (i == b->max_order) |
return NULL; |
/* |
* Try to recursively satisfy the request from higher order lists. |
*/ |
hlp = buddy_system_alloc(b, i + 1); |
/* |
* The request could not be satisfied |
* from higher order lists. |
*/ |
if (!hlp) |
return NULL; |
res = hlp; |
/* |
* Bisect the block and set order of both of its parts to i. |
*/ |
hlp = b->op->bisect(b, res); |
b->op->set_order(b, res, i); |
b->op->set_order(b, hlp, i); |
/* |
* Return the other half to buddy system. Mark the first part |
* full, so that it won't coalesce again. |
*/ |
b->op->mark_busy(b, res); |
buddy_system_free(b, hlp); |
return res; |
} |
/** Return block to buddy system. |
* |
* @param b Buddy system pointer. |
* @param block Block to return. |
*/ |
void buddy_system_free(buddy_system_t *b, link_t *block) |
{ |
link_t *buddy, *hlp; |
uint8_t i; |
/* |
* Determine block's order. |
*/ |
i = b->op->get_order(b, block); |
ASSERT(i <= b->max_order); |
if (i != b->max_order) { |
/* |
* See if there is any buddy in the list of order i. |
*/ |
buddy = b->op->find_buddy(b, block); |
if (buddy) { |
ASSERT(b->op->get_order(b, buddy) == i); |
/* |
* Remove buddy from the list of order i. |
*/ |
list_remove(buddy); |
/* |
* Invalidate order of both block and buddy. |
*/ |
b->op->set_order(b, block, BUDDY_SYSTEM_INNER_BLOCK); |
b->op->set_order(b, buddy, BUDDY_SYSTEM_INNER_BLOCK); |
/* |
* Coalesce block and buddy into one block. |
*/ |
hlp = b->op->coalesce(b, block, buddy); |
/* |
* Set order of the coalesced block to i + 1. |
*/ |
b->op->set_order(b, hlp, i + 1); |
/* |
* Recursively add the coalesced block to the list of |
* order i + 1. |
*/ |
buddy_system_free(b, hlp); |
return; |
} |
} |
/* |
* Insert block into the list of order i. |
*/ |
list_append(block, &b->order[i]); |
} |
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/slab.c |
---|
0,0 → 1,982 |
/* |
* Copyright (c) 2006 Ondrej Palkovsky |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Slab allocator. |
* |
* The slab allocator is closely modelled after OpenSolaris slab allocator. |
* @see http://www.usenix.org/events/usenix01/full_papers/bonwick/bonwick_html/ |
* |
* with the following exceptions: |
* @li empty slabs are deallocated immediately |
* (in Linux they are kept in linked list, in Solaris ???) |
* @li empty magazines are deallocated when not needed |
* (in Solaris they are held in linked list in slab cache) |
* |
* The following features are not currently supported but would be easy to add:
* @li cache coloring |
* @li dynamic magazine growing (different magazine sizes are already |
* supported, but we would need to adjust allocation strategy) |
* |
* The slab allocator supports per-CPU caches ('magazines') to facilitate |
* good SMP scaling. |
* |
* When a new object is being allocated, it is first checked whether it is
* available in a CPU-bound magazine. If it is not found there, it is
* allocated from a CPU-shared slab - if a partially full one is found,
* it is used, otherwise a new one is allocated.
*
* When an object is being deallocated, it is put into a CPU-bound magazine.
* If there is no such magazine, a new one is allocated (if this fails,
* the object is deallocated into its slab). If the magazine is full, it is
* put onto the CPU-shared list of magazines and a new one is allocated.
* |
* The CPU-bound magazine is actually a pair of magazines in order to avoid |
* thrashing when somebody is allocating/deallocating 1 item at the magazine |
* size boundary. LIFO order is enforced, which should avoid fragmentation |
* as much as possible. |
* |
* Every cache contains list of full slabs and list of partially full slabs. |
* Empty slabs are immediately freed (thrashing will be avoided because |
* of magazines). |
* |
* The slab information structure is kept inside the data area, if possible.
* A cache can be marked so that it does not use magazines. This is used
* only for slab-related caches to avoid deadlocks and infinite recursion
* (the slab allocator uses itself to allocate all of its control structures).
*
* The slab allocator allocates a lot of space and does not free it. When
* the frame allocator fails to allocate a frame, it calls slab_reclaim().
* It tries 'light reclaim' first, then brutal reclaim. The light reclaim
* releases slabs from the CPU-shared magazine list, until at least one slab
* is deallocated in each cache (this algorithm should probably change).
* The brutal reclaim removes all cached objects, even from CPU-bound
* magazines.
* |
* @todo |
* For better CPU scaling the magazine allocation strategy should
* be extended. Currently, if a cache does not have a magazine, it asks
* the non-CPU-cached magazine cache to provide one. It might be feasible
* to add a CPU-cached magazine cache (which would allocate its magazines
* from the non-CPU-cached magazine cache). This would provide a nice
* per-CPU buffer. The other possibility is to use the per-cache
* 'empty-magazine-list', which decreases contention for the single per-system
* magazine cache. |
* |
* @todo |
* it might be good to add lock granularity even at the slab level;
* we could then try_spinlock over all partial slabs and thus improve
* scalability even at the slab level
*/ |
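/*
 * Illustrative sketch (not part of the original source): typical use of
 * the slab allocator by a kernel subsystem. The slab_cache_create()
 * signature is assumed from <mm/slab.h>; the cache name and object type
 * below are hypothetical.
 */
#if 0
typedef struct {
	int foo;
	int bar;
} example_obj_t;

static slab_cache_t *example_cache;

static void example_slab_usage(void)
{
	/* Create a cache of example_obj_t objects with default settings. */
	example_cache = slab_cache_create("example_obj_t",
	    sizeof(example_obj_t), 0, NULL, NULL, 0);

	/* Allocation is served from a CPU-bound magazine when possible. */
	example_obj_t *obj = slab_alloc(example_cache, 0);
	obj->foo = 1;
	obj->bar = 2;

	/* Freeing puts the object back into a CPU-bound magazine. */
	slab_free(example_cache, obj);
}
#endif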
#include <synch/spinlock.h> |
#include <mm/slab.h> |
#include <adt/list.h> |
#include <memstr.h> |
#include <align.h> |
#include <mm/frame.h> |
#include <config.h> |
#include <print.h> |
#include <arch.h> |
#include <panic.h> |
#include <debug.h> |
#include <bitops.h> |
#include <macros.h> |
SPINLOCK_INITIALIZE(slab_cache_lock); |
static LIST_INITIALIZE(slab_cache_list); |
/** Magazine cache */ |
static slab_cache_t mag_cache; |
/** Cache for cache descriptors */ |
static slab_cache_t slab_cache_cache; |
/** Cache for external slab descriptors.
* This time we want a per-cpu cache, so do not make it static
* - using slab for internal slab structures will not deadlock, |
* as all slab structures are 'small' - control structures of |
* their caches do not require further allocation |
*/ |
static slab_cache_t *slab_extern_cache; |
/** Caches for malloc */ |
static slab_cache_t *malloc_caches[SLAB_MAX_MALLOC_W - SLAB_MIN_MALLOC_W + 1]; |
char *malloc_names[] = { |
"malloc-16", |
"malloc-32", |
"malloc-64", |
"malloc-128", |
"malloc-256", |
"malloc-512", |
"malloc-1K", |
"malloc-2K", |
"malloc-4K", |
"malloc-8K", |
"malloc-16K", |
"malloc-32K", |
"malloc-64K", |
"malloc-128K", |
"malloc-256K" |
}; |
/** Slab descriptor */ |
typedef struct { |
slab_cache_t *cache; /**< Pointer to parent cache. */ |
link_t link; /**< List of full/partial slabs. */ |
void *start; /**< Start address of first available item. */ |
count_t available; /**< Count of available items in this slab. */ |
index_t nextavail; /**< The index of next available item. */ |
} slab_t; |
#ifdef CONFIG_DEBUG |
static int _slab_initialized = 0; |
#endif |
/**************************************/ |
/* Slab allocation functions */ |
/** |
* Allocate frames for slab space and initialize the slab descriptor |
* |
*/ |
static slab_t *slab_space_alloc(slab_cache_t *cache, int flags) |
{ |
void *data; |
slab_t *slab; |
size_t fsize; |
unsigned int i; |
unsigned int zone = 0; |
data = frame_alloc_generic(cache->order, FRAME_KA | flags, &zone); |
if (!data) { |
return NULL; |
} |
if (!(cache->flags & SLAB_CACHE_SLINSIDE)) { |
slab = slab_alloc(slab_extern_cache, flags); |
if (!slab) { |
frame_free(KA2PA(data)); |
return NULL; |
} |
} else { |
fsize = (PAGE_SIZE << cache->order); |
slab = data + fsize - sizeof(*slab); |
} |
/* Fill in slab structures */ |
for (i = 0; i < ((unsigned int) 1 << cache->order); i++) |
frame_set_parent(ADDR2PFN(KA2PA(data)) + i, slab, zone); |
slab->start = data; |
slab->available = cache->objects; |
slab->nextavail = 0; |
slab->cache = cache; |
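/* |
* Build the embedded free list: the first word of each free object stores |
* the index of the next free object within this slab. |
*/ |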
for (i = 0; i < cache->objects; i++) |
*((int *) (slab->start + i * cache->size)) = i + 1; |
atomic_inc(&cache->allocated_slabs); |
return slab; |
} |
/** |
* Deallocate space associated with slab |
* |
* @return number of freed frames |
*/ |
static count_t slab_space_free(slab_cache_t *cache, slab_t *slab) |
{ |
frame_free(KA2PA(slab->start)); |
if (! (cache->flags & SLAB_CACHE_SLINSIDE)) |
slab_free(slab_extern_cache, slab); |
atomic_dec(&cache->allocated_slabs); |
return 1 << cache->order; |
} |
/** Map object to slab structure */ |
static slab_t * obj2slab(void *obj) |
{ |
return (slab_t *) frame_get_parent(ADDR2PFN(KA2PA(obj)), 0); |
} |
/**************************************/ |
/* Slab functions */ |
/** |
* Return object to slab and call a destructor |
* |
* @param slab The slab of the object, if known directly by the caller; NULL otherwise |
* |
* @return Number of freed pages |
*/ |
static count_t slab_obj_destroy(slab_cache_t *cache, void *obj, slab_t *slab) |
{ |
int freed = 0; |
if (!slab) |
slab = obj2slab(obj); |
ASSERT(slab->cache == cache); |
if (cache->destructor) |
freed = cache->destructor(obj); |
spinlock_lock(&cache->slablock); |
ASSERT(slab->available < cache->objects); |
*((int *)obj) = slab->nextavail; |
slab->nextavail = (obj - slab->start) / cache->size; |
slab->available++; |
/* Move it to correct list */ |
if (slab->available == cache->objects) { |
/* Free associated memory */ |
list_remove(&slab->link); |
spinlock_unlock(&cache->slablock); |
return freed + slab_space_free(cache, slab); |
} else if (slab->available == 1) { |
/* It was in full, move to partial */ |
list_remove(&slab->link); |
list_prepend(&slab->link, &cache->partial_slabs); |
} |
spinlock_unlock(&cache->slablock); |
return freed; |
} |
/** |
* Take a new object from a slab, creating a new slab if needed |
* |
* @return Object address or NULL |
*/ |
static void *slab_obj_create(slab_cache_t *cache, int flags) |
{ |
slab_t *slab; |
void *obj; |
spinlock_lock(&cache->slablock); |
if (list_empty(&cache->partial_slabs)) { |
/* Allow recursion and reclaiming |
* - this should work, as the slab control structures |
* are small and need nothing but frame_alloc when they |
* are being allocated; that is why the recursion is at |
* most one level deep. |
*/ |
spinlock_unlock(&cache->slablock); |
slab = slab_space_alloc(cache, flags); |
if (!slab) |
return NULL; |
spinlock_lock(&cache->slablock); |
} else { |
slab = list_get_instance(cache->partial_slabs.next, slab_t, |
link); |
list_remove(&slab->link); |
} |
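/* Pop the next free object off the slab's embedded free list. */ |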
obj = slab->start + slab->nextavail * cache->size; |
slab->nextavail = *((int *)obj); |
slab->available--; |
if (!slab->available) |
list_prepend(&slab->link, &cache->full_slabs); |
else |
list_prepend(&slab->link, &cache->partial_slabs); |
spinlock_unlock(&cache->slablock); |
if (cache->constructor && cache->constructor(obj, flags)) { |
/* Bad, bad, construction failed */ |
slab_obj_destroy(cache, obj, slab); |
return NULL; |
} |
return obj; |
} |
/**************************************/ |
/* CPU-Cache slab functions */ |
/** |
* Find a full magazine in the cache, remove it from the list |
* and return it |
* |
* @param first If true, return the first magazine on the list, else the last |
*/ |
static slab_magazine_t *get_mag_from_cache(slab_cache_t *cache, int first) |
{ |
slab_magazine_t *mag = NULL; |
link_t *cur; |
spinlock_lock(&cache->maglock); |
if (!list_empty(&cache->magazines)) { |
if (first) |
cur = cache->magazines.next; |
else |
cur = cache->magazines.prev; |
mag = list_get_instance(cur, slab_magazine_t, link); |
list_remove(&mag->link); |
atomic_dec(&cache->magazine_counter); |
} |
spinlock_unlock(&cache->maglock); |
return mag; |
} |
/** Prepend magazine to magazine list in cache */ |
static void put_mag_to_cache(slab_cache_t *cache, slab_magazine_t *mag) |
{ |
spinlock_lock(&cache->maglock); |
list_prepend(&mag->link, &cache->magazines); |
atomic_inc(&cache->magazine_counter); |
spinlock_unlock(&cache->maglock); |
} |
/** |
* Free all objects in magazine and free memory associated with magazine |
* |
* @return Number of freed pages |
*/ |
static count_t magazine_destroy(slab_cache_t *cache, slab_magazine_t *mag) |
{ |
unsigned int i; |
count_t frames = 0; |
for (i = 0; i < mag->busy; i++) { |
frames += slab_obj_destroy(cache, mag->objs[i], NULL); |
atomic_dec(&cache->cached_objs); |
} |
slab_free(&mag_cache, mag); |
return frames; |
} |
/** |
* Find full magazine, set it as current and return it |
* |
* Assume cpu_magazine lock is held |
*/ |
static slab_magazine_t *get_full_current_mag(slab_cache_t *cache) |
{ |
slab_magazine_t *cmag, *lastmag, *newmag; |
cmag = cache->mag_cache[CPU->id].current; |
lastmag = cache->mag_cache[CPU->id].last; |
if (cmag) { /* First try local CPU magazines */ |
if (cmag->busy) |
return cmag; |
if (lastmag && lastmag->busy) { |
cache->mag_cache[CPU->id].current = lastmag; |
cache->mag_cache[CPU->id].last = cmag; |
return lastmag; |
} |
} |
/* Local magazines are empty, import one from magazine list */ |
newmag = get_mag_from_cache(cache, 1); |
if (!newmag) |
return NULL; |
if (lastmag) |
magazine_destroy(cache, lastmag); |
cache->mag_cache[CPU->id].last = cmag; |
cache->mag_cache[CPU->id].current = newmag; |
return newmag; |
} |
/** |
* Try to find object in CPU-cache magazines |
* |
* @return Pointer to object or NULL if not available |
*/ |
static void *magazine_obj_get(slab_cache_t *cache) |
{ |
slab_magazine_t *mag; |
void *obj; |
if (!CPU) |
return NULL; |
spinlock_lock(&cache->mag_cache[CPU->id].lock); |
mag = get_full_current_mag(cache); |
if (!mag) { |
spinlock_unlock(&cache->mag_cache[CPU->id].lock); |
return NULL; |
} |
obj = mag->objs[--mag->busy]; |
spinlock_unlock(&cache->mag_cache[CPU->id].lock); |
atomic_dec(&cache->cached_objs); |
return obj; |
} |
/** |
* Make sure that the current magazine has room for another object; return a |
* pointer to it, or NULL if no such magazine is available and a new one |
* cannot be allocated |
* |
* Assume mag_cache[CPU->id].lock is held |
* |
* We have two magazines bound to each processor. |
* First try the current one. |
* If it is full, try the last one. |
* If that is full too, put it on the magazine list, allocate a new one |
* and exchange last & current. |
* |
*/ |
static slab_magazine_t *make_empty_current_mag(slab_cache_t *cache) |
{ |
slab_magazine_t *cmag, *lastmag, *newmag; |
cmag = cache->mag_cache[CPU->id].current; |
lastmag = cache->mag_cache[CPU->id].last; |
if (cmag) { |
if (cmag->busy < cmag->size) |
return cmag; |
if (lastmag && lastmag->busy < lastmag->size) { |
cache->mag_cache[CPU->id].last = cmag; |
cache->mag_cache[CPU->id].current = lastmag; |
return lastmag; |
} |
} |
/* Both current and last are full or nonexistent, allocate a new magazine. */ |
/* We do not want to sleep just because of caching. */ |
/* Especially, we do not want reclaiming to start, as |
* this would deadlock. */ |
newmag = slab_alloc(&mag_cache, FRAME_ATOMIC | FRAME_NO_RECLAIM); |
if (!newmag) |
return NULL; |
newmag->size = SLAB_MAG_SIZE; |
newmag->busy = 0; |
/* Flush last to magazine list */ |
if (lastmag) |
put_mag_to_cache(cache, lastmag); |
/* Move current as last, save new as current */ |
cache->mag_cache[CPU->id].last = cmag; |
cache->mag_cache[CPU->id].current = newmag; |
return newmag; |
} |
/** |
* Put object into CPU-cache magazine |
* |
* @return 0 - success, -1 - could not get memory |
*/ |
static int magazine_obj_put(slab_cache_t *cache, void *obj) |
{ |
slab_magazine_t *mag; |
if (!CPU) |
return -1; |
spinlock_lock(&cache->mag_cache[CPU->id].lock); |
mag = make_empty_current_mag(cache); |
if (!mag) { |
spinlock_unlock(&cache->mag_cache[CPU->id].lock); |
return -1; |
} |
mag->objs[mag->busy++] = obj; |
spinlock_unlock(&cache->mag_cache[CPU->id].lock); |
atomic_inc(&cache->cached_objs); |
return 0; |
} |
/**************************************/ |
/* Slab cache functions */ |
/** Return the number of objects that fit into a single slab of the cache */ |
static unsigned int comp_objects(slab_cache_t *cache) |
{ |
if (cache->flags & SLAB_CACHE_SLINSIDE) |
return ((PAGE_SIZE << cache->order) - sizeof(slab_t)) / |
cache->size; |
else |
return (PAGE_SIZE << cache->order) / cache->size; |
} |
/** Return wasted space in slab */ |
static unsigned int badness(slab_cache_t *cache) |
{ |
unsigned int objects; |
unsigned int ssize; |
objects = comp_objects(cache); |
ssize = PAGE_SIZE << cache->order; |
if (cache->flags & SLAB_CACHE_SLINSIDE) |
ssize -= sizeof(slab_t); |
return ssize - objects * cache->size; |
} |
/** |
* Initialize mag_cache structure in slab cache |
*/ |
static void make_magcache(slab_cache_t *cache) |
{ |
unsigned int i; |
ASSERT(_slab_initialized >= 2); |
cache->mag_cache = malloc(sizeof(slab_mag_cache_t) * config.cpu_count, |
0); |
for (i = 0; i < config.cpu_count; i++) { |
memsetb(&cache->mag_cache[i], sizeof(cache->mag_cache[i]), 0); |
spinlock_initialize(&cache->mag_cache[i].lock, |
"slab_maglock_cpu"); |
} |
} |
/** Initialize allocated memory as a slab cache */ |
static void |
_slab_cache_create(slab_cache_t *cache, char *name, size_t size, size_t align, |
int (*constructor)(void *obj, int kmflag), int (*destructor)(void *obj), |
int flags) |
{ |
int pages; |
ipl_t ipl; |
memsetb(cache, sizeof(*cache), 0); |
cache->name = name; |
if (align < sizeof(unative_t)) |
align = sizeof(unative_t); |
size = ALIGN_UP(size, align); |
cache->size = size; |
cache->constructor = constructor; |
cache->destructor = destructor; |
cache->flags = flags; |
list_initialize(&cache->full_slabs); |
list_initialize(&cache->partial_slabs); |
list_initialize(&cache->magazines); |
spinlock_initialize(&cache->slablock, "slab_lock"); |
spinlock_initialize(&cache->maglock, "slab_maglock"); |
if (!(cache->flags & SLAB_CACHE_NOMAGAZINE)) |
make_magcache(cache); |
/* Compute slab sizes, object counts in slabs etc. */ |
if (cache->size < SLAB_INSIDE_SIZE) |
cache->flags |= SLAB_CACHE_SLINSIDE; |
/* Minimum slab order */ |
pages = SIZE2FRAMES(cache->size); |
/* We need the 2^order >= pages */ |
if (pages == 1) |
cache->order = 0; |
else |
cache->order = fnzb(pages - 1) + 1; |
while (badness(cache) > SLAB_MAX_BADNESS(cache)) { |
cache->order += 1; |
} |
cache->objects = comp_objects(cache); |
/* If info fits in, put it inside */ |
if (badness(cache) > sizeof(slab_t)) |
cache->flags |= SLAB_CACHE_SLINSIDE; |
/* Add cache to cache list */ |
ipl = interrupts_disable(); |
spinlock_lock(&slab_cache_lock); |
list_append(&cache->link, &slab_cache_list); |
spinlock_unlock(&slab_cache_lock); |
interrupts_restore(ipl); |
} |
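/* |
* A worked example of the sizing logic in _slab_cache_create() (illustrative |
* only; it assumes 4 KiB pages and an 8-byte unative_t, which differ per |
* architecture): a 300-byte object is rounded up to a size of 304. |
* SIZE2FRAMES(304) == 1, so the initial order is 0. Assuming |
* 304 < SLAB_INSIDE_SIZE, the slab_t descriptor is kept inside the slab, |
* comp_objects() yields (4096 - sizeof(slab_t)) / 304 objects and badness() |
* is the unused remainder of the slab. If badness() exceeded |
* SLAB_MAX_BADNESS(cache), the order would be bumped until the waste |
* becomes acceptable. |
*/ |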
/** Create slab cache */ |
slab_cache_t * |
slab_cache_create(char *name, size_t size, size_t align, |
int (*constructor)(void *obj, int kmflag), int (*destructor)(void *obj), |
int flags) |
{ |
slab_cache_t *cache; |
cache = slab_alloc(&slab_cache_cache, 0); |
_slab_cache_create(cache, name, size, align, constructor, destructor, |
flags); |
return cache; |
} |
/** |
* Reclaim space occupied by objects that are already free |
* |
* @param flags If contains SLAB_RECLAIM_ALL, do aggressive freeing |
* @return Number of freed pages |
*/ |
static count_t _slab_reclaim(slab_cache_t *cache, int flags) |
{ |
unsigned int i; |
slab_magazine_t *mag; |
count_t frames = 0; |
int magcount; |
if (cache->flags & SLAB_CACHE_NOMAGAZINE) |
return 0; /* Nothing to do */ |
/* We count up to the original magazine count to avoid |
* an endless loop |
*/ |
magcount = atomic_get(&cache->magazine_counter); |
while (magcount-- && (mag = get_mag_from_cache(cache, 0))) { |
frames += magazine_destroy(cache, mag); |
if (!(flags & SLAB_RECLAIM_ALL) && frames) |
break; |
} |
if (flags & SLAB_RECLAIM_ALL) { |
/* Free cpu-bound magazines */ |
/* Destroy CPU magazines */ |
for (i = 0; i < config.cpu_count; i++) { |
spinlock_lock(&cache->mag_cache[i].lock); |
mag = cache->mag_cache[i].current; |
if (mag) |
frames += magazine_destroy(cache, mag); |
cache->mag_cache[i].current = NULL; |
mag = cache->mag_cache[i].last; |
if (mag) |
frames += magazine_destroy(cache, mag); |
cache->mag_cache[i].last = NULL; |
spinlock_unlock(&cache->mag_cache[i].lock); |
} |
} |
return frames; |
} |
/** Check that there are no slabs and remove cache from system */ |
void slab_cache_destroy(slab_cache_t *cache) |
{ |
ipl_t ipl; |
/* First remove the cache from the cache list, so that we don't need |
* to disable interrupts later |
*/ |
ipl = interrupts_disable(); |
spinlock_lock(&slab_cache_lock); |
list_remove(&cache->link); |
spinlock_unlock(&slab_cache_lock); |
interrupts_restore(ipl); |
/* Do not lock anything, we assume the software is correct and |
* does not touch the cache when it decides to destroy it */ |
/* Destroy all magazines */ |
_slab_reclaim(cache, SLAB_RECLAIM_ALL); |
/* All slabs must be empty */ |
if (!list_empty(&cache->full_slabs) || |
!list_empty(&cache->partial_slabs)) |
panic("Destroying cache that is not empty."); |
if (!(cache->flags & SLAB_CACHE_NOMAGAZINE)) |
free(cache->mag_cache); |
slab_free(&slab_cache_cache, cache); |
} |
/** Allocate new object from cache - if no flags given, always returns memory */ |
void *slab_alloc(slab_cache_t *cache, int flags) |
{ |
ipl_t ipl; |
void *result = NULL; |
/* Disable interrupts to avoid deadlocks with interrupt handlers */ |
ipl = interrupts_disable(); |
if (!(cache->flags & SLAB_CACHE_NOMAGAZINE)) { |
result = magazine_obj_get(cache); |
} |
if (!result) |
result = slab_obj_create(cache, flags); |
interrupts_restore(ipl); |
if (result) |
atomic_inc(&cache->allocated_objs); |
return result; |
} |
/** Return object to cache, use slab if known */ |
static void _slab_free(slab_cache_t *cache, void *obj, slab_t *slab) |
{ |
ipl_t ipl; |
ipl = interrupts_disable(); |
if ((cache->flags & SLAB_CACHE_NOMAGAZINE) || |
magazine_obj_put(cache, obj)) { |
slab_obj_destroy(cache, obj, slab); |
} |
interrupts_restore(ipl); |
atomic_dec(&cache->allocated_objs); |
} |
/** Return slab object to cache */ |
void slab_free(slab_cache_t *cache, void *obj) |
{ |
_slab_free(cache, obj, NULL); |
} |
/* Go through all caches and reclaim what is possible */ |
count_t slab_reclaim(int flags) |
{ |
slab_cache_t *cache; |
link_t *cur; |
count_t frames = 0; |
spinlock_lock(&slab_cache_lock); |
/* TODO: Add an assert that interrupts are disabled; otherwise |
* memory allocation from interrupt handlers can deadlock. |
*/ |
for (cur = slab_cache_list.next; cur != &slab_cache_list; |
cur = cur->next) { |
cache = list_get_instance(cur, slab_cache_t, link); |
frames += _slab_reclaim(cache, flags); |
} |
spinlock_unlock(&slab_cache_lock); |
return frames; |
} |
/* Print list of slab caches */ |
void slab_print_list(void) |
{ |
int skip = 0; |
printf("slab name size pages obj/pg slabs cached allocated" |
" ctl\n"); |
printf("---------------- -------- ------ ------ ------ ------ ---------" |
" ---\n"); |
while (true) { |
slab_cache_t *cache; |
link_t *cur; |
ipl_t ipl; |
int i; |
/* |
* We must not hold the slab_cache_lock spinlock when printing |
* the statistics. Otherwise we can easily deadlock if the print |
* needs to allocate memory. |
* |
* Therefore, we walk through the slab cache list, skipping some |
* amount of already processed caches during each iteration and |
* gathering statistics about the first unprocessed cache. For |
* the sake of printing the statistics, we release the |
* slab_cache_lock and reacquire it afterwards. Then the walk |
* starts again. |
* |
* This limits both the efficiency and also accuracy of the |
* obtained statistics. The efficiency is decreased because the |
* time complexity of the algorithm is quadratic instead of |
* linear. The accuracy is impacted because we drop the lock |
* after processing one cache. If there is someone else |
* manipulating the cache list, we might omit an arbitrary |
* number of caches or process one cache multiple times. |
* However, we do not lose sleep over this, as the algorithm only |
* serves statistics. |
*/ |
ipl = interrupts_disable(); |
spinlock_lock(&slab_cache_lock); |
for (i = 0, cur = slab_cache_list.next; |
i < skip && cur != &slab_cache_list; |
i++, cur = cur->next) |
; |
if (cur == &slab_cache_list) { |
spinlock_unlock(&slab_cache_lock); |
interrupts_restore(ipl); |
break; |
} |
skip++; |
cache = list_get_instance(cur, slab_cache_t, link); |
char *name = cache->name; |
uint8_t order = cache->order; |
size_t size = cache->size; |
unsigned int objects = cache->objects; |
long allocated_slabs = atomic_get(&cache->allocated_slabs); |
long cached_objs = atomic_get(&cache->cached_objs); |
long allocated_objs = atomic_get(&cache->allocated_objs); |
int flags = cache->flags; |
spinlock_unlock(&slab_cache_lock); |
interrupts_restore(ipl); |
printf("%-16s %8" PRIs " %6d %6u %6ld %6ld %9ld %-3s\n", |
name, size, (1 << order), objects, allocated_slabs, |
cached_objs, allocated_objs, |
flags & SLAB_CACHE_SLINSIDE ? "in" : "out"); |
} |
} |
void slab_cache_init(void) |
{ |
int i, size; |
/* Initialize magazine cache */ |
_slab_cache_create(&mag_cache, "slab_magazine", |
sizeof(slab_magazine_t) + SLAB_MAG_SIZE * sizeof(void*), |
sizeof(uintptr_t), NULL, NULL, SLAB_CACHE_NOMAGAZINE | |
SLAB_CACHE_SLINSIDE); |
/* Initialize slab_cache cache */ |
_slab_cache_create(&slab_cache_cache, "slab_cache", |
sizeof(slab_cache_cache), sizeof(uintptr_t), NULL, NULL, |
SLAB_CACHE_NOMAGAZINE | SLAB_CACHE_SLINSIDE); |
/* Initialize external slab cache */ |
slab_extern_cache = slab_cache_create("slab_extern", sizeof(slab_t), 0, |
NULL, NULL, SLAB_CACHE_SLINSIDE | SLAB_CACHE_MAGDEFERRED); |
/* Initialize structures for malloc */ |
for (i = 0, size = (1 << SLAB_MIN_MALLOC_W); |
i < (SLAB_MAX_MALLOC_W - SLAB_MIN_MALLOC_W + 1); |
i++, size <<= 1) { |
malloc_caches[i] = slab_cache_create(malloc_names[i], size, 0, |
NULL, NULL, SLAB_CACHE_MAGDEFERRED); |
} |
#ifdef CONFIG_DEBUG |
_slab_initialized = 1; |
#endif |
} |
/** Enable the CPU cache |
* |
* The kernel calls this function when it knows the real number of |
* processors. It allocates the per-CPU magazine caches and enables them |
* on all existing caches that are marked SLAB_CACHE_MAGDEFERRED. |
*/ |
void slab_enable_cpucache(void) |
{ |
link_t *cur; |
slab_cache_t *s; |
#ifdef CONFIG_DEBUG |
_slab_initialized = 2; |
#endif |
spinlock_lock(&slab_cache_lock); |
for (cur = slab_cache_list.next; cur != &slab_cache_list; |
cur = cur->next) { |
s = list_get_instance(cur, slab_cache_t, link); |
if ((s->flags & SLAB_CACHE_MAGDEFERRED) != |
SLAB_CACHE_MAGDEFERRED) |
continue; |
make_magcache(s); |
s->flags &= ~SLAB_CACHE_MAGDEFERRED; |
} |
spinlock_unlock(&slab_cache_lock); |
} |
/**************************************/ |
/* malloc/free functions */ |
void *malloc(unsigned int size, int flags) |
{ |
ASSERT(_slab_initialized); |
ASSERT(size && size <= (1 << SLAB_MAX_MALLOC_W)); |
if (size < (1 << SLAB_MIN_MALLOC_W)) |
size = (1 << SLAB_MIN_MALLOC_W); |
int idx = fnzb(size - 1) - SLAB_MIN_MALLOC_W + 1; |
return slab_alloc(malloc_caches[idx], flags); |
} |
void *realloc(void *ptr, unsigned int size, int flags) |
{ |
ASSERT(_slab_initialized); |
ASSERT(size <= (1 << SLAB_MAX_MALLOC_W)); |
void *new_ptr; |
if (size > 0) { |
if (size < (1 << SLAB_MIN_MALLOC_W)) |
size = (1 << SLAB_MIN_MALLOC_W); |
int idx = fnzb(size - 1) - SLAB_MIN_MALLOC_W + 1; |
new_ptr = slab_alloc(malloc_caches[idx], flags); |
} else |
new_ptr = NULL; |
if ((new_ptr != NULL) && (ptr != NULL)) { |
slab_t *slab = obj2slab(ptr); |
memcpy(new_ptr, ptr, min(size, slab->cache->size)); |
} |
if (ptr != NULL) |
free(ptr); |
return new_ptr; |
} |
void free(void *ptr) |
{ |
if (!ptr) |
return; |
slab_t *slab = obj2slab(ptr); |
_slab_free(slab->cache, ptr, slab); |
} |
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/page.c |
---|
0,0 → 1,170 |
/* |
* Copyright (c) 2001-2006 Jakub Jermar |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Virtual Address Translation subsystem. |
* |
* This file contains code for creating, destroying and searching |
* mappings between virtual addresses and physical addresses. |
* Functions here are mere wrappers that call the real implementation. |
* They do, however, define the single interface. |
*/ |
/* |
* Note on memory prefetching and updating memory mappings, also described in: |
* AMD x86-64 Architecture Programmer's Manual, Volume 2, System Programming, |
* 7.2.1 Special Coherency Considerations. |
* |
* The processor which modifies a page table mapping can access prefetched data |
* from the old mapping. In order to prevent this, we place a memory barrier |
* after a mapping is updated. |
* |
* We assume that the other processors are either not using the mapping yet |
* (i.e. during the bootstrap) or are executing the TLB shootdown code. While |
* we don't care much about the former case, the processors in the latter case |
* will do an implicit serialization by virtue of running the TLB shootdown |
* interrupt handler. |
*/ |
#include <mm/page.h> |
#include <arch/mm/page.h> |
#include <arch/mm/asid.h> |
#include <mm/as.h> |
#include <mm/frame.h> |
#include <arch/barrier.h> |
#include <arch/types.h> |
#include <arch/asm.h> |
#include <memstr.h> |
#include <debug.h> |
#include <arch.h> |
/** Virtual operations for page subsystem. */ |
page_mapping_operations_t *page_mapping_operations = NULL; |
void page_init(void) |
{ |
page_arch_init(); |
} |
/** Map memory structure |
* |
* Identity-map a memory structure, taking possible |
* crossings of page boundaries into account. |
* |
* @param s Address of the structure. |
* @param size Size of the structure. |
*/ |
void map_structure(uintptr_t s, size_t size) |
{ |
int i, cnt, length; |
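/* |
* Extend the length by the offset of s within its page and compute the |
* number of pages needed to cover the whole structure. |
*/ |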
length = size + (s - (s & ~(PAGE_SIZE - 1))); |
cnt = length / PAGE_SIZE + (length % PAGE_SIZE > 0); |
for (i = 0; i < cnt; i++) |
page_mapping_insert(AS_KERNEL, s + i * PAGE_SIZE, |
s + i * PAGE_SIZE, PAGE_NOT_CACHEABLE | PAGE_WRITE); |
/* Repel prefetched accesses to the old mapping. */ |
memory_barrier(); |
} |
/** Insert mapping of page to frame. |
* |
* Map virtual address page to physical address frame |
* using flags. Allocate and set up any missing page tables. |
* |
* The page table must be locked and interrupts must be disabled. |
* |
* @param as Address space to which the page belongs. |
* @param page Virtual address of the page to be mapped. |
* @param frame Physical address of memory frame to which the mapping is |
* done. |
* @param flags Flags to be used for mapping. |
*/ |
void page_mapping_insert(as_t *as, uintptr_t page, uintptr_t frame, int flags) |
{ |
ASSERT(page_mapping_operations); |
ASSERT(page_mapping_operations->mapping_insert); |
page_mapping_operations->mapping_insert(as, page, frame, flags); |
/* Repel prefetched accesses to the old mapping. */ |
memory_barrier(); |
} |
/** Remove mapping of page. |
* |
* Remove any mapping of page within address space as. |
* TLB shootdown should follow in order to make effects of |
* this call visible. |
* |
* The page table must be locked and interrupts must be disabled. |
* |
* @param as Address space to which the page belongs. |
* @param page Virtual address of the page to be demapped. |
*/ |
void page_mapping_remove(as_t *as, uintptr_t page) |
{ |
ASSERT(page_mapping_operations); |
ASSERT(page_mapping_operations->mapping_remove); |
page_mapping_operations->mapping_remove(as, page); |
/* Repel prefetched accesses to the old mapping. */ |
memory_barrier(); |
} |
/** Find mapping for virtual page |
* |
* Find mapping for virtual page. |
* |
* The page table must be locked and interrupts must be disabled. |
* |
* @param as Address space to which the page belongs. |
* @param page Virtual page. |
* |
* @return NULL if there is no such mapping; requested mapping |
* otherwise. |
*/ |
pte_t *page_mapping_find(as_t *as, uintptr_t page) |
{ |
ASSERT(page_mapping_operations); |
ASSERT(page_mapping_operations->mapping_find); |
return page_mapping_operations->mapping_find(as, page); |
} |
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/tlb.c |
---|
0,0 → 1,190 |
/* |
* Copyright (c) 2001-2004 Jakub Jermar |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Generic TLB shootdown algorithm. |
* |
* The algorithm implemented here is based on the CMU TLB shootdown |
* algorithm and is further simplified (e.g. all CPUs receive all TLB |
* shootdown messages). |
*/ |
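/* |
* A rough sketch of the sequence a caller is expected to follow (the actual |
* call sites live in the address space management code, not in this file; |
* asid, page and count stand for whatever range is being invalidated): |
* |
*	ipl_t ipl = interrupts_disable(); |
*	tlb_shootdown_start(TLB_INVL_PAGES, asid, page, count); |
*	... modify the page tables ... |
*	tlb_invalidate_pages(asid, page, count); |
*	tlb_shootdown_finalize(); |
*	interrupts_restore(ipl); |
*/ |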
#include <mm/tlb.h> |
#include <mm/asid.h> |
#include <arch/mm/tlb.h> |
#include <smp/ipi.h> |
#include <synch/spinlock.h> |
#include <atomic.h> |
#include <arch/interrupt.h> |
#include <config.h> |
#include <arch.h> |
#include <panic.h> |
#include <debug.h> |
#include <cpu.h> |
/** |
* This lock is used for synchronisation between the sender and the |
* recipients of a TLB shootdown message. It must be acquired |
* before the CPU structure lock. |
*/ |
SPINLOCK_INITIALIZE(tlblock); |
void tlb_init(void) |
{ |
tlb_arch_init(); |
} |
#ifdef CONFIG_SMP |
/** Send TLB shootdown message. |
* |
* This function attempts to deliver TLB shootdown message |
* to all other processors. |
* |
* This function must be called with interrupts disabled. |
* |
* @param type Type describing scope of shootdown. |
* @param asid Address space, if required by type. |
* @param page Virtual page address, if required by type. |
* @param count Number of pages, if required by type. |
*/ |
void tlb_shootdown_start(tlb_invalidate_type_t type, asid_t asid, |
uintptr_t page, count_t count) |
{ |
unsigned int i; |
CPU->tlb_active = 0; |
spinlock_lock(&tlblock); |
for (i = 0; i < config.cpu_count; i++) { |
cpu_t *cpu; |
if (i == CPU->id) |
continue; |
cpu = &cpus[i]; |
spinlock_lock(&cpu->lock); |
if (cpu->tlb_messages_count == TLB_MESSAGE_QUEUE_LEN) { |
/* |
* The message queue is full. |
* Erase the queue and store one TLB_INVL_ALL message. |
*/ |
cpu->tlb_messages_count = 1; |
cpu->tlb_messages[0].type = TLB_INVL_ALL; |
cpu->tlb_messages[0].asid = ASID_INVALID; |
cpu->tlb_messages[0].page = 0; |
cpu->tlb_messages[0].count = 0; |
} else { |
/* |
* Enqueue the message. |
*/ |
index_t idx = cpu->tlb_messages_count++; |
cpu->tlb_messages[idx].type = type; |
cpu->tlb_messages[idx].asid = asid; |
cpu->tlb_messages[idx].page = page; |
cpu->tlb_messages[idx].count = count; |
} |
spinlock_unlock(&cpu->lock); |
} |
tlb_shootdown_ipi_send(); |
busy_wait: |
for (i = 0; i < config.cpu_count; i++) |
if (cpus[i].tlb_active) |
goto busy_wait; |
} |
/** Finish TLB shootdown sequence. */ |
void tlb_shootdown_finalize(void) |
{ |
spinlock_unlock(&tlblock); |
CPU->tlb_active = 1; |
} |
void tlb_shootdown_ipi_send(void) |
{ |
ipi_broadcast(VECTOR_TLB_SHOOTDOWN_IPI); |
} |
/** Receive TLB shootdown message. */ |
void tlb_shootdown_ipi_recv(void) |
{ |
tlb_invalidate_type_t type; |
asid_t asid; |
uintptr_t page; |
count_t count; |
unsigned int i; |
ASSERT(CPU); |
CPU->tlb_active = 0; |
spinlock_lock(&tlblock); |
spinlock_unlock(&tlblock); |
spinlock_lock(&CPU->lock); |
ASSERT(CPU->tlb_messages_count <= TLB_MESSAGE_QUEUE_LEN); |
/* Process each queued message exactly once. */ |
for (i = 0; i < CPU->tlb_messages_count; i++) { |
type = CPU->tlb_messages[i].type; |
asid = CPU->tlb_messages[i].asid; |
page = CPU->tlb_messages[i].page; |
count = CPU->tlb_messages[i].count; |
switch (type) { |
case TLB_INVL_ALL: |
tlb_invalidate_all(); |
break; |
case TLB_INVL_ASID: |
tlb_invalidate_asid(asid); |
break; |
case TLB_INVL_PAGES: |
ASSERT(count); |
tlb_invalidate_pages(asid, page, count); |
break; |
default: |
panic("unknown type (%d)\n", type); |
break; |
} |
if (type == TLB_INVL_ALL) |
break; |
} |
/* All messages have been processed; drain the queue. */ |
CPU->tlb_messages_count = 0; |
spinlock_unlock(&CPU->lock); |
CPU->tlb_active = 1; |
} |
#endif /* CONFIG_SMP */ |
/** @} |
*/ |
/branches/arm/kernel/generic/src/mm/backend_phys.c |
---|
0,0 → 1,97 |
/* |
* Copyright (c) 2006 Jakub Jermar |
* All rights reserved. |
* |
* Redistribution and use in source and binary forms, with or without |
* modification, are permitted provided that the following conditions |
* are met: |
* |
* - Redistributions of source code must retain the above copyright |
* notice, this list of conditions and the following disclaimer. |
* - Redistributions in binary form must reproduce the above copyright |
* notice, this list of conditions and the following disclaimer in the |
* documentation and/or other materials provided with the distribution. |
* - The name of the author may not be used to endorse or promote products |
* derived from this software without specific prior written permission. |
* |
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
*/ |
/** @addtogroup genericmm |
* @{ |
*/ |
/** |
* @file |
* @brief Backend for address space areas backed by contiguous physical |
* memory. |
*/ |
#include <debug.h> |
#include <arch/types.h> |
#include <mm/as.h> |
#include <mm/frame.h> |
#include <mm/slab.h> |
#include <memstr.h> |
#include <macros.h> |
#include <arch.h> |
#include <align.h> |
static int phys_page_fault(as_area_t *area, uintptr_t addr, pf_access_t access); |
static void phys_share(as_area_t *area); |
mem_backend_t phys_backend = { |
.page_fault = phys_page_fault, |
.frame_free = NULL, |
.share = phys_share |
}; |
/** Service a page fault in the address space area backed by physical memory. |
* |
* The address space area and page tables must be already locked. |
* |
* @param area Pointer to the address space area. |
* @param addr Faulting virtual address. |
* @param access Access mode that caused the fault (i.e. read/write/exec). |
* |
* @return AS_PF_FAULT on failure (i.e. page fault) or AS_PF_OK on success (i.e. |
* serviced). |
*/ |
int phys_page_fault(as_area_t *area, uintptr_t addr, pf_access_t access) |
{ |
uintptr_t base = area->backend_data.base; |
if (!as_area_check_access(area, access)) |
return AS_PF_FAULT; |
ASSERT(addr - area->base < area->backend_data.frames * FRAME_SIZE); |
page_mapping_insert(AS, addr, base + (addr - area->base), |
as_area_get_flags(area)); |
if (!used_space_insert(area, ALIGN_DOWN(addr, PAGE_SIZE), 1)) |
panic("Could not insert used space.\n"); |
return AS_PF_OK; |
} |
/** Share address space area backed by physical memory. |
* |
* Do nothing, as sharing address space areas |
* that are backed by physical memory is trivial. |
* Note that the function must be defined so that |
* as_area_share() will succeed. |
*/ |
void phys_share(as_area_t *area) |
{ |
} |
/** @} |
*/ |