Subversion Repositories HelenOS

Rev

Rev 3742 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright (c) 2001-2006 Jakub Jermar
  3.  * All rights reserved.
  4.  *
  5.  * Redistribution and use in source and binary forms, with or without
  6.  * modification, are permitted provided that the following conditions
  7.  * are met:
  8.  *
  9.  * - Redistributions of source code must retain the above copyright
  10.  *   notice, this list of conditions and the following disclaimer.
  11.  * - Redistributions in binary form must reproduce the above copyright
  12.  *   notice, this list of conditions and the following disclaimer in the
  13.  *   documentation and/or other materials provided with the distribution.
  14.  * - The name of the author may not be used to endorse or promote products
  15.  *   derived from this software without specific prior written permission.
  16.  *
  17.  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18.  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19.  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20.  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21.  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22.  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23.  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26.  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27.  */
  28.  
  29. /** @addtogroup genericmm
  30.  * @{
  31.  */
  32.  
  33. /**
  34.  * @file
  35.  * @brief   Address space related functions.
  36.  *
  37.  * This file contains address space manipulation functions.
  38.  * Roughly speaking, this is a higher-level client of
  39.  * Virtual Address Translation (VAT) subsystem.
  40.  *
  41.  * Functionality provided by this file allows one to
  42.  * create address spaces and create, resize and share
  43.  * address space areas.
  44.  *
  45.  * @see page.c
  46.  *
  47.  */
  48.  
  49. #include <mm/as.h>
  50. #include <arch/mm/as.h>
  51. #include <mm/page.h>
  52. #include <mm/frame.h>
  53. #include <mm/slab.h>
  54. #include <mm/tlb.h>
  55. #include <arch/mm/page.h>
  56. #include <genarch/mm/page_pt.h>
  57. #include <genarch/mm/page_ht.h>
  58. #include <mm/asid.h>
  59. #include <arch/mm/asid.h>
  60. #include <preemption.h>
  61. #include <synch/spinlock.h>
  62. #include <synch/mutex.h>
  63. #include <adt/list.h>
  64. #include <adt/btree.h>
  65. #include <proc/task.h>
  66. #include <proc/thread.h>
  67. #include <arch/asm.h>
  68. #include <panic.h>
  69. #include <debug.h>
  70. #include <print.h>
  71. #include <memstr.h>
  72. #include <macros.h>
  73. #include <arch.h>
  74. #include <errno.h>
  75. #include <config.h>
  76. #include <align.h>
  77. #include <arch/types.h>
  78. #include <syscall/copy.h>
  79. #include <arch/interrupt.h>
  80.  
  81. #ifdef CONFIG_VIRT_IDX_DCACHE
  82. #include <arch/mm/cache.h>
  83. #endif /* CONFIG_VIRT_IDX_DCACHE */
  84.  
  85. /**
  86.  * Each architecture decides what functions will be used to carry out
  87.  * address space operations such as creating or locking page tables.
       * The pointer starts out as NULL in this file.
  88.  */
  89. as_operations_t *as_operations = NULL;
  90.  
  91. /**
  92.  * Slab for as_t objects.
       * Created in as_init(); as_create() allocates from it and as_destroy()
       * returns objects to it.
  93.  */
  94. static slab_cache_t *as_slab;
  95.  
  96. /**
  97.  * This lock serializes access to the ASID subsystem.
  98.  * It protects:
  99.  * - inactive_as_with_asid_head list
  100.  * - as->asid for each as of the as_t type
  101.  * - asids_allocated counter
  102.  */
  103. SPINLOCK_INITIALIZE(asidlock);
  104.  
  105. /**
  106.  * This list contains address spaces that are not active on any
  107.  * processor and that have valid ASID.
       * Address spaces are linked into it through their
       * inactive_as_with_asid_link member.
  108.  */
  109. LIST_INITIALIZE(inactive_as_with_asid_head);
  110.  
  111. /** Kernel address space. Created by as_init() with FLAG_AS_KERNEL. */
  112. as_t *AS_KERNEL = NULL;
  113.  
  114. static int area_flags_to_page_flags(int);	/* file-local helpers, declared ahead of their first use */
  115. static as_area_t *find_area_and_lock(as_t *, uintptr_t);
  116. static bool check_area_conflicts(as_t *, uintptr_t, size_t, as_area_t *);
  117. static void sh_info_remove_reference(share_info_t *);
  118.  
  118.  
  119. static int as_constructor(void *obj, int flags)
  120. {
  121.     as_t *as = (as_t *) obj;
  122.     int rc;
  123.  
  124.     link_initialize(&as->inactive_as_with_asid_link);
  125.     mutex_initialize(&as->lock, MUTEX_PASSIVE);
  126.    
  127.     rc = as_constructor_arch(as, flags);
  128.    
  129.     return rc;
  130. }
  131.  
  132. static int as_destructor(void *obj)
  133. {
  134.     as_t *as = (as_t *) obj;
  135.  
  136.     return as_destructor_arch(as);
  137. }
  138.  
  139. /** Initialize address space subsystem. */
  140. void as_init(void)
  141. {
  142.     as_arch_init();
  143.  
  144.     as_slab = slab_cache_create("as_slab", sizeof(as_t), 0,
  145.         as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
  146.    
  147.     AS_KERNEL = as_create(FLAG_AS_KERNEL);
  148.     if (!AS_KERNEL)
  149.         panic("Cannot create kernel address space\n");
  150.    
  151.     /* Make sure the kernel address space
  152.      * reference count never drops to zero.
  153.      */
  154.     atomic_set(&AS_KERNEL->refcount, 1);
  155. }
  156.  
  157. /** Create address space.
  158.  *
  159.  * @param flags     Flags that influence the way in wich the address space
  160.  *          is created.
  161.  */
  162. as_t *as_create(int flags)
  163. {
  164.     as_t *as;
  165.  
  166.     as = (as_t *) slab_alloc(as_slab, 0);
  167.     (void) as_create_arch(as, 0);
  168.    
  169.     btree_create(&as->as_area_btree);
  170.    
  171.     if (flags & FLAG_AS_KERNEL)
  172.         as->asid = ASID_KERNEL;
  173.     else
  174.         as->asid = ASID_INVALID;
  175.    
  176.     atomic_set(&as->refcount, 0);
  177.     as->cpu_refcount = 0;
  178. #ifdef AS_PAGE_TABLE
  179.     as->genarch.page_table = page_table_create(flags);
  180. #else
  181.     page_table_create(flags);
  182. #endif
  183.    
  184.     return as;
  185. }
  186.  
  187. /** Destroy address space.
  188.  *
  189.  * When there are no tasks referencing this address space (i.e. its refcount is
  190.  * zero), the address space can be destroyed.
       *
       * The function gives up the ASID of a non-kernel address space (if it has
       * one), destroys all address space areas, the area B+tree and the page
       * table, and finally returns the as_t object to as_slab.
  191.  *
  192.  * We know that we don't hold any spinlock.
  193.  *
  194.  * @param as        Address space to be destroyed. Its refcount must be zero
       *          (asserted).
  195.  */
  196. void as_destroy(as_t *as)
  197. {
  198.     ipl_t ipl;
  199.     bool cond;
  200.     DEADLOCK_PROBE_INIT(p_asidlock);
  201.  
  202.     ASSERT(atomic_get(&as->refcount) == 0);
  203.    
  204.     /*
  205.      * Since there is no reference to this address space,
  206.      * it is safe not to lock its mutex.
  207.      */
  208.  
  209.     /*
  210.      * We need to avoid deadlock between TLB shootdown and asidlock.
  211.      * We therefore try to take asidlock conditionally and if we don't
  212.      * succeed, we enable interrupts and try again. This is done while
  213.      * preemption is disabled to prevent nested context switches. We also
  214.      * depend on the fact that so far no spinlocks are held.
  215.      */
  216.     preemption_disable();
  217.     ipl = interrupts_read();	/* remembered for interrupts_restore() at the end */
  218. retry:
  219.     interrupts_disable();
  220.     if (!spinlock_trylock(&asidlock)) {
  221.         interrupts_enable();
  222.         DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
  223.         goto retry;
  224.     }
  225.     preemption_enable();    /* Interrupts disabled, enable preemption */
  226.     if (as->asid != ASID_INVALID && as != AS_KERNEL) {
              /*
               * Unlink from the inactive list only when this is not the
               * current address space and no CPU references it.
               */
  227.         if (as != AS && as->cpu_refcount == 0)
  228.             list_remove(&as->inactive_as_with_asid_link);
  229.         asid_put(as->asid);
  230.     }
  231.     spinlock_unlock(&asidlock);	/* interrupts stay disabled until interrupts_restore() below */
  232.  
  233.     /*
  234.      * Destroy address space areas of the address space.
  235.      * The B+tree must be walked carefully because it is
  236.      * also being destroyed: as_area_destroy() removes the area's key
           * from the tree, so the first leaf is looked up afresh on every
           * iteration and the loop ends once that leaf holds no keys.
  237.      */
  238.     for (cond = true; cond; ) {
  239.         btree_node_t *node;
  240.  
  241.         ASSERT(!list_empty(&as->as_area_btree.leaf_head));
  242.         node = list_get_instance(as->as_area_btree.leaf_head.next,
  243.             btree_node_t, leaf_link);
  244.  
  245.         if ((cond = node->keys)) {
  246.             as_area_destroy(as, node->key[0]);
  247.         }
  248.     }
  249.  
  250.     btree_destroy(&as->as_area_btree);
  251. #ifdef AS_PAGE_TABLE
  252.     page_table_destroy(as->genarch.page_table);
  253. #else
  254.     page_table_destroy(NULL);
  255. #endif
  256.  
  257.     interrupts_restore(ipl);
  258.  
  259.     slab_free(as_slab, as);
  260. }
  261.  
  262. /** Create address space area of common attributes.
  263.  *
  264.  * The created address space area is added to the target address space.
  265.  *
  266.  * @param as        Target address space.
  267.  * @param flags     Flags of the area memory.
  268.  * @param size      Size of area.
  269.  * @param base      Base address of area.
  270.  * @param attrs     Attributes of the area.
  271.  * @param backend   Address space area backend. NULL if no backend is used.
  272.  * @param backend_data  NULL or a pointer to an array holding two void *.
  273.  *
  274.  * @return      Address space area on success or NULL on failure.
  275.  */
  276. as_area_t *
  277. as_area_create(as_t *as, int flags, size_t size, uintptr_t base, int attrs,
  278.     mem_backend_t *backend, mem_backend_data_t *backend_data)
  279. {
  280.     ipl_t ipl;
  281.     as_area_t *a;
  282.    
  283.     if (base % PAGE_SIZE)
  284.         return NULL;
  285.  
  286.     if (!size)
  287.         return NULL;
  288.  
  289.     /* Writeable executable areas are not supported. */
  290.     if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
  291.         return NULL;
  292.    
  293.     ipl = interrupts_disable();
  294.     mutex_lock(&as->lock);
  295.    
  296.     if (!check_area_conflicts(as, base, size, NULL)) {
  297.         mutex_unlock(&as->lock);
  298.         interrupts_restore(ipl);
  299.         return NULL;
  300.     }
  301.    
  302.     a = (as_area_t *) malloc(sizeof(as_area_t), 0);
  303.  
  304.     mutex_initialize(&a->lock, MUTEX_PASSIVE);
  305.    
  306.     a->as = as;
  307.     a->flags = flags;
  308.     a->attributes = attrs;
  309.     a->pages = SIZE2FRAMES(size);
  310.     a->base = base;
  311.     a->sh_info = NULL;
  312.     a->backend = backend;
  313.     if (backend_data)
  314.         a->backend_data = *backend_data;
  315.     else
  316.         memsetb(&a->backend_data, sizeof(a->backend_data), 0);
  317.  
  318.     btree_create(&a->used_space);
  319.    
  320.     btree_insert(&as->as_area_btree, base, (void *) a, NULL);
  321.  
  322.     mutex_unlock(&as->lock);
  323.     interrupts_restore(ipl);
  324.  
  325.     return a;
  326. }
  327.  
  328. /** Find address space area and change it.
  329.  *
  330.  * @param as        Address space.
  331.  * @param address   Virtual address belonging to the area to be changed.
  332.  *          Must be page-aligned.
  333.  * @param size      New size of the virtual memory block starting at
  334.  *          address.
  335.  * @param flags     Flags influencing the remap operation. Currently unused.
  336.  *
  337.  * @return      Zero on success or a value from @ref errno.h otherwise.
  338.  */
  339. int as_area_resize(as_t *as, uintptr_t address, size_t size, int flags)
  340. {
  341.     as_area_t *area;
  342.     ipl_t ipl;
  343.     size_t pages;
  344.    
  345.     ipl = interrupts_disable();
  346.     mutex_lock(&as->lock);
  347.    
  348.     /*
  349.      * Locate the area.
  350.      */
  351.     area = find_area_and_lock(as, address);
  352.     if (!area) {
  353.         mutex_unlock(&as->lock);
  354.         interrupts_restore(ipl);
  355.         return ENOENT;
  356.     }
  357.  
  358.     if (area->backend == &phys_backend) {
  359.         /*
  360.          * Remapping of address space areas associated
  361.          * with memory mapped devices is not supported.
  362.          */
  363.         mutex_unlock(&area->lock);
  364.         mutex_unlock(&as->lock);
  365.         interrupts_restore(ipl);
  366.         return ENOTSUP;
  367.     }
  368.     if (area->sh_info) {
  369.         /*
  370.          * Remapping of shared address space areas
  371.          * is not supported.
  372.          */
  373.         mutex_unlock(&area->lock);
  374.         mutex_unlock(&as->lock);
  375.         interrupts_restore(ipl);
  376.         return ENOTSUP;
  377.     }
  378.  
  379.     pages = SIZE2FRAMES((address - area->base) + size);
  380.     if (!pages) {
  381.         /*
  382.          * Zero size address space areas are not allowed.
  383.          */
  384.         mutex_unlock(&area->lock);
  385.         mutex_unlock(&as->lock);
  386.         interrupts_restore(ipl);
  387.         return EPERM;
  388.     }
  389.    
  390.     if (pages < area->pages) {
  391.         bool cond;
  392.         uintptr_t start_free = area->base + pages * PAGE_SIZE;
  393.  
  394.         /*
  395.          * Shrinking the area.
  396.          * No need to check for overlaps.
  397.          */
  398.  
  399.         /*
  400.          * Start TLB shootdown sequence.
  401.          */
  402.         tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base +
  403.             pages * PAGE_SIZE, area->pages - pages);
  404.  
  405.         /*
  406.          * Remove frames belonging to used space starting from
  407.          * the highest addresses downwards until an overlap with
  408.          * the resized address space area is found. Note that this
  409.          * is also the right way to remove part of the used_space
  410.          * B+tree leaf list.
  411.          */    
  412.         for (cond = true; cond;) {
  413.             btree_node_t *node;
  414.        
  415.             ASSERT(!list_empty(&area->used_space.leaf_head));
  416.             node =
  417.                 list_get_instance(area->used_space.leaf_head.prev,
  418.                 btree_node_t, leaf_link);
  419.             if ((cond = (bool) node->keys)) {
  420.                 uintptr_t b = node->key[node->keys - 1];
  421.                 count_t c =
  422.                     (count_t) node->value[node->keys - 1];
  423.                 unsigned int i = 0;
  424.            
  425.                 if (overlaps(b, c * PAGE_SIZE, area->base,
  426.                     pages * PAGE_SIZE)) {
  427.                    
  428.                     if (b + c * PAGE_SIZE <= start_free) {
  429.                         /*
  430.                          * The whole interval fits
  431.                          * completely in the resized
  432.                          * address space area.
  433.                          */
  434.                         break;
  435.                     }
  436.        
  437.                     /*
  438.                      * Part of the interval corresponding
  439.                      * to b and c overlaps with the resized
  440.                      * address space area.
  441.                      */
  442.        
  443.                     cond = false;   /* we are almost done */
  444.                     i = (start_free - b) >> PAGE_WIDTH;
  445.                     if (!used_space_remove(area, start_free,
  446.                         c - i))
  447.                         panic("Could not remove used "
  448.                             "space.\n");
  449.                 } else {
  450.                     /*
  451.                      * The interval of used space can be
  452.                      * completely removed.
  453.                      */
  454.                     if (!used_space_remove(area, b, c))
  455.                         panic("Could not remove used "
  456.                             "space.\n");
  457.                 }
  458.            
  459.                 for (; i < c; i++) {
  460.                     pte_t *pte;
  461.            
  462.                     page_table_lock(as, false);
  463.                     pte = page_mapping_find(as, b +
  464.                         i * PAGE_SIZE);
  465.                     ASSERT(pte && PTE_VALID(pte) &&
  466.                         PTE_PRESENT(pte));
  467.                     if (area->backend &&
  468.                         area->backend->frame_free) {
  469.                         area->backend->frame_free(area,
  470.                             b + i * PAGE_SIZE,
  471.                             PTE_GET_FRAME(pte));
  472.                     }
  473.                     page_mapping_remove(as, b +
  474.                         i * PAGE_SIZE);
  475.                     page_table_unlock(as, false);
  476.                 }
  477.             }
  478.         }
  479.  
  480.         /*
  481.          * Finish TLB shootdown sequence.
  482.          */
  483.  
  484.         tlb_invalidate_pages(as->asid, area->base + pages * PAGE_SIZE,
  485.             area->pages - pages);
  486.         /*
  487.          * Invalidate software translation caches (e.g. TSB on sparc64).
  488.          */
  489.         as_invalidate_translation_cache(as, area->base +
  490.             pages * PAGE_SIZE, area->pages - pages);
  491.         tlb_shootdown_finalize();
  492.        
  493.     } else {
  494.         /*
  495.          * Growing the area.
  496.          * Check for overlaps with other address space areas.
  497.          */
  498.         if (!check_area_conflicts(as, address, pages * PAGE_SIZE,
  499.             area)) {
  500.             mutex_unlock(&area->lock);
  501.             mutex_unlock(&as->lock);       
  502.             interrupts_restore(ipl);
  503.             return EADDRNOTAVAIL;
  504.         }
  505.     }
  506.  
  507.     area->pages = pages;
  508.    
  509.     mutex_unlock(&area->lock);
  510.     mutex_unlock(&as->lock);
  511.     interrupts_restore(ipl);
  512.  
  513.     return 0;
  514. }
  515.  
  516. /** Destroy address space area.
  517.  *
  518.  * @param as        Address space.
  519.  * @param address   Address within the area to be deleted.
  520.  *
  521.  * @return      Zero on success or a value from @ref errno.h on failure.
  522.  */
  523. int as_area_destroy(as_t *as, uintptr_t address)
  524. {
  525.     as_area_t *area;
  526.     uintptr_t base;
  527.     link_t *cur;
  528.     ipl_t ipl;
  529.  
  530.     ipl = interrupts_disable();
  531.     mutex_lock(&as->lock);
  532.  
  533.     area = find_area_and_lock(as, address);
  534.     if (!area) {
  535.         mutex_unlock(&as->lock);
  536.         interrupts_restore(ipl);
  537.         return ENOENT;
  538.     }
  539.  
  540.     base = area->base;
  541.  
  542.     /*
  543.      * Start TLB shootdown sequence.
  544.      */
  545.     tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base, area->pages);
  546.  
  547.     /*
  548.      * Visit only the pages mapped by used_space B+tree.
  549.      */
  550.     for (cur = area->used_space.leaf_head.next;
  551.         cur != &area->used_space.leaf_head; cur = cur->next) {
  552.         btree_node_t *node;
  553.         unsigned int i;
  554.        
  555.         node = list_get_instance(cur, btree_node_t, leaf_link);
  556.         for (i = 0; i < node->keys; i++) {
  557.             uintptr_t b = node->key[i];
  558.             count_t j;
  559.             pte_t *pte;
  560.            
  561.             for (j = 0; j < (count_t) node->value[i]; j++) {
  562.                 page_table_lock(as, false);
  563.                 pte = page_mapping_find(as, b + j * PAGE_SIZE);
  564.                 ASSERT(pte && PTE_VALID(pte) &&
  565.                     PTE_PRESENT(pte));
  566.                 if (area->backend &&
  567.                     area->backend->frame_free) {
  568.                     area->backend->frame_free(area, b +
  569.                         j * PAGE_SIZE, PTE_GET_FRAME(pte));
  570.                 }
  571.                 page_mapping_remove(as, b + j * PAGE_SIZE);            
  572.                 page_table_unlock(as, false);
  573.             }
  574.         }
  575.     }
  576.  
  577.     /*
  578.      * Finish TLB shootdown sequence.
  579.      */
  580.  
  581.     tlb_invalidate_pages(as->asid, area->base, area->pages);
  582.     /*
  583.      * Invalidate potential software translation caches (e.g. TSB on
  584.      * sparc64).
  585.      */
  586.     as_invalidate_translation_cache(as, area->base, area->pages);
  587.     tlb_shootdown_finalize();
  588.    
  589.     btree_destroy(&area->used_space);
  590.  
  591.     area->attributes |= AS_AREA_ATTR_PARTIAL;
  592.    
  593.     if (area->sh_info)
  594.         sh_info_remove_reference(area->sh_info);
  595.        
  596.     mutex_unlock(&area->lock);
  597.  
  598.     /*
  599.      * Remove the empty area from address space.
  600.      */
  601.     btree_remove(&as->as_area_btree, base, NULL);
  602.    
  603.     free(area);
  604.    
  605.     mutex_unlock(&as->lock);
  606.     interrupts_restore(ipl);
  607.     return 0;
  608. }
  609.  
  610. /** Share address space area with another or the same address space.
  611.  *
  612.  * Address space area mapping is shared with a new address space area.
  613.  * If the source address space area has not been shared so far,
  614.  * a new sh_info is created. The new address space area simply gets the
  615.  * sh_info of the source area. The process of duplicating the
  616.  * mapping is done through the backend share function.
  617.  *
  618.  * @param src_as    Pointer to source address space.
  619.  * @param src_base  Base address of the source address space area.
  620.  * @param acc_size  Expected size of the source area.
  621.  * @param dst_as    Pointer to destination address space.
  622.  * @param dst_base  Target base address.
  623.  * @param dst_flags_mask Destination address space area flags mask.
  624.  *
  625.  * @return      Zero on success or ENOENT if there is no such task or if
  626.  *          there is no such address space area, EPERM if there was
  627.  *          a problem in accepting the area or ENOMEM if there was a
  628.  *          problem in allocating destination address space area.
  629.  *          ENOTSUP is returned if the address space area backend
  630.  *          does not support sharing.
  631.  */
  632. int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
  633.     as_t *dst_as, uintptr_t dst_base, int dst_flags_mask)
  634. {
  635.     ipl_t ipl;
  636.     int src_flags;
  637.     size_t src_size;
  638.     as_area_t *src_area, *dst_area;
  639.     share_info_t *sh_info;
  640.     mem_backend_t *src_backend;
  641.     mem_backend_data_t src_backend_data;
  642.    
  643.     ipl = interrupts_disable();
  644.     mutex_lock(&src_as->lock);
  645.     src_area = find_area_and_lock(src_as, src_base);
  646.     if (!src_area) {
  647.         /*
  648.          * Could not find the source address space area.
  649.          */
  650.         mutex_unlock(&src_as->lock);
  651.         interrupts_restore(ipl);
  652.         return ENOENT;
  653.     }
  654.  
  655.     if (!src_area->backend || !src_area->backend->share) {
  656.         /*
  657.          * There is no backend or the backend does not
  658.          * know how to share the area.
  659.          */
  660.         mutex_unlock(&src_area->lock);
  661.         mutex_unlock(&src_as->lock);
  662.         interrupts_restore(ipl);
  663.         return ENOTSUP;
  664.     }
  665.    
  666.     src_size = src_area->pages * PAGE_SIZE;
  667.     src_flags = src_area->flags;
  668.     src_backend = src_area->backend;
  669.     src_backend_data = src_area->backend_data;
  670.  
  671.     /* Share the cacheable flag from the original mapping */
  672.     if (src_flags & AS_AREA_CACHEABLE)
  673.         dst_flags_mask |= AS_AREA_CACHEABLE;
  674.  
  675.     if (src_size != acc_size ||
  676.         (src_flags & dst_flags_mask) != dst_flags_mask) {
  677.         mutex_unlock(&src_area->lock);
  678.         mutex_unlock(&src_as->lock);
  679.         interrupts_restore(ipl);
  680.         return EPERM;
  681.     }
  682.  
  683.     /*
  684.      * Now we are committed to sharing the area.
  685.      * First, prepare the area for sharing.
  686.      * Then it will be safe to unlock it.
  687.      */
  688.     sh_info = src_area->sh_info;
  689.     if (!sh_info) {
  690.         sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0);
  691.         mutex_initialize(&sh_info->lock, MUTEX_PASSIVE);
  692.         sh_info->refcount = 2;
  693.         btree_create(&sh_info->pagemap);
  694.         src_area->sh_info = sh_info;
  695.         /*
  696.          * Call the backend to setup sharing.
  697.          */
  698.         src_area->backend->share(src_area);
  699.     } else {
  700.         mutex_lock(&sh_info->lock);
  701.         sh_info->refcount++;
  702.         mutex_unlock(&sh_info->lock);
  703.     }
  704.  
  705.     mutex_unlock(&src_area->lock);
  706.     mutex_unlock(&src_as->lock);
  707.  
  708.     /*
  709.      * Create copy of the source address space area.
  710.      * The destination area is created with AS_AREA_ATTR_PARTIAL
  711.      * attribute set which prevents race condition with
  712.      * preliminary as_page_fault() calls.
  713.      * The flags of the source area are masked against dst_flags_mask
  714.      * to support sharing in less privileged mode.
  715.      */
  716.     dst_area = as_area_create(dst_as, dst_flags_mask, src_size, dst_base,
  717.         AS_AREA_ATTR_PARTIAL, src_backend, &src_backend_data);
  718.     if (!dst_area) {
  719.         /*
  720.          * Destination address space area could not be created.
  721.          */
  722.         sh_info_remove_reference(sh_info);
  723.        
  724.         interrupts_restore(ipl);
  725.         return ENOMEM;
  726.     }
  727.  
  728.     /*
  729.      * Now the destination address space area has been
  730.      * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
  731.      * attribute and set the sh_info.
  732.      */
  733.     mutex_lock(&dst_as->lock); 
  734.     mutex_lock(&dst_area->lock);
  735.     dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
  736.     dst_area->sh_info = sh_info;
  737.     mutex_unlock(&dst_area->lock);
  738.     mutex_unlock(&dst_as->lock);   
  739.  
  740.     interrupts_restore(ipl);
  741.    
  742.     return 0;
  743. }
  744.  
  745. /** Check access mode for address space area.
  746.  *
  747.  * The address space area must be locked prior to this call.
  748.  *
  749.  * @param area      Address space area.
  750.  * @param access    Access mode.
  751.  *
  752.  * @return      False if access violates area's permissions, true
  753.  *          otherwise.
  754.  */
  755. bool as_area_check_access(as_area_t *area, pf_access_t access)
  756. {
  757.     int flagmap[] = {
  758.         [PF_ACCESS_READ] = AS_AREA_READ,
  759.         [PF_ACCESS_WRITE] = AS_AREA_WRITE,
  760.         [PF_ACCESS_EXEC] = AS_AREA_EXEC
  761.     };
  762.  
  763.     if (!(area->flags & flagmap[access]))
  764.         return false;
  765.    
  766.     return true;
  767. }
  768.  
  769. /** Change adress space area flags.
  770.  *
  771.  * The idea is to have the same data, but with a different access mode.
  772.  * This is needed e.g. for writing code into memory and then executing it.
  773.  * In order for this to work properly, this may copy the data
  774.  * into private anonymous memory (unless it's already there).
  775.  *
  776.  * @param as      Address space.
  777.  * @param flags   Flags of the area memory.
  778.  * @param address Address within the area to be changed.
  779.  *
  780.  * @return Zero on success or a value from @ref errno.h on failure.
  781.  *
  782.  */
  783. int as_area_change_flags(as_t *as, int flags, uintptr_t address)
  784. {
  785.     as_area_t *area;
  786.     uintptr_t base;
  787.     link_t *cur;
  788.     ipl_t ipl;
  789.     int page_flags;
  790.     uintptr_t *old_frame;
  791.     index_t frame_idx;
  792.     count_t used_pages;
  793.    
  794.     /* Flags for the new memory mapping */
  795.     page_flags = area_flags_to_page_flags(flags);
  796.  
  797.     ipl = interrupts_disable();
  798.     mutex_lock(&as->lock);
  799.  
  800.     area = find_area_and_lock(as, address);
  801.     if (!area) {
  802.         mutex_unlock(&as->lock);
  803.         interrupts_restore(ipl);
  804.         return ENOENT;
  805.     }
  806.  
  807.     if ((area->sh_info) || (area->backend != &anon_backend)) {
  808.         /* Copying shared areas not supported yet */
  809.         /* Copying non-anonymous memory not supported yet */
  810.         mutex_unlock(&area->lock);
  811.         mutex_unlock(&as->lock);
  812.         interrupts_restore(ipl);
  813.         return ENOTSUP;
  814.     }
  815.  
  816.     base = area->base;
  817.  
  818.     /*
  819.      * Compute total number of used pages in the used_space B+tree
  820.      */
  821.     used_pages = 0;
  822.  
  823.     for (cur = area->used_space.leaf_head.next;
  824.         cur != &area->used_space.leaf_head; cur = cur->next) {
  825.         btree_node_t *node;
  826.         unsigned int i;
  827.        
  828.         node = list_get_instance(cur, btree_node_t, leaf_link);
  829.         for (i = 0; i < node->keys; i++) {
  830.             used_pages += (count_t) node->value[i];
  831.         }
  832.     }
  833.  
  834.     /* An array for storing frame numbers */
  835.     old_frame = malloc(used_pages * sizeof(uintptr_t), 0);
  836.  
  837.     /*
  838.      * Start TLB shootdown sequence.
  839.      */
  840.     tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base, area->pages);
  841.  
  842.     /*
  843.      * Remove used pages from page tables and remember their frame
  844.      * numbers.
  845.      */
  846.     frame_idx = 0;
  847.  
  848.     for (cur = area->used_space.leaf_head.next;
  849.         cur != &area->used_space.leaf_head; cur = cur->next) {
  850.         btree_node_t *node;
  851.         unsigned int i;
  852.        
  853.         node = list_get_instance(cur, btree_node_t, leaf_link);
  854.         for (i = 0; i < node->keys; i++) {
  855.             uintptr_t b = node->key[i];
  856.             count_t j;
  857.             pte_t *pte;
  858.            
  859.             for (j = 0; j < (count_t) node->value[i]; j++) {
  860.                 page_table_lock(as, false);
  861.                 pte = page_mapping_find(as, b + j * PAGE_SIZE);
  862.                 ASSERT(pte && PTE_VALID(pte) &&
  863.                     PTE_PRESENT(pte));
  864.                 old_frame[frame_idx++] = PTE_GET_FRAME(pte);
  865.  
  866.                 /* Remove old mapping */
  867.                 page_mapping_remove(as, b + j * PAGE_SIZE);
  868.                 page_table_unlock(as, false);
  869.             }
  870.         }
  871.     }
  872.  
  873.     /*
  874.      * Finish TLB shootdown sequence.
  875.      */
  876.  
  877.     tlb_invalidate_pages(as->asid, area->base, area->pages);
  878.    
  879.     /*
  880.      * Invalidate potential software translation caches (e.g. TSB on
  881.      * sparc64).
  882.      */
  883.     as_invalidate_translation_cache(as, area->base, area->pages);
  884.     tlb_shootdown_finalize();
  885.  
  886.     /*
  887.      * Set the new flags.
  888.      */
  889.     area->flags = flags;
  890.  
  891.     /*
  892.      * Map pages back in with new flags. This step is kept separate
  893.      * so that the memory area could not be accesed with both the old and
  894.      * the new flags at once.
  895.      */
  896.     frame_idx = 0;
  897.  
  898.     for (cur = area->used_space.leaf_head.next;
  899.         cur != &area->used_space.leaf_head; cur = cur->next) {
  900.         btree_node_t *node;
  901.         unsigned int i;
  902.        
  903.         node = list_get_instance(cur, btree_node_t, leaf_link);
  904.         for (i = 0; i < node->keys; i++) {
  905.             uintptr_t b = node->key[i];
  906.             count_t j;
  907.            
  908.             for (j = 0; j < (count_t) node->value[i]; j++) {
  909.                 page_table_lock(as, false);
  910.  
  911.                 /* Insert the new mapping */
  912.                 page_mapping_insert(as, b + j * PAGE_SIZE,
  913.                     old_frame[frame_idx++], page_flags);
  914.  
  915.                 page_table_unlock(as, false);
  916.             }
  917.         }
  918.     }
  919.  
  920.     free(old_frame);
  921.  
  922.     mutex_unlock(&area->lock);
  923.     mutex_unlock(&as->lock);
  924.     interrupts_restore(ipl);
  925.  
  926.     return 0;
  927. }
  928.  
  929.  
  930. /** Handle page fault within the current address space.
  931.  *
  932.  * This is the high-level page fault handler. It decides whether the page fault
  933.  * can be resolved by any backend and if so, it invokes the backend to resolve
  934.  * the page fault.
  935.  *
  936.  * Interrupts are assumed disabled.
  937.  *
  938.  * @param page      Faulting page.
  939.  * @param access    Access mode that caused the page fault (i.e.
  940.  *          read/write/exec).
  941.  * @param istate    Pointer to the interrupted state.
  942.  *
  943.  * @return      AS_PF_FAULT on page fault, AS_PF_OK on success or
  944.  *          AS_PF_DEFER if the fault was caused by copy_to_uspace()
  945.  *          or copy_from_uspace().
  946.  */
  947. int as_page_fault(uintptr_t page, pf_access_t access, istate_t *istate)
  948. {
  949.     pte_t *pte;
  950.     as_area_t *area;
  951.    
  952.     if (!THREAD)
  953.         return AS_PF_FAULT;
  954.        
  955.     ASSERT(AS);
  956.  
  957.     mutex_lock(&AS->lock);
  958.     area = find_area_and_lock(AS, page);   
  959.     if (!area) {
  960.         /*
  961.          * No area contained mapping for 'page'.
  962.          * Signal page fault to low-level handler.
  963.          */
  964.         mutex_unlock(&AS->lock);
  965.         goto page_fault;
  966.     }
  967.  
  968.     if (area->attributes & AS_AREA_ATTR_PARTIAL) {
  969.         /*
  970.          * The address space area is not fully initialized.
  971.          * Avoid possible race by returning error.
  972.          */
  973.         mutex_unlock(&area->lock);
  974.         mutex_unlock(&AS->lock);
  975.         goto page_fault;       
  976.     }
  977.  
  978.     if (!area->backend || !area->backend->page_fault) {
  979.         /*
  980.          * The address space area is not backed by any backend
  981.          * or the backend cannot handle page faults.
  982.          */
  983.         mutex_unlock(&area->lock);
  984.         mutex_unlock(&AS->lock);
  985.         goto page_fault;       
  986.     }
  987.  
  988.     page_table_lock(AS, false);
  989.    
  990.     /*
  991.      * To avoid race condition between two page faults on the same address,
  992.      * we need to make sure the mapping has not been already inserted.
  993.      */
  994.     if ((pte = page_mapping_find(AS, page))) {
  995.         if (PTE_PRESENT(pte)) {
  996.             if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) ||
  997.                 (access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) ||
  998.                 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) {
  999.                 page_table_unlock(AS, false);
  1000.                 mutex_unlock(&area->lock);
  1001.                 mutex_unlock(&AS->lock);
  1002.                 return AS_PF_OK;
  1003.             }
  1004.         }
  1005.     }
  1006.    
  1007.     /*
  1008.      * Resort to the backend page fault handler.
  1009.      */
  1010.     if (area->backend->page_fault(area, page, access) != AS_PF_OK) {
  1011.         page_table_unlock(AS, false);
  1012.         mutex_unlock(&area->lock);
  1013.         mutex_unlock(&AS->lock);
  1014.         goto page_fault;
  1015.     }
  1016.    
  1017.     page_table_unlock(AS, false);
  1018.     mutex_unlock(&area->lock);
  1019.     mutex_unlock(&AS->lock);
  1020.     return AS_PF_OK;
  1021.  
  1022. page_fault:
  1023.     if (THREAD->in_copy_from_uspace) {
  1024.         THREAD->in_copy_from_uspace = false;
  1025.         istate_set_retaddr(istate,
  1026.             (uintptr_t) &memcpy_from_uspace_failover_address);
  1027.     } else if (THREAD->in_copy_to_uspace) {
  1028.         THREAD->in_copy_to_uspace = false;
  1029.         istate_set_retaddr(istate,
  1030.             (uintptr_t) &memcpy_to_uspace_failover_address);
  1031.     } else {
  1032.         return AS_PF_FAULT;
  1033.     }
  1034.  
  1035.     return AS_PF_DEFER;
  1036. }
  1037.  
  1038. /** Switch address spaces.
  1039.  *
  1040.  * Note that this function cannot sleep as it is essentially a part of
  1041.  * scheduling. Sleeping here would lead to deadlock on wakeup. Another
  1042.  * thing which is forbidden in this context is locking the address space.
  1043.  *
  1044.  * When this function is enetered, no spinlocks may be held.
  1045.  *
  1046.  * @param old       Old address space or NULL.
  1047.  * @param new       New address space.
  1048.  */
  1049. void as_switch(as_t *old_as, as_t *new_as)
  1050. {
  1051.     DEADLOCK_PROBE_INIT(p_asidlock);
  1052.     preemption_disable();
  1053. retry:
  1054.     (void) interrupts_disable();
  1055.     if (!spinlock_trylock(&asidlock)) {
  1056.         /*
  1057.          * Avoid deadlock with TLB shootdown.
  1058.          * We can enable interrupts here because
  1059.          * preemption is disabled. We should not be
  1060.          * holding any other lock.
  1061.          */
  1062.         (void) interrupts_enable();
  1063.         DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
  1064.         goto retry;
  1065.     }
  1066.     preemption_enable();
  1067.  
  1068.     /*
  1069.      * First, take care of the old address space.
  1070.      */
  1071.     if (old_as) {
  1072.         ASSERT(old_as->cpu_refcount);
  1073.         if((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
  1074.             /*
  1075.              * The old address space is no longer active on
  1076.              * any processor. It can be appended to the
  1077.              * list of inactive address spaces with assigned
  1078.              * ASID.
  1079.              */
  1080.             ASSERT(old_as->asid != ASID_INVALID);
  1081.             list_append(&old_as->inactive_as_with_asid_link,
  1082.                 &inactive_as_with_asid_head);
  1083.         }
  1084.  
  1085.         /*
  1086.          * Perform architecture-specific tasks when the address space
  1087.          * is being removed from the CPU.
  1088.          */
  1089.         as_deinstall_arch(old_as);
  1090.     }
  1091.  
  1092.     /*
  1093.      * Second, prepare the new address space.
  1094.      */
  1095.     if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
  1096.         if (new_as->asid != ASID_INVALID)
  1097.             list_remove(&new_as->inactive_as_with_asid_link);
  1098.         else
  1099.             new_as->asid = asid_get();
  1100.     }
  1101. #ifdef AS_PAGE_TABLE
  1102.     SET_PTL0_ADDRESS(new_as->genarch.page_table);
  1103. #endif
  1104.    
  1105.     /*
  1106.      * Perform architecture-specific steps.
  1107.      * (e.g. write ASID to hardware register etc.)
  1108.      */
  1109.     as_install_arch(new_as);
  1110.  
  1111.     spinlock_unlock(&asidlock);
  1112.    
  1113.     AS = new_as;
  1114. }
  1115.  
  1116. /** Convert address space area flags to page flags.
  1117.  *
  1118.  * @param aflags    Flags of some address space area.
  1119.  *
  1120.  * @return      Flags to be passed to page_mapping_insert().
  1121.  */
  1122. int area_flags_to_page_flags(int aflags)
  1123. {
  1124.     int flags;
  1125.  
  1126.     flags = PAGE_USER | PAGE_PRESENT;
  1127.    
  1128.     if (aflags & AS_AREA_READ)
  1129.         flags |= PAGE_READ;
  1130.        
  1131.     if (aflags & AS_AREA_WRITE)
  1132.         flags |= PAGE_WRITE;
  1133.    
  1134.     if (aflags & AS_AREA_EXEC)
  1135.         flags |= PAGE_EXEC;
  1136.    
  1137.     if (aflags & AS_AREA_CACHEABLE)
  1138.         flags |= PAGE_CACHEABLE;
  1139.        
  1140.     return flags;
  1141. }
  1142.  
  1143. /** Compute flags for virtual address translation subsytem.
  1144.  *
  1145.  * The address space area must be locked.
  1146.  * Interrupts must be disabled.
  1147.  *
  1148.  * @param a     Address space area.
  1149.  *
  1150.  * @return      Flags to be used in page_mapping_insert().
  1151.  */
  1152. int as_area_get_flags(as_area_t *a)
  1153. {
  1154.     return area_flags_to_page_flags(a->flags);
  1155. }
  1156.  
  1157. /** Create page table.
  1158.  *
  1159.  * Depending on architecture, create either address space private or global page
  1160.  * table.
  1161.  *
  1162.  * @param flags     Flags saying whether the page table is for the kernel
  1163.  *          address space.
  1164.  *
  1165.  * @return      First entry of the page table.
  1166.  */
  1167. pte_t *page_table_create(int flags)
  1168. {
  1169.     ASSERT(as_operations);
  1170.     ASSERT(as_operations->page_table_create);
  1171.    
  1172.     return as_operations->page_table_create(flags);
  1173. }
  1174.  
  1175. /** Destroy page table.
  1176.  *
  1177.  * Destroy page table in architecture specific way.
  1178.  *
  1179.  * @param page_table    Physical address of PTL0.
  1180.  */
  1181. void page_table_destroy(pte_t *page_table)
  1182. {
  1183.     ASSERT(as_operations);
  1184.     ASSERT(as_operations->page_table_destroy);
  1185.    
  1186.     as_operations->page_table_destroy(page_table);
  1187. }
  1188.  
  1189. /** Lock page table.
  1190.  *
  1191.  * This function should be called before any page_mapping_insert(),
  1192.  * page_mapping_remove() and page_mapping_find().
  1193.  *
  1194.  * Locking order is such that address space areas must be locked
  1195.  * prior to this call. Address space can be locked prior to this
  1196.  * call in which case the lock argument is false.
  1197.  *
  1198.  * @param as        Address space.
  1199.  * @param lock      If false, do not attempt to lock as->lock.
  1200.  */
  1201. void page_table_lock(as_t *as, bool lock)
  1202. {
  1203.     ASSERT(as_operations);
  1204.     ASSERT(as_operations->page_table_lock);
  1205.    
  1206.     as_operations->page_table_lock(as, lock);
  1207. }
  1208.  
  1209. /** Unlock page table.
  1210.  *
  1211.  * @param as        Address space.
  1212.  * @param unlock    If false, do not attempt to unlock as->lock.
  1213.  */
  1214. void page_table_unlock(as_t *as, bool unlock)
  1215. {
  1216.     ASSERT(as_operations);
  1217.     ASSERT(as_operations->page_table_unlock);
  1218.    
  1219.     as_operations->page_table_unlock(as, unlock);
  1220. }
  1221.  
  1222.  
  1223. /** Find address space area and lock it.
  1224.  *
  1225.  * The address space must be locked and interrupts must be disabled.
  1226.  *
  1227.  * @param as        Address space.
  1228.  * @param va        Virtual address.
  1229.  *
  1230.  * @return      Locked address space area containing va on success or
  1231.  *          NULL on failure.
  1232.  */
  1233. as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
  1234. {
  1235.     as_area_t *a;
  1236.     btree_node_t *leaf, *lnode;
  1237.     unsigned int i;
  1238.    
  1239.     a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf);
  1240.     if (a) {
  1241.         /* va is the base address of an address space area */
  1242.         mutex_lock(&a->lock);
  1243.         return a;
  1244.     }
  1245.    
  1246.     /*
  1247.      * Search the leaf node and the righmost record of its left neighbour
  1248.      * to find out whether this is a miss or va belongs to an address
  1249.      * space area found there.
  1250.      */
  1251.    
  1252.     /* First, search the leaf node itself. */
  1253.     for (i = 0; i < leaf->keys; i++) {
  1254.         a = (as_area_t *) leaf->value[i];
  1255.         mutex_lock(&a->lock);
  1256.         if ((a->base <= va) && (va < a->base + a->pages * PAGE_SIZE)) {
  1257.             return a;
  1258.         }
  1259.         mutex_unlock(&a->lock);
  1260.     }
  1261.  
  1262.     /*
  1263.      * Second, locate the left neighbour and test its last record.
  1264.      * Because of its position in the B+tree, it must have base < va.
  1265.      */
  1266.     lnode = btree_leaf_node_left_neighbour(&as->as_area_btree, leaf);
  1267.     if (lnode) {
  1268.         a = (as_area_t *) lnode->value[lnode->keys - 1];
  1269.         mutex_lock(&a->lock);
  1270.         if (va < a->base + a->pages * PAGE_SIZE) {
  1271.             return a;
  1272.         }
  1273.         mutex_unlock(&a->lock);
  1274.     }
  1275.  
  1276.     return NULL;
  1277. }
  1278.  
  1279. /** Check area conflicts with other areas.
  1280.  *
  1281.  * The address space must be locked and interrupts must be disabled.
  1282.  *
  1283.  * @param as        Address space.
  1284.  * @param va        Starting virtual address of the area being tested.
  1285.  * @param size      Size of the area being tested.
  1286.  * @param avoid_area    Do not touch this area.
  1287.  *
  1288.  * @return      True if there is no conflict, false otherwise.
  1289.  */
  1290. bool
  1291. check_area_conflicts(as_t *as, uintptr_t va, size_t size, as_area_t *avoid_area)
  1292. {
  1293.     as_area_t *a;
  1294.     btree_node_t *leaf, *node;
  1295.     unsigned int i;
  1296.    
  1297.     /*
  1298.      * We don't want any area to have conflicts with NULL page.
  1299.      */
  1300.     if (overlaps(va, size, NULL, PAGE_SIZE))
  1301.         return false;
  1302.    
  1303.     /*
  1304.      * The leaf node is found in O(log n), where n is proportional to
  1305.      * the number of address space areas belonging to as.
  1306.      * The check for conflicts is then attempted on the rightmost
  1307.      * record in the left neighbour, the leftmost record in the right
  1308.      * neighbour and all records in the leaf node itself.
  1309.      */
  1310.    
  1311.     if ((a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf))) {
  1312.         if (a != avoid_area)
  1313.             return false;
  1314.     }
  1315.    
  1316.     /* First, check the two border cases. */
  1317.     if ((node = btree_leaf_node_left_neighbour(&as->as_area_btree, leaf))) {
  1318.         a = (as_area_t *) node->value[node->keys - 1];
  1319.         mutex_lock(&a->lock);
  1320.         if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
  1321.             mutex_unlock(&a->lock);
  1322.             return false;
  1323.         }
  1324.         mutex_unlock(&a->lock);
  1325.     }
  1326.     node = btree_leaf_node_right_neighbour(&as->as_area_btree, leaf);
  1327.     if (node) {
  1328.         a = (as_area_t *) node->value[0];
  1329.         mutex_lock(&a->lock);
  1330.         if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
  1331.             mutex_unlock(&a->lock);
  1332.             return false;
  1333.         }
  1334.         mutex_unlock(&a->lock);
  1335.     }
  1336.    
  1337.     /* Second, check the leaf node. */
  1338.     for (i = 0; i < leaf->keys; i++) {
  1339.         a = (as_area_t *) leaf->value[i];
  1340.    
  1341.         if (a == avoid_area)
  1342.             continue;
  1343.    
  1344.         mutex_lock(&a->lock);
  1345.         if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
  1346.             mutex_unlock(&a->lock);
  1347.             return false;
  1348.         }
  1349.         mutex_unlock(&a->lock);
  1350.     }
  1351.  
  1352.     /*
  1353.      * So far, the area does not conflict with other areas.
  1354.      * Check if it doesn't conflict with kernel address space.
  1355.      */  
  1356.     if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
  1357.         return !overlaps(va, size,
  1358.             KERNEL_ADDRESS_SPACE_START,
  1359.             KERNEL_ADDRESS_SPACE_END - KERNEL_ADDRESS_SPACE_START);
  1360.     }
  1361.  
  1362.     return true;
  1363. }
  1364.  
  1365. /** Return size of the address space area with given base.
  1366.  *
  1367.  * @param base      Arbitrary address insede the address space area.
  1368.  *
  1369.  * @return      Size of the address space area in bytes or zero if it
  1370.  *          does not exist.
  1371.  */
  1372. size_t as_area_get_size(uintptr_t base)
  1373. {
  1374.     ipl_t ipl;
  1375.     as_area_t *src_area;
  1376.     size_t size;
  1377.  
  1378.     ipl = interrupts_disable();
  1379.     src_area = find_area_and_lock(AS, base);
  1380.     if (src_area) {
  1381.         size = src_area->pages * PAGE_SIZE;
  1382.         mutex_unlock(&src_area->lock);
  1383.     } else {
  1384.         size = 0;
  1385.     }
  1386.     interrupts_restore(ipl);
  1387.     return size;
  1388. }
  1389.  
  1390. /** Mark portion of address space area as used.
  1391.  *
  1392.  * The address space area must be already locked.
  1393.  *
  1394.  * @param a     Address space area.
  1395.  * @param page      First page to be marked.
  1396.  * @param count     Number of page to be marked.
  1397.  *
  1398.  * @return      Zero on failure and non-zero on success.
  1399.  */
  1400. int used_space_insert(as_area_t *a, uintptr_t page, count_t count)
  1401. {
  1402.     btree_node_t *leaf, *node;
  1403.     count_t pages;
  1404.     unsigned int i;
  1405.  
  1406.     ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
  1407.     ASSERT(count);
  1408.  
  1409.     pages = (count_t) btree_search(&a->used_space, page, &leaf);
  1410.     if (pages) {
  1411.         /*
  1412.          * We hit the beginning of some used space.
  1413.          */
  1414.         return 0;
  1415.     }
  1416.  
  1417.     if (!leaf->keys) {
  1418.         btree_insert(&a->used_space, page, (void *) count, leaf);
  1419.         return 1;
  1420.     }
  1421.  
  1422.     node = btree_leaf_node_left_neighbour(&a->used_space, leaf);
  1423.     if (node) {
  1424.         uintptr_t left_pg = node->key[node->keys - 1];
  1425.         uintptr_t right_pg = leaf->key[0];
  1426.         count_t left_cnt = (count_t) node->value[node->keys - 1];
  1427.         count_t right_cnt = (count_t) leaf->value[0];
  1428.        
  1429.         /*
  1430.          * Examine the possibility that the interval fits
  1431.          * somewhere between the rightmost interval of
  1432.          * the left neigbour and the first interval of the leaf.
  1433.          */
  1434.          
  1435.         if (page >= right_pg) {
  1436.             /* Do nothing. */
  1437.         } else if (overlaps(page, count * PAGE_SIZE, left_pg,
  1438.             left_cnt * PAGE_SIZE)) {
  1439.             /* The interval intersects with the left interval. */
  1440.             return 0;
  1441.         } else if (overlaps(page, count * PAGE_SIZE, right_pg,
  1442.             right_cnt * PAGE_SIZE)) {
  1443.             /* The interval intersects with the right interval. */
  1444.             return 0;          
  1445.         } else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
  1446.             (page + count * PAGE_SIZE == right_pg)) {
  1447.             /*
  1448.              * The interval can be added by merging the two already
  1449.              * present intervals.
  1450.              */
  1451.             node->value[node->keys - 1] += count + right_cnt;
  1452.             btree_remove(&a->used_space, right_pg, leaf);
  1453.             return 1;
  1454.         } else if (page == left_pg + left_cnt * PAGE_SIZE) {
  1455.             /*
  1456.              * The interval can be added by simply growing the left
  1457.              * interval.
  1458.              */
  1459.             node->value[node->keys - 1] += count;
  1460.             return 1;
  1461.         } else if (page + count * PAGE_SIZE == right_pg) {
  1462.             /*
  1463.              * The interval can be addded by simply moving base of
  1464.              * the right interval down and increasing its size
  1465.              * accordingly.
  1466.              */
  1467.             leaf->value[0] += count;
  1468.             leaf->key[0] = page;
  1469.             return 1;
  1470.         } else {
  1471.             /*
  1472.              * The interval is between both neigbouring intervals,
  1473.              * but cannot be merged with any of them.
  1474.              */
  1475.             btree_insert(&a->used_space, page, (void *) count,
  1476.                 leaf);
  1477.             return 1;
  1478.         }
  1479.     } else if (page < leaf->key[0]) {
  1480.         uintptr_t right_pg = leaf->key[0];
  1481.         count_t right_cnt = (count_t) leaf->value[0];
  1482.    
  1483.         /*
  1484.          * Investigate the border case in which the left neighbour does
  1485.          * not exist but the interval fits from the left.
  1486.          */
  1487.          
  1488.         if (overlaps(page, count * PAGE_SIZE, right_pg,
  1489.             right_cnt * PAGE_SIZE)) {
  1490.             /* The interval intersects with the right interval. */
  1491.             return 0;
  1492.         } else if (page + count * PAGE_SIZE == right_pg) {
  1493.             /*
  1494.              * The interval can be added by moving the base of the
  1495.              * right interval down and increasing its size
  1496.              * accordingly.
  1497.              */
  1498.             leaf->key[0] = page;
  1499.             leaf->value[0] += count;
  1500.             return 1;
  1501.         } else {
  1502.             /*
  1503.              * The interval doesn't adjoin with the right interval.
  1504.              * It must be added individually.
  1505.              */
  1506.             btree_insert(&a->used_space, page, (void *) count,
  1507.                 leaf);
  1508.             return 1;
  1509.         }
  1510.     }
  1511.  
  1512.     node = btree_leaf_node_right_neighbour(&a->used_space, leaf);
  1513.     if (node) {
  1514.         uintptr_t left_pg = leaf->key[leaf->keys - 1];
  1515.         uintptr_t right_pg = node->key[0];
  1516.         count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];
  1517.         count_t right_cnt = (count_t) node->value[0];
  1518.        
  1519.         /*
  1520.          * Examine the possibility that the interval fits
  1521.          * somewhere between the leftmost interval of
  1522.          * the right neigbour and the last interval of the leaf.
  1523.          */
  1524.  
  1525.         if (page < left_pg) {
  1526.             /* Do nothing. */
  1527.         } else if (overlaps(page, count * PAGE_SIZE, left_pg,
  1528.             left_cnt * PAGE_SIZE)) {
  1529.             /* The interval intersects with the left interval. */
  1530.             return 0;
  1531.         } else if (overlaps(page, count * PAGE_SIZE, right_pg,
  1532.             right_cnt * PAGE_SIZE)) {
  1533.             /* The interval intersects with the right interval. */
  1534.             return 0;          
  1535.         } else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
  1536.             (page + count * PAGE_SIZE == right_pg)) {
  1537.             /*
  1538.              * The interval can be added by merging the two already
  1539.              * present intervals.
  1540.              * */
  1541.             leaf->value[leaf->keys - 1] += count + right_cnt;
  1542.             btree_remove(&a->used_space, right_pg, node);
  1543.             return 1;
  1544.         } else if (page == left_pg + left_cnt * PAGE_SIZE) {
  1545.             /*
  1546.              * The interval can be added by simply growing the left
  1547.              * interval.
  1548.              * */
  1549.             leaf->value[leaf->keys - 1] +=  count;
  1550.             return 1;
  1551.         } else if (page + count * PAGE_SIZE == right_pg) {
  1552.             /*
  1553.              * The interval can be addded by simply moving base of
  1554.              * the right interval down and increasing its size
  1555.              * accordingly.
  1556.              */
  1557.             node->value[0] += count;
  1558.             node->key[0] = page;
  1559.             return 1;
  1560.         } else {
  1561.             /*
  1562.              * The interval is between both neigbouring intervals,
  1563.              * but cannot be merged with any of them.
  1564.              */
  1565.             btree_insert(&a->used_space, page, (void *) count,
  1566.                 leaf);
  1567.             return 1;
  1568.         }
  1569.     } else if (page >= leaf->key[leaf->keys - 1]) {
  1570.         uintptr_t left_pg = leaf->key[leaf->keys - 1];
  1571.         count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];
  1572.    
  1573.         /*
  1574.          * Investigate the border case in which the right neighbour
  1575.          * does not exist but the interval fits from the right.
  1576.          */
  1577.          
  1578.         if (overlaps(page, count * PAGE_SIZE, left_pg,
  1579.             left_cnt * PAGE_SIZE)) {
  1580.             /* The interval intersects with the left interval. */
  1581.             return 0;
  1582.         } else if (left_pg + left_cnt * PAGE_SIZE == page) {
  1583.             /*
  1584.              * The interval can be added by growing the left
  1585.              * interval.
  1586.              */
  1587.             leaf->value[leaf->keys - 1] += count;
  1588.             return 1;
  1589.         } else {
  1590.             /*
  1591.              * The interval doesn't adjoin with the left interval.
  1592.              * It must be added individually.
  1593.              */
  1594.             btree_insert(&a->used_space, page, (void *) count,
  1595.                 leaf);
  1596.             return 1;
  1597.         }
  1598.     }
  1599.    
  1600.     /*
  1601.      * Note that if the algorithm made it thus far, the interval can fit
  1602.      * only between two other intervals of the leaf. The two border cases
  1603.      * were already resolved.
  1604.      */
  1605.     for (i = 1; i < leaf->keys; i++) {
  1606.         if (page < leaf->key[i]) {
  1607.             uintptr_t left_pg = leaf->key[i - 1];
  1608.             uintptr_t right_pg = leaf->key[i];
  1609.             count_t left_cnt = (count_t) leaf->value[i - 1];
  1610.             count_t right_cnt = (count_t) leaf->value[i];
  1611.  
  1612.             /*
  1613.              * The interval fits between left_pg and right_pg.
  1614.              */
  1615.  
  1616.             if (overlaps(page, count * PAGE_SIZE, left_pg,
  1617.                 left_cnt * PAGE_SIZE)) {
  1618.                 /*
  1619.                  * The interval intersects with the left
  1620.                  * interval.
  1621.                  */
  1622.                 return 0;
  1623.             } else if (overlaps(page, count * PAGE_SIZE, right_pg,
  1624.                 right_cnt * PAGE_SIZE)) {
  1625.                 /*
  1626.                  * The interval intersects with the right
  1627.                  * interval.
  1628.                  */
  1629.                 return 0;          
  1630.             } else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
  1631.                 (page + count * PAGE_SIZE == right_pg)) {
  1632.                 /*
  1633.                  * The interval can be added by merging the two
  1634.                  * already present intervals.
  1635.                  */
  1636.                 leaf->value[i - 1] += count + right_cnt;
  1637.                 btree_remove(&a->used_space, right_pg, leaf);
  1638.                 return 1;
  1639.             } else if (page == left_pg + left_cnt * PAGE_SIZE) {
  1640.                 /*
  1641.                  * The interval can be added by simply growing
  1642.                  * the left interval.
  1643.                  */
  1644.                 leaf->value[i - 1] += count;
  1645.                 return 1;
  1646.             } else if (page + count * PAGE_SIZE == right_pg) {
  1647.                 /*
  1648.                      * The interval can be addded by simply moving
  1649.                  * base of the right interval down and
  1650.                  * increasing its size accordingly.
  1651.                  */
  1652.                 leaf->value[i] += count;
  1653.                 leaf->key[i] = page;
  1654.                 return 1;
  1655.             } else {
  1656.                 /*
  1657.                  * The interval is between both neigbouring
  1658.                  * intervals, but cannot be merged with any of
  1659.                  * them.
  1660.                  */
  1661.                 btree_insert(&a->used_space, page,
  1662.                     (void *) count, leaf);
  1663.                 return 1;
  1664.             }
  1665.         }
  1666.     }
  1667.  
  1668.     panic("Inconsistency detected while adding %" PRIc " pages of used "
  1669.         "space at %p.\n", count, page);
  1670. }
  1671.  
  1672. /** Mark portion of address space area as unused.
  1673.  *
  1674.  * The address space area must be already locked.
  1675.  *
  1676.  * @param a     Address space area.
  1677.  * @param page      First page to be marked.
  1678.  * @param count     Number of page to be marked.
  1679.  *
  1680.  * @return      Zero on failure and non-zero on success.
  1681.  */
  1682. int used_space_remove(as_area_t *a, uintptr_t page, count_t count)
  1683. {
  1684.     btree_node_t *leaf, *node;
  1685.     count_t pages;
  1686.     unsigned int i;
  1687.  
  1688.     ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
  1689.     ASSERT(count);
  1690.  
  1691.     pages = (count_t) btree_search(&a->used_space, page, &leaf);
  1692.     if (pages) {
  1693.         /*
  1694.          * We are lucky, page is the beginning of some interval.
  1695.          */
  1696.         if (count > pages) {
  1697.             return 0;
  1698.         } else if (count == pages) {
  1699.             btree_remove(&a->used_space, page, leaf);
  1700.             return 1;
  1701.         } else {
  1702.             /*
  1703.              * Find the respective interval.
  1704.              * Decrease its size and relocate its start address.
  1705.              */
  1706.             for (i = 0; i < leaf->keys; i++) {
  1707.                 if (leaf->key[i] == page) {
  1708.                     leaf->key[i] += count * PAGE_SIZE;
  1709.                     leaf->value[i] -= count;
  1710.                     return 1;
  1711.                 }
  1712.             }
  1713.             goto error;
  1714.         }
  1715.     }
  1716.  
  1717.     node = btree_leaf_node_left_neighbour(&a->used_space, leaf);
  1718.     if (node && page < leaf->key[0]) {
  1719.         uintptr_t left_pg = node->key[node->keys - 1];
  1720.         count_t left_cnt = (count_t) node->value[node->keys - 1];
  1721.  
  1722.         if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
  1723.             count * PAGE_SIZE)) {
  1724.             if (page + count * PAGE_SIZE ==
  1725.                 left_pg + left_cnt * PAGE_SIZE) {
  1726.                 /*
  1727.                  * The interval is contained in the rightmost
  1728.                  * interval of the left neighbour and can be
  1729.                  * removed by updating the size of the bigger
  1730.                  * interval.
  1731.                  */
  1732.                 node->value[node->keys - 1] -= count;
  1733.                 return 1;
  1734.             } else if (page + count * PAGE_SIZE <
  1735.                 left_pg + left_cnt*PAGE_SIZE) {
  1736.                 count_t new_cnt;
  1737.                
  1738.                 /*
  1739.                  * The interval is contained in the rightmost
  1740.                  * interval of the left neighbour but its
  1741.                  * removal requires both updating the size of
  1742.                  * the original interval and also inserting a
  1743.                  * new interval.
  1744.                  */
  1745.                 new_cnt = ((left_pg + left_cnt * PAGE_SIZE) -
  1746.                     (page + count*PAGE_SIZE)) >> PAGE_WIDTH;
  1747.                 node->value[node->keys - 1] -= count + new_cnt;
  1748.                 btree_insert(&a->used_space, page +
  1749.                     count * PAGE_SIZE, (void *) new_cnt, leaf);
  1750.                 return 1;
  1751.             }
  1752.         }
  1753.         return 0;
  1754.     } else if (page < leaf->key[0]) {
  1755.         return 0;
  1756.     }
  1757.    
  1758.     if (page > leaf->key[leaf->keys - 1]) {
  1759.         uintptr_t left_pg = leaf->key[leaf->keys - 1];
  1760.         count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];
  1761.  
  1762.         if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
  1763.             count * PAGE_SIZE)) {
  1764.             if (page + count * PAGE_SIZE ==
  1765.                 left_pg + left_cnt * PAGE_SIZE) {
  1766.                 /*
  1767.                  * The interval is contained in the rightmost
  1768.                  * interval of the leaf and can be removed by
  1769.                  * updating the size of the bigger interval.
  1770.                  */
  1771.                 leaf->value[leaf->keys - 1] -= count;
  1772.                 return 1;
  1773.             } else if (page + count * PAGE_SIZE < left_pg +
  1774.                 left_cnt * PAGE_SIZE) {
  1775.                 count_t new_cnt;
  1776.                
  1777.                 /*
  1778.                  * The interval is contained in the rightmost
  1779.                  * interval of the leaf but its removal
  1780.                  * requires both updating the size of the
  1781.                  * original interval and also inserting a new
  1782.                  * interval.
  1783.                  */
  1784.                 new_cnt = ((left_pg + left_cnt * PAGE_SIZE) -
  1785.                     (page + count * PAGE_SIZE)) >> PAGE_WIDTH;
  1786.                 leaf->value[leaf->keys - 1] -= count + new_cnt;
  1787.                 btree_insert(&a->used_space, page +
  1788.                     count * PAGE_SIZE, (void *) new_cnt, leaf);
  1789.                 return 1;
  1790.             }
  1791.         }
  1792.         return 0;
  1793.     }  
  1794.    
  1795.     /*
  1796.      * The border cases have been already resolved.
  1797.      * Now the interval can be only between intervals of the leaf.
  1798.      */
  1799.     for (i = 1; i < leaf->keys - 1; i++) {
  1800.         if (page < leaf->key[i]) {
  1801.             uintptr_t left_pg = leaf->key[i - 1];
  1802.             count_t left_cnt = (count_t) leaf->value[i - 1];
  1803.  
  1804.             /*
  1805.              * Now the interval is between intervals corresponding
  1806.              * to (i - 1) and i.
  1807.              */
  1808.             if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
  1809.                 count * PAGE_SIZE)) {
  1810.                 if (page + count * PAGE_SIZE ==
  1811.                     left_pg + left_cnt*PAGE_SIZE) {
  1812.                     /*
  1813.                      * The interval is contained in the
  1814.                      * interval (i - 1) of the leaf and can
  1815.                      * be removed by updating the size of
  1816.                      * the bigger interval.
  1817.                      */
  1818.                     leaf->value[i - 1] -= count;
  1819.                     return 1;
  1820.                 } else if (page + count * PAGE_SIZE <
  1821.                     left_pg + left_cnt * PAGE_SIZE) {
  1822.                     count_t new_cnt;
  1823.                
  1824.                     /*
  1825.                      * The interval is contained in the
  1826.                      * interval (i - 1) of the leaf but its
  1827.                      * removal requires both updating the
  1828.                      * size of the original interval and
  1829.                      * also inserting a new interval.
  1830.                      */
  1831.                     new_cnt = ((left_pg +
  1832.                         left_cnt * PAGE_SIZE) -
  1833.                         (page + count * PAGE_SIZE)) >>
  1834.                         PAGE_WIDTH;
  1835.                     leaf->value[i - 1] -= count + new_cnt;
  1836.                     btree_insert(&a->used_space, page +
  1837.                         count * PAGE_SIZE, (void *) new_cnt,
  1838.                         leaf);
  1839.                     return 1;
  1840.                 }
  1841.             }
  1842.             return 0;
  1843.         }
  1844.     }
  1845.  
  1846. error:
  1847.     panic("Inconsistency detected while removing %" PRIc " pages of used "
  1848.         "space from %p.\n", count, page);
  1849. }
  1850.  
  1851. /** Remove reference to address space area share info.
  1852.  *
  1853.  * If the reference count drops to 0, the sh_info is deallocated.
  1854.  *
  1855.  * @param sh_info   Pointer to address space area share info.
  1856.  */
  1857. void sh_info_remove_reference(share_info_t *sh_info)
  1858. {
  1859.     bool dealloc = false;
  1860.  
  1861.     mutex_lock(&sh_info->lock);
  1862.     ASSERT(sh_info->refcount);
  1863.     if (--sh_info->refcount == 0) {
  1864.         dealloc = true;
  1865.         link_t *cur;
  1866.        
  1867.         /*
  1868.          * Now walk carefully the pagemap B+tree and free/remove
  1869.          * reference from all frames found there.
  1870.          */
  1871.         for (cur = sh_info->pagemap.leaf_head.next;
  1872.             cur != &sh_info->pagemap.leaf_head; cur = cur->next) {
  1873.             btree_node_t *node;
  1874.             unsigned int i;
  1875.            
  1876.             node = list_get_instance(cur, btree_node_t, leaf_link);
  1877.             for (i = 0; i < node->keys; i++)
  1878.                 frame_free((uintptr_t) node->value[i]);
  1879.         }
  1880.        
  1881.     }
  1882.     mutex_unlock(&sh_info->lock);
  1883.    
  1884.     if (dealloc) {
  1885.         btree_destroy(&sh_info->pagemap);
  1886.         free(sh_info);
  1887.     }
  1888. }
  1889.  
  1890. /*
  1891.  * Address space related syscalls.
  1892.  */
  1893.  
  1894. /** Wrapper for as_area_create(). */
  1895. unative_t sys_as_area_create(uintptr_t address, size_t size, int flags)
  1896. {
  1897.     if (as_area_create(AS, flags | AS_AREA_CACHEABLE, size, address,
  1898.         AS_AREA_ATTR_NONE, &anon_backend, NULL)) {
  1899.         return (unative_t) address;
  1900.     } else {
  1901.         return (unative_t) -1;
  1902.     }
  1903. }
  1904.  
  1905. /** Wrapper for as_area_resize(). */
  1906. unative_t sys_as_area_resize(uintptr_t address, size_t size, int flags)
  1907. {
  1908.     return (unative_t) as_area_resize(AS, address, size, 0);
  1909. }
  1910.  
  1911. /** Wrapper for as_area_change_flags(). */
  1912. unative_t sys_as_area_change_flags(uintptr_t address, int flags)
  1913. {
  1914.     return (unative_t) as_area_change_flags(AS, flags, address);
  1915. }
  1916.  
  1917. /** Wrapper for as_area_destroy(). */
  1918. unative_t sys_as_area_destroy(uintptr_t address)
  1919. {
  1920.     return (unative_t) as_area_destroy(AS, address);
  1921. }
  1922.  
  1923. /** Print out information about address space.
  1924.  *
  1925.  * @param as        Address space.
  1926.  */
  1927. void as_print(as_t *as)
  1928. {
  1929.     ipl_t ipl;
  1930.    
  1931.     ipl = interrupts_disable();
  1932.     mutex_lock(&as->lock);
  1933.    
  1934.     /* print out info about address space areas */
  1935.     link_t *cur;
  1936.     for (cur = as->as_area_btree.leaf_head.next;
  1937.         cur != &as->as_area_btree.leaf_head; cur = cur->next) {
  1938.         btree_node_t *node;
  1939.        
  1940.         node = list_get_instance(cur, btree_node_t, leaf_link);
  1941.        
  1942.         unsigned int i;
  1943.         for (i = 0; i < node->keys; i++) {
  1944.             as_area_t *area = node->value[i];
  1945.        
  1946.             mutex_lock(&area->lock);
  1947.             printf("as_area: %p, base=%p, pages=%" PRIc
  1948.                 " (%p - %p)\n", area, area->base, area->pages,
  1949.                 area->base, area->base + FRAMES2SIZE(area->pages));
  1950.             mutex_unlock(&area->lock);
  1951.         }
  1952.     }
  1953.    
  1954.     mutex_unlock(&as->lock);
  1955.     interrupts_restore(ipl);
  1956. }
  1957.  
  1958. /** @}
  1959.  */
  1960.