Subversion Repositories HelenOS

Rev

Rev 3596 | Rev 4377 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright (c) 2001-2006 Jakub Jermar
  3.  * All rights reserved.
  4.  *
  5.  * Redistribution and use in source and binary forms, with or without
  6.  * modification, are permitted provided that the following conditions
  7.  * are met:
  8.  *
  9.  * - Redistributions of source code must retain the above copyright
  10.  *   notice, this list of conditions and the following disclaimer.
  11.  * - Redistributions in binary form must reproduce the above copyright
  12.  *   notice, this list of conditions and the following disclaimer in the
  13.  *   documentation and/or other materials provided with the distribution.
  14.  * - The name of the author may not be used to endorse or promote products
  15.  *   derived from this software without specific prior written permission.
  16.  *
  17.  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18.  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19.  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20.  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21.  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22.  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23.  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26.  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27.  */
  28.  
  29. /** @addtogroup genericmm
  30.  * @{
  31.  */
  32.  
  33. /**
  34.  * @file
  35.  * @brief   Address space related functions.
  36.  *
  37.  * This file contains address space manipulation functions.
  38.  * Roughly speaking, this is a higher-level client of
  39.  * Virtual Address Translation (VAT) subsystem.
  40.  *
  41.  * Functionality provided by this file allows one to
  42.  * create address spaces and create, resize and share
  43.  * address space areas.
  44.  *
  45.  * @see page.c
  46.  *
  47.  */
  48.  
  49. #include <mm/as.h>
  50. #include <arch/mm/as.h>
  51. #include <mm/page.h>
  52. #include <mm/frame.h>
  53. #include <mm/slab.h>
  54. #include <mm/tlb.h>
  55. #include <arch/mm/page.h>
  56. #include <genarch/mm/page_pt.h>
  57. #include <genarch/mm/page_ht.h>
  58. #include <mm/asid.h>
  59. #include <arch/mm/asid.h>
  60. #include <preemption.h>
  61. #include <synch/spinlock.h>
  62. #include <synch/mutex.h>
  63. #include <adt/list.h>
  64. #include <adt/btree.h>
  65. #include <proc/task.h>
  66. #include <proc/thread.h>
  67. #include <arch/asm.h>
  68. #include <panic.h>
  69. #include <debug.h>
  70. #include <print.h>
  71. #include <memstr.h>
  72. #include <macros.h>
  73. #include <arch.h>
  74. #include <errno.h>
  75. #include <config.h>
  76. #include <align.h>
  77. #include <arch/types.h>
  78. #include <syscall/copy.h>
  79. #include <arch/interrupt.h>
  80.  
  81. #ifdef CONFIG_VIRT_IDX_DCACHE
  82. #include <arch/mm/cache.h>
  83. #endif /* CONFIG_VIRT_IDX_DCACHE */
  84.  
  85. /**
  86.  * Each architecture decides what functions will be used to carry out
  87.  * address space operations such as creating or locking page tables.
  88.  */
  89. as_operations_t *as_operations = NULL;
  90.  
  91. /**
  92.  * Slab for as_t objects.
  93.  */
  94. static slab_cache_t *as_slab;
  95.  
  96. /**
  97.  * This lock serializes access to the ASID subsystem.
  98.  * It protects:
  99.  * - inactive_as_with_asid_head list
  100.  * - as->asid for each as of the as_t type
  101.  * - asids_allocated counter
  102.  */
  103. SPINLOCK_INITIALIZE(asidlock);
  104.  
  105. /**
  106.  * This list contains address spaces that are not active on any
  107.  * processor and that have valid ASID.
  108.  */
  109. LIST_INITIALIZE(inactive_as_with_asid_head);
  110.  
  111. /** Kernel address space. */
  112. as_t *AS_KERNEL = NULL;
  113.  
  114. static int area_flags_to_page_flags(int);
  115. static bool check_area_conflicts(as_t *, uintptr_t, size_t, as_area_t *);
  116. static void sh_info_remove_reference(share_info_t *);
  117.  
  118. static int as_constructor(void *obj, int flags)
  119. {
  120.     as_t *as = (as_t *) obj;
  121.     int rc;
  122.  
  123.     link_initialize(&as->inactive_as_with_asid_link);
  124.     mutex_initialize(&as->lock, MUTEX_PASSIVE);
  125.    
  126.     rc = as_constructor_arch(as, flags);
  127.    
  128.     return rc;
  129. }
  130.  
  131. static int as_destructor(void *obj)
  132. {
  133.     as_t *as = (as_t *) obj;
  134.  
  135.     return as_destructor_arch(as);
  136. }
  137.  
  138. /** Initialize address space subsystem. */
  139. void as_init(void)
  140. {
  141.     as_arch_init();
  142.  
  143.     as_slab = slab_cache_create("as_slab", sizeof(as_t), 0,
  144.         as_constructor, as_destructor, SLAB_CACHE_MAGDEFERRED);
  145.    
  146.     AS_KERNEL = as_create(FLAG_AS_KERNEL);
  147.     if (!AS_KERNEL)
  148.         panic("can't create kernel address space\n");
  149.    
  150. }
  151.  
  152. /** Create address space.
  153.  *
  154.  * @param flags     Flags that influence the way in wich the address space
  155.  *          is created.
  156.  */
  157. as_t *as_create(int flags)
  158. {
  159.     as_t *as;
  160.  
  161.     as = (as_t *) slab_alloc(as_slab, 0);
  162.     (void) as_create_arch(as, 0);
  163.    
  164.     btree_create(&as->as_area_btree);
  165.    
  166.     if (flags & FLAG_AS_KERNEL)
  167.         as->asid = ASID_KERNEL;
  168.     else
  169.         as->asid = ASID_INVALID;
  170.    
  171.     atomic_set(&as->refcount, 0);
  172.     as->cpu_refcount = 0;
  173. #ifdef AS_PAGE_TABLE
  174.     as->genarch.page_table = page_table_create(flags);
  175. #else
  176.     page_table_create(flags);
  177. #endif
  178.  
  179.     return as;
  180. }
  181.  
  182. /** Destroy adress space.
  183.  *
  184.  * When there are no tasks referencing this address space (i.e. its refcount is
  185.  * zero), the address space can be destroyed.
  186.  *
  187.  * We know that we don't hold any spinlock.
  188.  *
  189.  * @param as        Address space to be destroyed.
  190.  */
  191. void as_destroy(as_t *as)
  192. {
  193.     ipl_t ipl;
  194.     bool cond;
  195.     DEADLOCK_PROBE_INIT(p_asidlock);
  196.  
  197.     ASSERT(atomic_get(&as->refcount) == 0);
  198.    
  199.     /*
  200.      * Since there is no reference to this area,
  201.      * it is safe not to lock its mutex.
  202.      */
  203.  
  204.     /*
  205.      * We need to avoid deadlock between TLB shootdown and asidlock.
  206.      * We therefore try to take asid conditionally and if we don't succeed,
  207.      * we enable interrupts and try again. This is done while preemption is
  208.      * disabled to prevent nested context switches. We also depend on the
  209.      * fact that so far no spinlocks are held.
  210.      */
  211.     preemption_disable();
  212.     ipl = interrupts_read();
  213. retry:
  214.     interrupts_disable();
  215.     if (!spinlock_trylock(&asidlock)) {
  216.         interrupts_enable();
  217.         DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
  218.         goto retry;
  219.     }
  220.     preemption_enable();    /* Interrupts disabled, enable preemption */
  221.     if (as->asid != ASID_INVALID && as != AS_KERNEL) {
  222.         if (as != AS && as->cpu_refcount == 0)
  223.             list_remove(&as->inactive_as_with_asid_link);
  224.         asid_put(as->asid);
  225.     }
  226.     spinlock_unlock(&asidlock);
  227.  
  228.     /*
  229.      * Destroy address space areas of the address space.
  230.      * The B+tree must be walked carefully because it is
  231.      * also being destroyed.
  232.      */
  233.     for (cond = true; cond; ) {
  234.         btree_node_t *node;
  235.  
  236.         ASSERT(!list_empty(&as->as_area_btree.leaf_head));
  237.         node = list_get_instance(as->as_area_btree.leaf_head.next,
  238.             btree_node_t, leaf_link);
  239.  
  240.         if ((cond = node->keys)) {
  241.             as_area_destroy(as, node->key[0]);
  242.         }
  243.     }
  244.  
  245.     btree_destroy(&as->as_area_btree);
  246. #ifdef AS_PAGE_TABLE
  247.     page_table_destroy(as->genarch.page_table);
  248. #else
  249.     page_table_destroy(NULL);
  250. #endif
  251.  
  252.     interrupts_restore(ipl);
  253.  
  254.     slab_free(as_slab, as);
  255. }
  256.  
  257. /** Create address space area of common attributes.
  258.  *
  259.  * The created address space area is added to the target address space.
  260.  *
  261.  * @param as        Target address space.
  262.  * @param flags     Flags of the area memory.
  263.  * @param size      Size of area.
  264.  * @param base      Base address of area.
  265.  * @param attrs     Attributes of the area.
  266.  * @param backend   Address space area backend. NULL if no backend is used.
  267.  * @param backend_data  NULL or a pointer to an array holding two void *.
  268.  *
  269.  * @return      Address space area on success or NULL on failure.
  270.  */
  271. as_area_t *
  272. as_area_create(as_t *as, int flags, size_t size, uintptr_t base, int attrs,
  273.     mem_backend_t *backend, mem_backend_data_t *backend_data)
  274. {
  275.     ipl_t ipl;
  276.     as_area_t *a;
  277.    
  278.     if (base % PAGE_SIZE)
  279.         return NULL;
  280.  
  281.     if (!size)
  282.         return NULL;
  283.  
  284.     /* Writeable executable areas are not supported. */
  285.     if ((flags & AS_AREA_EXEC) && (flags & AS_AREA_WRITE))
  286.         return NULL;
  287.    
  288.     ipl = interrupts_disable();
  289.     mutex_lock(&as->lock);
  290.    
  291.     if (!check_area_conflicts(as, base, size, NULL)) {
  292.         mutex_unlock(&as->lock);
  293.         interrupts_restore(ipl);
  294.         return NULL;
  295.     }
  296.    
  297.     a = (as_area_t *) malloc(sizeof(as_area_t), 0);
  298.  
  299.     mutex_initialize(&a->lock, MUTEX_PASSIVE);
  300.    
  301.     a->as = as;
  302.     a->flags = flags;
  303.     a->attributes = attrs;
  304.     a->pages = SIZE2FRAMES(size);
  305.     a->base = base;
  306.     a->sh_info = NULL;
  307.     a->backend = backend;
  308.     if (backend_data)
  309.         a->backend_data = *backend_data;
  310.     else
  311.         memsetb(&a->backend_data, sizeof(a->backend_data), 0);
  312.  
  313.     btree_create(&a->used_space);
  314.    
  315.     btree_insert(&as->as_area_btree, base, (void *) a, NULL);
  316.  
  317.     mutex_unlock(&as->lock);
  318.     interrupts_restore(ipl);
  319.  
  320.     return a;
  321. }
  322.  
  323. /** Find address space area and change it.
  324.  *
  325.  * @param as        Address space.
  326.  * @param address   Virtual address belonging to the area to be changed.
  327.  *          Must be page-aligned.
  328.  * @param size      New size of the virtual memory block starting at
  329.  *          address.
  330.  * @param flags     Flags influencing the remap operation. Currently unused.
  331.  *
  332.  * @return      Zero on success or a value from @ref errno.h otherwise.
  333.  */
  334. int as_area_resize(as_t *as, uintptr_t address, size_t size, int flags)
  335. {
  336.     as_area_t *area;
  337.     ipl_t ipl;
  338.     size_t pages;
  339.    
  340.     ipl = interrupts_disable();
  341.     mutex_lock(&as->lock);
  342.    
  343.     /*
  344.      * Locate the area.
  345.      */
  346.     area = find_area_and_lock(as, address);
  347.     if (!area) {
  348.         mutex_unlock(&as->lock);
  349.         interrupts_restore(ipl);
  350.         return ENOENT;
  351.     }
  352.  
  353.     if (area->backend == &phys_backend) {
  354.         /*
  355.          * Remapping of address space areas associated
  356.          * with memory mapped devices is not supported.
  357.          */
  358.         mutex_unlock(&area->lock);
  359.         mutex_unlock(&as->lock);
  360.         interrupts_restore(ipl);
  361.         return ENOTSUP;
  362.     }
  363.     if (area->sh_info) {
  364.         /*
  365.          * Remapping of shared address space areas
  366.          * is not supported.
  367.          */
  368.         mutex_unlock(&area->lock);
  369.         mutex_unlock(&as->lock);
  370.         interrupts_restore(ipl);
  371.         return ENOTSUP;
  372.     }
  373.  
  374.     pages = SIZE2FRAMES((address - area->base) + size);
  375.     if (!pages) {
  376.         /*
  377.          * Zero size address space areas are not allowed.
  378.          */
  379.         mutex_unlock(&area->lock);
  380.         mutex_unlock(&as->lock);
  381.         interrupts_restore(ipl);
  382.         return EPERM;
  383.     }
  384.    
  385.     if (pages < area->pages) {
  386.         bool cond;
  387.         uintptr_t start_free = area->base + pages * PAGE_SIZE;
  388.  
  389.         /*
  390.          * Shrinking the area.
  391.          * No need to check for overlaps.
  392.          */
  393.  
  394.         /*
  395.          * Start TLB shootdown sequence.
  396.          */
  397.         tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base +
  398.             pages * PAGE_SIZE, area->pages - pages);
  399.  
  400.         /*
  401.          * Remove frames belonging to used space starting from
  402.          * the highest addresses downwards until an overlap with
  403.          * the resized address space area is found. Note that this
  404.          * is also the right way to remove part of the used_space
  405.          * B+tree leaf list.
  406.          */    
  407.         for (cond = true; cond;) {
  408.             btree_node_t *node;
  409.        
  410.             ASSERT(!list_empty(&area->used_space.leaf_head));
  411.             node =
  412.                 list_get_instance(area->used_space.leaf_head.prev,
  413.                 btree_node_t, leaf_link);
  414.             if ((cond = (bool) node->keys)) {
  415.                 uintptr_t b = node->key[node->keys - 1];
  416.                 count_t c =
  417.                     (count_t) node->value[node->keys - 1];
  418.                 unsigned int i = 0;
  419.            
  420.                 if (overlaps(b, c * PAGE_SIZE, area->base,
  421.                     pages * PAGE_SIZE)) {
  422.                    
  423.                     if (b + c * PAGE_SIZE <= start_free) {
  424.                         /*
  425.                          * The whole interval fits
  426.                          * completely in the resized
  427.                          * address space area.
  428.                          */
  429.                         break;
  430.                     }
  431.        
  432.                     /*
  433.                      * Part of the interval corresponding
  434.                      * to b and c overlaps with the resized
  435.                      * address space area.
  436.                      */
  437.        
  438.                     cond = false;   /* we are almost done */
  439.                     i = (start_free - b) >> PAGE_WIDTH;
  440.                     if (!used_space_remove(area, start_free,
  441.                         c - i))
  442.                         panic("Could not remove used "
  443.                             "space.\n");
  444.                 } else {
  445.                     /*
  446.                      * The interval of used space can be
  447.                      * completely removed.
  448.                      */
  449.                     if (!used_space_remove(area, b, c))
  450.                         panic("Could not remove used "
  451.                             "space.\n");
  452.                 }
  453.            
  454.                 for (; i < c; i++) {
  455.                     pte_t *pte;
  456.            
  457.                     page_table_lock(as, false);
  458.                     pte = page_mapping_find(as, b +
  459.                         i * PAGE_SIZE);
  460.                     ASSERT(pte && PTE_VALID(pte) &&
  461.                         PTE_PRESENT(pte));
  462.                     if (area->backend &&
  463.                         area->backend->frame_free) {
  464.                         area->backend->frame_free(area,
  465.                             b + i * PAGE_SIZE,
  466.                             PTE_GET_FRAME(pte));
  467.                     }
  468.                     page_mapping_remove(as, b +
  469.                         i * PAGE_SIZE);
  470.                     page_table_unlock(as, false);
  471.                 }
  472.             }
  473.         }
  474.  
  475.         /*
  476.          * Finish TLB shootdown sequence.
  477.          */
  478.  
  479.         tlb_invalidate_pages(as->asid, area->base + pages * PAGE_SIZE,
  480.             area->pages - pages);
  481.         /*
  482.          * Invalidate software translation caches (e.g. TSB on sparc64).
  483.          */
  484.         as_invalidate_translation_cache(as, area->base +
  485.             pages * PAGE_SIZE, area->pages - pages);
  486.         tlb_shootdown_finalize();
  487.        
  488.     } else {
  489.         /*
  490.          * Growing the area.
  491.          * Check for overlaps with other address space areas.
  492.          */
  493.         if (!check_area_conflicts(as, address, pages * PAGE_SIZE,
  494.             area)) {
  495.             mutex_unlock(&area->lock);
  496.             mutex_unlock(&as->lock);       
  497.             interrupts_restore(ipl);
  498.             return EADDRNOTAVAIL;
  499.         }
  500.     }
  501.  
  502.     area->pages = pages;
  503.    
  504.     mutex_unlock(&area->lock);
  505.     mutex_unlock(&as->lock);
  506.     interrupts_restore(ipl);
  507.  
  508.     return 0;
  509. }
  510.  
  511. /** Destroy address space area.
  512.  *
  513.  * @param as        Address space.
  514.  * @param address   Address within the area to be deleted.
  515.  *
  516.  * @return      Zero on success or a value from @ref errno.h on failure.
  517.  */
  518. int as_area_destroy(as_t *as, uintptr_t address)
  519. {
  520.     as_area_t *area;
  521.     uintptr_t base;
  522.     link_t *cur;
  523.     ipl_t ipl;
  524.  
  525.     ipl = interrupts_disable();
  526.     mutex_lock(&as->lock);
  527.  
  528.     area = find_area_and_lock(as, address);
  529.     if (!area) {
  530.         mutex_unlock(&as->lock);
  531.         interrupts_restore(ipl);
  532.         return ENOENT;
  533.     }
  534.  
  535.     base = area->base;
  536.  
  537.     /*
  538.      * Start TLB shootdown sequence.
  539.      */
  540.     tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base, area->pages);
  541.  
  542.     /*
  543.      * Visit only the pages mapped by used_space B+tree.
  544.      */
  545.     for (cur = area->used_space.leaf_head.next;
  546.         cur != &area->used_space.leaf_head; cur = cur->next) {
  547.         btree_node_t *node;
  548.         unsigned int i;
  549.        
  550.         node = list_get_instance(cur, btree_node_t, leaf_link);
  551.         for (i = 0; i < node->keys; i++) {
  552.             uintptr_t b = node->key[i];
  553.             count_t j;
  554.             pte_t *pte;
  555.            
  556.             for (j = 0; j < (count_t) node->value[i]; j++) {
  557.                 page_table_lock(as, false);
  558.                 pte = page_mapping_find(as, b + j * PAGE_SIZE);
  559.                 ASSERT(pte && PTE_VALID(pte) &&
  560.                     PTE_PRESENT(pte));
  561.                 if (area->backend &&
  562.                     area->backend->frame_free) {
  563.                     area->backend->frame_free(area, b +
  564.                         j * PAGE_SIZE, PTE_GET_FRAME(pte));
  565.                 }
  566.                 page_mapping_remove(as, b + j * PAGE_SIZE);            
  567.                 page_table_unlock(as, false);
  568.             }
  569.         }
  570.     }
  571.  
  572.     /*
  573.      * Finish TLB shootdown sequence.
  574.      */
  575.  
  576.     tlb_invalidate_pages(as->asid, area->base, area->pages);
  577.     /*
  578.      * Invalidate potential software translation caches (e.g. TSB on
  579.      * sparc64).
  580.      */
  581.     as_invalidate_translation_cache(as, area->base, area->pages);
  582.     tlb_shootdown_finalize();
  583.    
  584.     btree_destroy(&area->used_space);
  585.  
  586.     area->attributes |= AS_AREA_ATTR_PARTIAL;
  587.    
  588.     if (area->sh_info)
  589.         sh_info_remove_reference(area->sh_info);
  590.        
  591.     mutex_unlock(&area->lock);
  592.  
  593.     /*
  594.      * Remove the empty area from address space.
  595.      */
  596.     btree_remove(&as->as_area_btree, base, NULL);
  597.    
  598.     free(area);
  599.    
  600.     mutex_unlock(&as->lock);
  601.     interrupts_restore(ipl);
  602.     return 0;
  603. }
  604.  
  605. /** Share address space area with another or the same address space.
  606.  *
  607.  * Address space area mapping is shared with a new address space area.
  608.  * If the source address space area has not been shared so far,
  609.  * a new sh_info is created. The new address space area simply gets the
  610.  * sh_info of the source area. The process of duplicating the
  611.  * mapping is done through the backend share function.
  612.  *
  613.  * @param src_as    Pointer to source address space.
  614.  * @param src_base  Base address of the source address space area.
  615.  * @param acc_size  Expected size of the source area.
  616.  * @param dst_as    Pointer to destination address space.
  617.  * @param dst_base  Target base address.
  618.  * @param dst_flags_mask Destination address space area flags mask.
  619.  *
  620.  * @return      Zero on success or ENOENT if there is no such task or if
  621.  *          there is no such address space area, EPERM if there was
  622.  *          a problem in accepting the area or ENOMEM if there was a
  623.  *          problem in allocating destination address space area.
  624.  *          ENOTSUP is returned if the address space area backend
  625.  *          does not support sharing.
  626.  */
  627. int as_area_share(as_t *src_as, uintptr_t src_base, size_t acc_size,
  628.     as_t *dst_as, uintptr_t dst_base, int dst_flags_mask)
  629. {
  630.     ipl_t ipl;
  631.     int src_flags;
  632.     size_t src_size;
  633.     as_area_t *src_area, *dst_area;
  634.     share_info_t *sh_info;
  635.     mem_backend_t *src_backend;
  636.     mem_backend_data_t src_backend_data;
  637.    
  638.     ipl = interrupts_disable();
  639.     mutex_lock(&src_as->lock);
  640.     src_area = find_area_and_lock(src_as, src_base);
  641.     if (!src_area) {
  642.         /*
  643.          * Could not find the source address space area.
  644.          */
  645.         mutex_unlock(&src_as->lock);
  646.         interrupts_restore(ipl);
  647.         return ENOENT;
  648.     }
  649.  
  650.     if (!src_area->backend || !src_area->backend->share) {
  651.         /*
  652.          * There is no backend or the backend does not
  653.          * know how to share the area.
  654.          */
  655.         mutex_unlock(&src_area->lock);
  656.         mutex_unlock(&src_as->lock);
  657.         interrupts_restore(ipl);
  658.         return ENOTSUP;
  659.     }
  660.    
  661.     src_size = src_area->pages * PAGE_SIZE;
  662.     src_flags = src_area->flags;
  663.     src_backend = src_area->backend;
  664.     src_backend_data = src_area->backend_data;
  665.  
  666.     /* Share the cacheable flag from the original mapping */
  667.     if (src_flags & AS_AREA_CACHEABLE)
  668.         dst_flags_mask |= AS_AREA_CACHEABLE;
  669.  
  670.     if (src_size != acc_size ||
  671.         (src_flags & dst_flags_mask) != dst_flags_mask) {
  672.         mutex_unlock(&src_area->lock);
  673.         mutex_unlock(&src_as->lock);
  674.         interrupts_restore(ipl);
  675.         return EPERM;
  676.     }
  677.  
  678.     /*
  679.      * Now we are committed to sharing the area.
  680.      * First, prepare the area for sharing.
  681.      * Then it will be safe to unlock it.
  682.      */
  683.     sh_info = src_area->sh_info;
  684.     if (!sh_info) {
  685.         sh_info = (share_info_t *) malloc(sizeof(share_info_t), 0);
  686.         mutex_initialize(&sh_info->lock, MUTEX_PASSIVE);
  687.         sh_info->refcount = 2;
  688.         btree_create(&sh_info->pagemap);
  689.         src_area->sh_info = sh_info;
  690.         /*
  691.          * Call the backend to setup sharing.
  692.          */
  693.         src_area->backend->share(src_area);
  694.     } else {
  695.         mutex_lock(&sh_info->lock);
  696.         sh_info->refcount++;
  697.         mutex_unlock(&sh_info->lock);
  698.     }
  699.  
  700.     mutex_unlock(&src_area->lock);
  701.     mutex_unlock(&src_as->lock);
  702.  
  703.     /*
  704.      * Create copy of the source address space area.
  705.      * The destination area is created with AS_AREA_ATTR_PARTIAL
  706.      * attribute set which prevents race condition with
  707.      * preliminary as_page_fault() calls.
  708.      * The flags of the source area are masked against dst_flags_mask
  709.      * to support sharing in less privileged mode.
  710.      */
  711.     dst_area = as_area_create(dst_as, dst_flags_mask, src_size, dst_base,
  712.         AS_AREA_ATTR_PARTIAL, src_backend, &src_backend_data);
  713.     if (!dst_area) {
  714.         /*
  715.          * Destination address space area could not be created.
  716.          */
  717.         sh_info_remove_reference(sh_info);
  718.        
  719.         interrupts_restore(ipl);
  720.         return ENOMEM;
  721.     }
  722.  
  723.     /*
  724.      * Now the destination address space area has been
  725.      * fully initialized. Clear the AS_AREA_ATTR_PARTIAL
  726.      * attribute and set the sh_info.
  727.      */
  728.     mutex_lock(&dst_as->lock); 
  729.     mutex_lock(&dst_area->lock);
  730.     dst_area->attributes &= ~AS_AREA_ATTR_PARTIAL;
  731.     dst_area->sh_info = sh_info;
  732.     mutex_unlock(&dst_area->lock);
  733.     mutex_unlock(&dst_as->lock);   
  734.  
  735.     interrupts_restore(ipl);
  736.    
  737.     return 0;
  738. }
  739.  
  740. /** Check access mode for address space area.
  741.  *
  742.  * The address space area must be locked prior to this call.
  743.  *
  744.  * @param area      Address space area.
  745.  * @param access    Access mode.
  746.  *
  747.  * @return      False if access violates area's permissions, true
  748.  *          otherwise.
  749.  */
  750. bool as_area_check_access(as_area_t *area, pf_access_t access)
  751. {
  752.     int flagmap[] = {
  753.         [PF_ACCESS_READ] = AS_AREA_READ,
  754.         [PF_ACCESS_WRITE] = AS_AREA_WRITE,
  755.         [PF_ACCESS_EXEC] = AS_AREA_EXEC
  756.     };
  757.  
  758.     if (!(area->flags & flagmap[access]))
  759.         return false;
  760.    
  761.     return true;
  762. }
  763.  
  764. /** Change adress space area flags.
  765.  *
  766.  * The idea is to have the same data, but with a different access mode.
  767.  * This is needed e.g. for writing code into memory and then executing it.
  768.  * In order for this to work properly, this may copy the data
  769.  * into private anonymous memory (unless it's already there).
  770.  *
  771.  * @param as        Address space.
  772.  * @param flags     Flags of the area memory.
  773.  * @param address   Address withing the area to be changed.
  774.  *
  775.  * @return      Zero on success or a value from @ref errno.h on failure.
  776.  */
  777. int as_area_change_flags(as_t *as, int flags, uintptr_t address)
  778. {
  779.     as_area_t *area;
  780.     uintptr_t base;
  781.     link_t *cur;
  782.     ipl_t ipl;
  783.     int page_flags;
  784.     uintptr_t *old_frame;
  785.     index_t frame_idx;
  786.     count_t used_pages;
  787.  
  788.     /* Flags for the new memory mapping */
  789.     page_flags = area_flags_to_page_flags(flags);
  790.  
  791.     ipl = interrupts_disable();
  792.     mutex_lock(&as->lock);
  793.  
  794.     area = find_area_and_lock(as, address);
  795.     if (!area) {
  796.         mutex_unlock(&as->lock);
  797.         interrupts_restore(ipl);
  798.         return ENOENT;
  799.     }
  800.  
  801.     if (area->sh_info || area->backend != &anon_backend) {
  802.         /* Copying shared areas not supported yet */
  803.         /* Copying non-anonymous memory not supported yet */
  804.         mutex_unlock(&area->lock);
  805.         mutex_unlock(&as->lock);
  806.         interrupts_restore(ipl);
  807.         return ENOTSUP;
  808.     }
  809.  
  810.     base = area->base;
  811.  
  812.     /*
  813.      * Compute total number of used pages in the used_space B+tree
  814.      */
  815.     used_pages = 0;
  816.  
  817.     for (cur = area->used_space.leaf_head.next;
  818.         cur != &area->used_space.leaf_head; cur = cur->next) {
  819.         btree_node_t *node;
  820.         unsigned int i;
  821.        
  822.         node = list_get_instance(cur, btree_node_t, leaf_link);
  823.         for (i = 0; i < node->keys; i++) {
  824.             used_pages += (count_t) node->value[i];
  825.         }
  826.     }
  827.  
  828.     /* An array for storing frame numbers */
  829.     old_frame = malloc(used_pages * sizeof(uintptr_t), 0);
  830.  
  831.     /*
  832.      * Start TLB shootdown sequence.
  833.      */
  834.     tlb_shootdown_start(TLB_INVL_PAGES, as->asid, area->base, area->pages);
  835.  
  836.     /*
  837.      * Remove used pages from page tables and remember their frame
  838.      * numbers.
  839.      */
  840.     frame_idx = 0;
  841.  
  842.     for (cur = area->used_space.leaf_head.next;
  843.         cur != &area->used_space.leaf_head; cur = cur->next) {
  844.         btree_node_t *node;
  845.         unsigned int i;
  846.        
  847.         node = list_get_instance(cur, btree_node_t, leaf_link);
  848.         for (i = 0; i < node->keys; i++) {
  849.             uintptr_t b = node->key[i];
  850.             count_t j;
  851.             pte_t *pte;
  852.            
  853.             for (j = 0; j < (count_t) node->value[i]; j++) {
  854.                 page_table_lock(as, false);
  855.                 pte = page_mapping_find(as, b + j * PAGE_SIZE);
  856.                 ASSERT(pte && PTE_VALID(pte) &&
  857.                     PTE_PRESENT(pte));
  858.                 old_frame[frame_idx++] = PTE_GET_FRAME(pte);
  859.  
  860.                 /* Remove old mapping */
  861.                 page_mapping_remove(as, b + j * PAGE_SIZE);
  862.                 page_table_unlock(as, false);
  863.             }
  864.         }
  865.     }
  866.  
  867.     /*
  868.      * Finish TLB shootdown sequence.
  869.      */
  870.  
  871.     tlb_invalidate_pages(as->asid, area->base, area->pages);
  872.     /*
  873.      * Invalidate potential software translation caches (e.g. TSB on
  874.      * sparc64).
  875.      */
  876.     as_invalidate_translation_cache(as, area->base, area->pages);
  877.     tlb_shootdown_finalize();
  878.  
  879.     /*
  880.      * Set the new flags.
  881.      */
  882.     area->flags = flags;
  883.  
  884.     /*
  885.      * Map pages back in with new flags. This step is kept separate
  886.      * so that the memory area could not be accesed with both the old and
  887.      * the new flags at once.
  888.      */
  889.     frame_idx = 0;
  890.  
  891.     for (cur = area->used_space.leaf_head.next;
  892.         cur != &area->used_space.leaf_head; cur = cur->next) {
  893.         btree_node_t *node;
  894.         unsigned int i;
  895.        
  896.         node = list_get_instance(cur, btree_node_t, leaf_link);
  897.         for (i = 0; i < node->keys; i++) {
  898.             uintptr_t b = node->key[i];
  899.             count_t j;
  900.            
  901.             for (j = 0; j < (count_t) node->value[i]; j++) {
  902.                 page_table_lock(as, false);
  903.  
  904.                 /* Insert the new mapping */
  905.                 page_mapping_insert(as, b + j * PAGE_SIZE,
  906.                     old_frame[frame_idx++], page_flags);
  907.  
  908.                 page_table_unlock(as, false);
  909.             }
  910.         }
  911.     }
  912.  
  913.     free(old_frame);
  914.  
  915.     mutex_unlock(&area->lock);
  916.     mutex_unlock(&as->lock);
  917.     interrupts_restore(ipl);
  918.  
  919.     return 0;
  920. }
  921.  
  922.  
  923. /** Handle page fault within the current address space.
  924.  *
  925.  * This is the high-level page fault handler. It decides whether the page fault
  926.  * can be resolved by any backend and if so, it invokes the backend to resolve
  927.  * the page fault.
  928.  *
  929.  * Interrupts are assumed disabled.
  930.  *
  931.  * @param page      Faulting page.
  932.  * @param access    Access mode that caused the page fault (i.e.
  933.  *          read/write/exec).
  934.  * @param istate    Pointer to the interrupted state.
  935.  *
  936.  * @return      AS_PF_FAULT on page fault, AS_PF_OK on success or
  937.  *          AS_PF_DEFER if the fault was caused by copy_to_uspace()
  938.  *          or copy_from_uspace().
  939.  */
  940. int as_page_fault(uintptr_t page, pf_access_t access, istate_t *istate)
  941. {
  942.     pte_t *pte;
  943.     as_area_t *area;
  944.    
  945.     if (!THREAD)
  946.         return AS_PF_FAULT;
  947.        
  948.     ASSERT(AS);
  949.  
  950.     mutex_lock(&AS->lock);
  951.     area = find_area_and_lock(AS, page);   
  952.     if (!area) {
  953.         /*
  954.          * No area contained mapping for 'page'.
  955.          * Signal page fault to low-level handler.
  956.          */
  957.         mutex_unlock(&AS->lock);
  958.         goto page_fault;
  959.     }
  960.  
  961.     if (area->attributes & AS_AREA_ATTR_PARTIAL) {
  962.         /*
  963.          * The address space area is not fully initialized.
  964.          * Avoid possible race by returning error.
  965.          */
  966.         mutex_unlock(&area->lock);
  967.         mutex_unlock(&AS->lock);
  968.         goto page_fault;       
  969.     }
  970.  
  971.     if (!area->backend || !area->backend->page_fault) {
  972.         /*
  973.          * The address space area is not backed by any backend
  974.          * or the backend cannot handle page faults.
  975.          */
  976.         mutex_unlock(&area->lock);
  977.         mutex_unlock(&AS->lock);
  978.         goto page_fault;       
  979.     }
  980.  
  981.     page_table_lock(AS, false);
  982.    
  983.     /*
  984.      * To avoid race condition between two page faults on the same address,
  985.      * we need to make sure the mapping has not been already inserted.
  986.      */
  987.     if ((pte = page_mapping_find(AS, page))) {
  988.         if (PTE_PRESENT(pte)) {
  989.             if (((access == PF_ACCESS_READ) && PTE_READABLE(pte)) ||
  990.                 (access == PF_ACCESS_WRITE && PTE_WRITABLE(pte)) ||
  991.                 (access == PF_ACCESS_EXEC && PTE_EXECUTABLE(pte))) {
  992.                 page_table_unlock(AS, false);
  993.                 mutex_unlock(&area->lock);
  994.                 mutex_unlock(&AS->lock);
  995.                 return AS_PF_OK;
  996.             }
  997.         }
  998.     }
  999.    
  1000.     /*
  1001.      * Resort to the backend page fault handler.
  1002.      */
  1003.     if (area->backend->page_fault(area, page, access) != AS_PF_OK) {
  1004.         page_table_unlock(AS, false);
  1005.         mutex_unlock(&area->lock);
  1006.         mutex_unlock(&AS->lock);
  1007.         goto page_fault;
  1008.     }
  1009.    
  1010.     page_table_unlock(AS, false);
  1011.     mutex_unlock(&area->lock);
  1012.     mutex_unlock(&AS->lock);
  1013.     return AS_PF_OK;
  1014.  
  1015. page_fault:
  1016.     if (THREAD->in_copy_from_uspace) {
  1017.         THREAD->in_copy_from_uspace = false;
  1018.         istate_set_retaddr(istate,
  1019.             (uintptr_t) &memcpy_from_uspace_failover_address);
  1020.     } else if (THREAD->in_copy_to_uspace) {
  1021.         THREAD->in_copy_to_uspace = false;
  1022.         istate_set_retaddr(istate,
  1023.             (uintptr_t) &memcpy_to_uspace_failover_address);
  1024.     } else {
  1025.         return AS_PF_FAULT;
  1026.     }
  1027.  
  1028.     return AS_PF_DEFER;
  1029. }
  1030.  
  1031. /** Switch address spaces.
  1032.  *
  1033.  * Note that this function cannot sleep as it is essentially a part of
  1034.  * scheduling. Sleeping here would lead to deadlock on wakeup. Another
  1035.  * thing which is forbidden in this context is locking the address space.
  1036.  *
  1037.  * When this function is enetered, no spinlocks may be held.
  1038.  *
  1039.  * @param old       Old address space or NULL.
  1040.  * @param new       New address space.
  1041.  */
  1042. void as_switch(as_t *old_as, as_t *new_as)
  1043. {
  1044.     DEADLOCK_PROBE_INIT(p_asidlock);
  1045.     preemption_disable();
  1046. retry:
  1047.     (void) interrupts_disable();
  1048.     if (!spinlock_trylock(&asidlock)) {
  1049.         /*
  1050.          * Avoid deadlock with TLB shootdown.
  1051.          * We can enable interrupts here because
  1052.          * preemption is disabled. We should not be
  1053.          * holding any other lock.
  1054.          */
  1055.         (void) interrupts_enable();
  1056.         DEADLOCK_PROBE(p_asidlock, DEADLOCK_THRESHOLD);
  1057.         goto retry;
  1058.     }
  1059.     preemption_enable();
  1060.  
  1061.     /*
  1062.      * First, take care of the old address space.
  1063.      */
  1064.     if (old_as) {
  1065.         ASSERT(old_as->cpu_refcount);
  1066.         if((--old_as->cpu_refcount == 0) && (old_as != AS_KERNEL)) {
  1067.             /*
  1068.              * The old address space is no longer active on
  1069.              * any processor. It can be appended to the
  1070.              * list of inactive address spaces with assigned
  1071.              * ASID.
  1072.              */
  1073.             ASSERT(old_as->asid != ASID_INVALID);
  1074.             list_append(&old_as->inactive_as_with_asid_link,
  1075.                 &inactive_as_with_asid_head);
  1076.         }
  1077.  
  1078.         /*
  1079.          * Perform architecture-specific tasks when the address space
  1080.          * is being removed from the CPU.
  1081.          */
  1082.         as_deinstall_arch(old_as);
  1083.     }
  1084.  
  1085.     /*
  1086.      * Second, prepare the new address space.
  1087.      */
  1088.     if ((new_as->cpu_refcount++ == 0) && (new_as != AS_KERNEL)) {
  1089.         if (new_as->asid != ASID_INVALID)
  1090.             list_remove(&new_as->inactive_as_with_asid_link);
  1091.         else
  1092.             new_as->asid = asid_get();
  1093.     }
  1094. #ifdef AS_PAGE_TABLE
  1095.     SET_PTL0_ADDRESS(new_as->genarch.page_table);
  1096. #endif
  1097.    
  1098.     /*
  1099.      * Perform architecture-specific steps.
  1100.      * (e.g. write ASID to hardware register etc.)
  1101.      */
  1102.     as_install_arch(new_as);
  1103.  
  1104.     spinlock_unlock(&asidlock);
  1105.    
  1106.     AS = new_as;
  1107. }
  1108.  
  1109. /** Convert address space area flags to page flags.
  1110.  *
  1111.  * @param aflags    Flags of some address space area.
  1112.  *
  1113.  * @return      Flags to be passed to page_mapping_insert().
  1114.  */
  1115. int area_flags_to_page_flags(int aflags)
  1116. {
  1117.     int flags;
  1118.  
  1119.     flags = PAGE_USER | PAGE_PRESENT;
  1120.    
  1121.     if (aflags & AS_AREA_READ)
  1122.         flags |= PAGE_READ;
  1123.        
  1124.     if (aflags & AS_AREA_WRITE)
  1125.         flags |= PAGE_WRITE;
  1126.    
  1127.     if (aflags & AS_AREA_EXEC)
  1128.         flags |= PAGE_EXEC;
  1129.    
  1130.     if (aflags & AS_AREA_CACHEABLE)
  1131.         flags |= PAGE_CACHEABLE;
  1132.        
  1133.     return flags;
  1134. }
  1135.  
  1136. /** Compute flags for virtual address translation subsytem.
  1137.  *
  1138.  * The address space area must be locked.
  1139.  * Interrupts must be disabled.
  1140.  *
  1141.  * @param a     Address space area.
  1142.  *
  1143.  * @return      Flags to be used in page_mapping_insert().
  1144.  */
  1145. int as_area_get_flags(as_area_t *a)
  1146. {
  1147.     return area_flags_to_page_flags(a->flags);
  1148. }
  1149.  
  1150. /** Create page table.
  1151.  *
  1152.  * Depending on architecture, create either address space private or global page
  1153.  * table.
  1154.  *
  1155.  * @param flags     Flags saying whether the page table is for the kernel
  1156.  *          address space.
  1157.  *
  1158.  * @return      First entry of the page table.
  1159.  */
  1160. pte_t *page_table_create(int flags)
  1161. {
  1162.     ASSERT(as_operations);
  1163.     ASSERT(as_operations->page_table_create);
  1164.    
  1165.     return as_operations->page_table_create(flags);
  1166. }
  1167.  
  1168. /** Destroy page table.
  1169.  *
  1170.  * Destroy page table in architecture specific way.
  1171.  *
  1172.  * @param page_table    Physical address of PTL0.
  1173.  */
  1174. void page_table_destroy(pte_t *page_table)
  1175. {
  1176.     ASSERT(as_operations);
  1177.     ASSERT(as_operations->page_table_destroy);
  1178.    
  1179.     as_operations->page_table_destroy(page_table);
  1180. }
  1181.  
  1182. /** Lock page table.
  1183.  *
  1184.  * This function should be called before any page_mapping_insert(),
  1185.  * page_mapping_remove() and page_mapping_find().
  1186.  *
  1187.  * Locking order is such that address space areas must be locked
  1188.  * prior to this call. Address space can be locked prior to this
  1189.  * call in which case the lock argument is false.
  1190.  *
  1191.  * @param as        Address space.
  1192.  * @param lock      If false, do not attempt to lock as->lock.
  1193.  */
  1194. void page_table_lock(as_t *as, bool lock)
  1195. {
  1196.     ASSERT(as_operations);
  1197.     ASSERT(as_operations->page_table_lock);
  1198.    
  1199.     as_operations->page_table_lock(as, lock);
  1200. }
  1201.  
  1202. /** Unlock page table.
  1203.  *
  1204.  * @param as        Address space.
  1205.  * @param unlock    If false, do not attempt to unlock as->lock.
  1206.  */
  1207. void page_table_unlock(as_t *as, bool unlock)
  1208. {
  1209.     ASSERT(as_operations);
  1210.     ASSERT(as_operations->page_table_unlock);
  1211.    
  1212.     as_operations->page_table_unlock(as, unlock);
  1213. }
  1214.  
  1215.  
  1216. /** Find address space area and lock it.
  1217.  *
  1218.  * The address space must be locked and interrupts must be disabled.
  1219.  *
  1220.  * @param as        Address space.
  1221.  * @param va        Virtual address.
  1222.  *
  1223.  * @return      Locked address space area containing va on success or
  1224.  *          NULL on failure.
  1225.  */
  1226. as_area_t *find_area_and_lock(as_t *as, uintptr_t va)
  1227. {
  1228.     as_area_t *a;
  1229.     btree_node_t *leaf, *lnode;
  1230.     unsigned int i;
  1231.    
  1232.     a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf);
  1233.     if (a) {
  1234.         /* va is the base address of an address space area */
  1235.         mutex_lock(&a->lock);
  1236.         return a;
  1237.     }
  1238.    
  1239.     /*
  1240.      * Search the leaf node and the righmost record of its left neighbour
  1241.      * to find out whether this is a miss or va belongs to an address
  1242.      * space area found there.
  1243.      */
  1244.    
  1245.     /* First, search the leaf node itself. */
  1246.     for (i = 0; i < leaf->keys; i++) {
  1247.         a = (as_area_t *) leaf->value[i];
  1248.         mutex_lock(&a->lock);
  1249.         if ((a->base <= va) && (va < a->base + a->pages * PAGE_SIZE)) {
  1250.             return a;
  1251.         }
  1252.         mutex_unlock(&a->lock);
  1253.     }
  1254.  
  1255.     /*
  1256.      * Second, locate the left neighbour and test its last record.
  1257.      * Because of its position in the B+tree, it must have base < va.
  1258.      */
  1259.     lnode = btree_leaf_node_left_neighbour(&as->as_area_btree, leaf);
  1260.     if (lnode) {
  1261.         a = (as_area_t *) lnode->value[lnode->keys - 1];
  1262.         mutex_lock(&a->lock);
  1263.         if (va < a->base + a->pages * PAGE_SIZE) {
  1264.             return a;
  1265.         }
  1266.         mutex_unlock(&a->lock);
  1267.     }
  1268.  
  1269.     return NULL;
  1270. }
  1271.  
  1272. /** Check area conflicts with other areas.
  1273.  *
  1274.  * The address space must be locked and interrupts must be disabled.
  1275.  *
  1276.  * @param as        Address space.
  1277.  * @param va        Starting virtual address of the area being tested.
  1278.  * @param size      Size of the area being tested.
  1279.  * @param avoid_area    Do not touch this area.
  1280.  *
  1281.  * @return      True if there is no conflict, false otherwise.
  1282.  */
  1283. bool
  1284. check_area_conflicts(as_t *as, uintptr_t va, size_t size, as_area_t *avoid_area)
  1285. {
  1286.     as_area_t *a;
  1287.     btree_node_t *leaf, *node;
  1288.     unsigned int i;
  1289.    
  1290.     /*
  1291.      * We don't want any area to have conflicts with NULL page.
  1292.      */
  1293.     if (overlaps(va, size, NULL, PAGE_SIZE))
  1294.         return false;
  1295.    
  1296.     /*
  1297.      * The leaf node is found in O(log n), where n is proportional to
  1298.      * the number of address space areas belonging to as.
  1299.      * The check for conflicts is then attempted on the rightmost
  1300.      * record in the left neighbour, the leftmost record in the right
  1301.      * neighbour and all records in the leaf node itself.
  1302.      */
  1303.    
  1304.     if ((a = (as_area_t *) btree_search(&as->as_area_btree, va, &leaf))) {
  1305.         if (a != avoid_area)
  1306.             return false;
  1307.     }
  1308.    
  1309.     /* First, check the two border cases. */
  1310.     if ((node = btree_leaf_node_left_neighbour(&as->as_area_btree, leaf))) {
  1311.         a = (as_area_t *) node->value[node->keys - 1];
  1312.         mutex_lock(&a->lock);
  1313.         if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
  1314.             mutex_unlock(&a->lock);
  1315.             return false;
  1316.         }
  1317.         mutex_unlock(&a->lock);
  1318.     }
  1319.     node = btree_leaf_node_right_neighbour(&as->as_area_btree, leaf);
  1320.     if (node) {
  1321.         a = (as_area_t *) node->value[0];
  1322.         mutex_lock(&a->lock);
  1323.         if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
  1324.             mutex_unlock(&a->lock);
  1325.             return false;
  1326.         }
  1327.         mutex_unlock(&a->lock);
  1328.     }
  1329.    
  1330.     /* Second, check the leaf node. */
  1331.     for (i = 0; i < leaf->keys; i++) {
  1332.         a = (as_area_t *) leaf->value[i];
  1333.    
  1334.         if (a == avoid_area)
  1335.             continue;
  1336.    
  1337.         mutex_lock(&a->lock);
  1338.         if (overlaps(va, size, a->base, a->pages * PAGE_SIZE)) {
  1339.             mutex_unlock(&a->lock);
  1340.             return false;
  1341.         }
  1342.         mutex_unlock(&a->lock);
  1343.     }
  1344.  
  1345.     /*
  1346.      * So far, the area does not conflict with other areas.
  1347.      * Check if it doesn't conflict with kernel address space.
  1348.      */  
  1349.     if (!KERNEL_ADDRESS_SPACE_SHADOWED) {
  1350.         return !overlaps(va, size,
  1351.             KERNEL_ADDRESS_SPACE_START,
  1352.             KERNEL_ADDRESS_SPACE_END - KERNEL_ADDRESS_SPACE_START);
  1353.     }
  1354.  
  1355.     return true;
  1356. }
  1357.  
  1358. /** Return size of the address space area with given base.
  1359.  *
  1360.  * @param base      Arbitrary address insede the address space area.
  1361.  *
  1362.  * @return      Size of the address space area in bytes or zero if it
  1363.  *          does not exist.
  1364.  */
  1365. size_t as_area_get_size(uintptr_t base)
  1366. {
  1367.     ipl_t ipl;
  1368.     as_area_t *src_area;
  1369.     size_t size;
  1370.  
  1371.     ipl = interrupts_disable();
  1372.     src_area = find_area_and_lock(AS, base);
  1373.     if (src_area) {
  1374.         size = src_area->pages * PAGE_SIZE;
  1375.         mutex_unlock(&src_area->lock);
  1376.     } else {
  1377.         size = 0;
  1378.     }
  1379.     interrupts_restore(ipl);
  1380.     return size;
  1381. }
  1382.  
  1383. /** Mark portion of address space area as used.
  1384.  *
  1385.  * The address space area must be already locked.
  1386.  *
  1387.  * @param a     Address space area.
  1388.  * @param page      First page to be marked.
  1389.  * @param count     Number of page to be marked.
  1390.  *
  1391.  * @return      Zero on failure and non-zero on success.
  1392.  */
  1393. int used_space_insert(as_area_t *a, uintptr_t page, count_t count)
  1394. {
  1395.     btree_node_t *leaf, *node;
  1396.     count_t pages;
  1397.     unsigned int i;
  1398.  
  1399.     ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
  1400.     ASSERT(count);
  1401.  
  1402.     pages = (count_t) btree_search(&a->used_space, page, &leaf);
  1403.     if (pages) {
  1404.         /*
  1405.          * We hit the beginning of some used space.
  1406.          */
  1407.         return 0;
  1408.     }
  1409.  
  1410.     if (!leaf->keys) {
  1411.         btree_insert(&a->used_space, page, (void *) count, leaf);
  1412.         return 1;
  1413.     }
  1414.  
  1415.     node = btree_leaf_node_left_neighbour(&a->used_space, leaf);
  1416.     if (node) {
  1417.         uintptr_t left_pg = node->key[node->keys - 1];
  1418.         uintptr_t right_pg = leaf->key[0];
  1419.         count_t left_cnt = (count_t) node->value[node->keys - 1];
  1420.         count_t right_cnt = (count_t) leaf->value[0];
  1421.        
  1422.         /*
  1423.          * Examine the possibility that the interval fits
  1424.          * somewhere between the rightmost interval of
  1425.          * the left neigbour and the first interval of the leaf.
  1426.          */
  1427.          
  1428.         if (page >= right_pg) {
  1429.             /* Do nothing. */
  1430.         } else if (overlaps(page, count * PAGE_SIZE, left_pg,
  1431.             left_cnt * PAGE_SIZE)) {
  1432.             /* The interval intersects with the left interval. */
  1433.             return 0;
  1434.         } else if (overlaps(page, count * PAGE_SIZE, right_pg,
  1435.             right_cnt * PAGE_SIZE)) {
  1436.             /* The interval intersects with the right interval. */
  1437.             return 0;          
  1438.         } else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
  1439.             (page + count * PAGE_SIZE == right_pg)) {
  1440.             /*
  1441.              * The interval can be added by merging the two already
  1442.              * present intervals.
  1443.              */
  1444.             node->value[node->keys - 1] += count + right_cnt;
  1445.             btree_remove(&a->used_space, right_pg, leaf);
  1446.             return 1;
  1447.         } else if (page == left_pg + left_cnt * PAGE_SIZE) {
  1448.             /*
  1449.              * The interval can be added by simply growing the left
  1450.              * interval.
  1451.              */
  1452.             node->value[node->keys - 1] += count;
  1453.             return 1;
  1454.         } else if (page + count * PAGE_SIZE == right_pg) {
  1455.             /*
  1456.              * The interval can be addded by simply moving base of
  1457.              * the right interval down and increasing its size
  1458.              * accordingly.
  1459.              */
  1460.             leaf->value[0] += count;
  1461.             leaf->key[0] = page;
  1462.             return 1;
  1463.         } else {
  1464.             /*
  1465.              * The interval is between both neigbouring intervals,
  1466.              * but cannot be merged with any of them.
  1467.              */
  1468.             btree_insert(&a->used_space, page, (void *) count,
  1469.                 leaf);
  1470.             return 1;
  1471.         }
  1472.     } else if (page < leaf->key[0]) {
  1473.         uintptr_t right_pg = leaf->key[0];
  1474.         count_t right_cnt = (count_t) leaf->value[0];
  1475.    
  1476.         /*
  1477.          * Investigate the border case in which the left neighbour does
  1478.          * not exist but the interval fits from the left.
  1479.          */
  1480.          
  1481.         if (overlaps(page, count * PAGE_SIZE, right_pg,
  1482.             right_cnt * PAGE_SIZE)) {
  1483.             /* The interval intersects with the right interval. */
  1484.             return 0;
  1485.         } else if (page + count * PAGE_SIZE == right_pg) {
  1486.             /*
  1487.              * The interval can be added by moving the base of the
  1488.              * right interval down and increasing its size
  1489.              * accordingly.
  1490.              */
  1491.             leaf->key[0] = page;
  1492.             leaf->value[0] += count;
  1493.             return 1;
  1494.         } else {
  1495.             /*
  1496.              * The interval doesn't adjoin with the right interval.
  1497.              * It must be added individually.
  1498.              */
  1499.             btree_insert(&a->used_space, page, (void *) count,
  1500.                 leaf);
  1501.             return 1;
  1502.         }
  1503.     }
  1504.  
  1505.     node = btree_leaf_node_right_neighbour(&a->used_space, leaf);
  1506.     if (node) {
  1507.         uintptr_t left_pg = leaf->key[leaf->keys - 1];
  1508.         uintptr_t right_pg = node->key[0];
  1509.         count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];
  1510.         count_t right_cnt = (count_t) node->value[0];
  1511.        
  1512.         /*
  1513.          * Examine the possibility that the interval fits
  1514.          * somewhere between the leftmost interval of
  1515.          * the right neigbour and the last interval of the leaf.
  1516.          */
  1517.  
  1518.         if (page < left_pg) {
  1519.             /* Do nothing. */
  1520.         } else if (overlaps(page, count * PAGE_SIZE, left_pg,
  1521.             left_cnt * PAGE_SIZE)) {
  1522.             /* The interval intersects with the left interval. */
  1523.             return 0;
  1524.         } else if (overlaps(page, count * PAGE_SIZE, right_pg,
  1525.             right_cnt * PAGE_SIZE)) {
  1526.             /* The interval intersects with the right interval. */
  1527.             return 0;          
  1528.         } else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
  1529.             (page + count * PAGE_SIZE == right_pg)) {
  1530.             /*
  1531.              * The interval can be added by merging the two already
  1532.              * present intervals.
  1533.              * */
  1534.             leaf->value[leaf->keys - 1] += count + right_cnt;
  1535.             btree_remove(&a->used_space, right_pg, node);
  1536.             return 1;
  1537.         } else if (page == left_pg + left_cnt * PAGE_SIZE) {
  1538.             /*
  1539.              * The interval can be added by simply growing the left
  1540.              * interval.
  1541.              * */
  1542.             leaf->value[leaf->keys - 1] +=  count;
  1543.             return 1;
  1544.         } else if (page + count * PAGE_SIZE == right_pg) {
  1545.             /*
  1546.              * The interval can be addded by simply moving base of
  1547.              * the right interval down and increasing its size
  1548.              * accordingly.
  1549.              */
  1550.             node->value[0] += count;
  1551.             node->key[0] = page;
  1552.             return 1;
  1553.         } else {
  1554.             /*
  1555.              * The interval is between both neigbouring intervals,
  1556.              * but cannot be merged with any of them.
  1557.              */
  1558.             btree_insert(&a->used_space, page, (void *) count,
  1559.                 leaf);
  1560.             return 1;
  1561.         }
  1562.     } else if (page >= leaf->key[leaf->keys - 1]) {
  1563.         uintptr_t left_pg = leaf->key[leaf->keys - 1];
  1564.         count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];
  1565.    
  1566.         /*
  1567.          * Investigate the border case in which the right neighbour
  1568.          * does not exist but the interval fits from the right.
  1569.          */
  1570.          
  1571.         if (overlaps(page, count * PAGE_SIZE, left_pg,
  1572.             left_cnt * PAGE_SIZE)) {
  1573.             /* The interval intersects with the left interval. */
  1574.             return 0;
  1575.         } else if (left_pg + left_cnt * PAGE_SIZE == page) {
  1576.             /*
  1577.              * The interval can be added by growing the left
  1578.              * interval.
  1579.              */
  1580.             leaf->value[leaf->keys - 1] += count;
  1581.             return 1;
  1582.         } else {
  1583.             /*
  1584.              * The interval doesn't adjoin with the left interval.
  1585.              * It must be added individually.
  1586.              */
  1587.             btree_insert(&a->used_space, page, (void *) count,
  1588.                 leaf);
  1589.             return 1;
  1590.         }
  1591.     }
  1592.    
  1593.     /*
  1594.      * Note that if the algorithm made it thus far, the interval can fit
  1595.      * only between two other intervals of the leaf. The two border cases
  1596.      * were already resolved.
  1597.      */
  1598.     for (i = 1; i < leaf->keys; i++) {
  1599.         if (page < leaf->key[i]) {
  1600.             uintptr_t left_pg = leaf->key[i - 1];
  1601.             uintptr_t right_pg = leaf->key[i];
  1602.             count_t left_cnt = (count_t) leaf->value[i - 1];
  1603.             count_t right_cnt = (count_t) leaf->value[i];
  1604.  
  1605.             /*
  1606.              * The interval fits between left_pg and right_pg.
  1607.              */
  1608.  
  1609.             if (overlaps(page, count * PAGE_SIZE, left_pg,
  1610.                 left_cnt * PAGE_SIZE)) {
  1611.                 /*
  1612.                  * The interval intersects with the left
  1613.                  * interval.
  1614.                  */
  1615.                 return 0;
  1616.             } else if (overlaps(page, count * PAGE_SIZE, right_pg,
  1617.                 right_cnt * PAGE_SIZE)) {
  1618.                 /*
  1619.                  * The interval intersects with the right
  1620.                  * interval.
  1621.                  */
  1622.                 return 0;          
  1623.             } else if ((page == left_pg + left_cnt * PAGE_SIZE) &&
  1624.                 (page + count * PAGE_SIZE == right_pg)) {
  1625.                 /*
  1626.                  * The interval can be added by merging the two
  1627.                  * already present intervals.
  1628.                  */
  1629.                 leaf->value[i - 1] += count + right_cnt;
  1630.                 btree_remove(&a->used_space, right_pg, leaf);
  1631.                 return 1;
  1632.             } else if (page == left_pg + left_cnt * PAGE_SIZE) {
  1633.                 /*
  1634.                  * The interval can be added by simply growing
  1635.                  * the left interval.
  1636.                  */
  1637.                 leaf->value[i - 1] += count;
  1638.                 return 1;
  1639.             } else if (page + count * PAGE_SIZE == right_pg) {
  1640.                 /*
  1641.                      * The interval can be addded by simply moving
  1642.                  * base of the right interval down and
  1643.                  * increasing its size accordingly.
  1644.                  */
  1645.                 leaf->value[i] += count;
  1646.                 leaf->key[i] = page;
  1647.                 return 1;
  1648.             } else {
  1649.                 /*
  1650.                  * The interval is between both neigbouring
  1651.                  * intervals, but cannot be merged with any of
  1652.                  * them.
  1653.                  */
  1654.                 btree_insert(&a->used_space, page,
  1655.                     (void *) count, leaf);
  1656.                 return 1;
  1657.             }
  1658.         }
  1659.     }
  1660.  
  1661.     panic("Inconsistency detected while adding %" PRIc " pages of used "
  1662.         "space at %p.\n", count, page);
  1663. }
  1664.  
  1665. /** Mark portion of address space area as unused.
  1666.  *
  1667.  * The address space area must be already locked.
  1668.  *
  1669.  * @param a     Address space area.
  1670.  * @param page      First page to be marked.
  1671.  * @param count     Number of page to be marked.
  1672.  *
  1673.  * @return      Zero on failure and non-zero on success.
  1674.  */
  1675. int used_space_remove(as_area_t *a, uintptr_t page, count_t count)
  1676. {
  1677.     btree_node_t *leaf, *node;
  1678.     count_t pages;
  1679.     unsigned int i;
  1680.  
  1681.     ASSERT(page == ALIGN_DOWN(page, PAGE_SIZE));
  1682.     ASSERT(count);
  1683.  
  1684.     pages = (count_t) btree_search(&a->used_space, page, &leaf);
  1685.     if (pages) {
  1686.         /*
  1687.          * We are lucky, page is the beginning of some interval.
  1688.          */
  1689.         if (count > pages) {
  1690.             return 0;
  1691.         } else if (count == pages) {
  1692.             btree_remove(&a->used_space, page, leaf);
  1693.             return 1;
  1694.         } else {
  1695.             /*
  1696.              * Find the respective interval.
  1697.              * Decrease its size and relocate its start address.
  1698.              */
  1699.             for (i = 0; i < leaf->keys; i++) {
  1700.                 if (leaf->key[i] == page) {
  1701.                     leaf->key[i] += count * PAGE_SIZE;
  1702.                     leaf->value[i] -= count;
  1703.                     return 1;
  1704.                 }
  1705.             }
  1706.             goto error;
  1707.         }
  1708.     }
  1709.  
  1710.     node = btree_leaf_node_left_neighbour(&a->used_space, leaf);
  1711.     if (node && page < leaf->key[0]) {
  1712.         uintptr_t left_pg = node->key[node->keys - 1];
  1713.         count_t left_cnt = (count_t) node->value[node->keys - 1];
  1714.  
  1715.         if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
  1716.             count * PAGE_SIZE)) {
  1717.             if (page + count * PAGE_SIZE ==
  1718.                 left_pg + left_cnt * PAGE_SIZE) {
  1719.                 /*
  1720.                  * The interval is contained in the rightmost
  1721.                  * interval of the left neighbour and can be
  1722.                  * removed by updating the size of the bigger
  1723.                  * interval.
  1724.                  */
  1725.                 node->value[node->keys - 1] -= count;
  1726.                 return 1;
  1727.             } else if (page + count * PAGE_SIZE <
  1728.                 left_pg + left_cnt*PAGE_SIZE) {
  1729.                 count_t new_cnt;
  1730.                
  1731.                 /*
  1732.                  * The interval is contained in the rightmost
  1733.                  * interval of the left neighbour but its
  1734.                  * removal requires both updating the size of
  1735.                  * the original interval and also inserting a
  1736.                  * new interval.
  1737.                  */
  1738.                 new_cnt = ((left_pg + left_cnt * PAGE_SIZE) -
  1739.                     (page + count*PAGE_SIZE)) >> PAGE_WIDTH;
  1740.                 node->value[node->keys - 1] -= count + new_cnt;
  1741.                 btree_insert(&a->used_space, page +
  1742.                     count * PAGE_SIZE, (void *) new_cnt, leaf);
  1743.                 return 1;
  1744.             }
  1745.         }
  1746.         return 0;
  1747.     } else if (page < leaf->key[0]) {
  1748.         return 0;
  1749.     }
  1750.    
  1751.     if (page > leaf->key[leaf->keys - 1]) {
  1752.         uintptr_t left_pg = leaf->key[leaf->keys - 1];
  1753.         count_t left_cnt = (count_t) leaf->value[leaf->keys - 1];
  1754.  
  1755.         if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
  1756.             count * PAGE_SIZE)) {
  1757.             if (page + count * PAGE_SIZE ==
  1758.                 left_pg + left_cnt * PAGE_SIZE) {
  1759.                 /*
  1760.                  * The interval is contained in the rightmost
  1761.                  * interval of the leaf and can be removed by
  1762.                  * updating the size of the bigger interval.
  1763.                  */
  1764.                 leaf->value[leaf->keys - 1] -= count;
  1765.                 return 1;
  1766.             } else if (page + count * PAGE_SIZE < left_pg +
  1767.                 left_cnt * PAGE_SIZE) {
  1768.                 count_t new_cnt;
  1769.                
  1770.                 /*
  1771.                  * The interval is contained in the rightmost
  1772.                  * interval of the leaf but its removal
  1773.                  * requires both updating the size of the
  1774.                  * original interval and also inserting a new
  1775.                  * interval.
  1776.                  */
  1777.                 new_cnt = ((left_pg + left_cnt * PAGE_SIZE) -
  1778.                     (page + count * PAGE_SIZE)) >> PAGE_WIDTH;
  1779.                 leaf->value[leaf->keys - 1] -= count + new_cnt;
  1780.                 btree_insert(&a->used_space, page +
  1781.                     count * PAGE_SIZE, (void *) new_cnt, leaf);
  1782.                 return 1;
  1783.             }
  1784.         }
  1785.         return 0;
  1786.     }  
  1787.    
  1788.     /*
  1789.      * The border cases have been already resolved.
  1790.      * Now the interval can be only between intervals of the leaf.
  1791.      */
  1792.     for (i = 1; i < leaf->keys - 1; i++) {
  1793.         if (page < leaf->key[i]) {
  1794.             uintptr_t left_pg = leaf->key[i - 1];
  1795.             count_t left_cnt = (count_t) leaf->value[i - 1];
  1796.  
  1797.             /*
  1798.              * Now the interval is between intervals corresponding
  1799.              * to (i - 1) and i.
  1800.              */
  1801.             if (overlaps(left_pg, left_cnt * PAGE_SIZE, page,
  1802.                 count * PAGE_SIZE)) {
  1803.                 if (page + count * PAGE_SIZE ==
  1804.                     left_pg + left_cnt*PAGE_SIZE) {
  1805.                     /*
  1806.                      * The interval is contained in the
  1807.                      * interval (i - 1) of the leaf and can
  1808.                      * be removed by updating the size of
  1809.                      * the bigger interval.
  1810.                      */
  1811.                     leaf->value[i - 1] -= count;
  1812.                     return 1;
  1813.                 } else if (page + count * PAGE_SIZE <
  1814.                     left_pg + left_cnt * PAGE_SIZE) {
  1815.                     count_t new_cnt;
  1816.                
  1817.                     /*
  1818.                      * The interval is contained in the
  1819.                      * interval (i - 1) of the leaf but its
  1820.                      * removal requires both updating the
  1821.                      * size of the original interval and
  1822.                      * also inserting a new interval.
  1823.                      */
  1824.                     new_cnt = ((left_pg +
  1825.                         left_cnt * PAGE_SIZE) -
  1826.                         (page + count * PAGE_SIZE)) >>
  1827.                         PAGE_WIDTH;
  1828.                     leaf->value[i - 1] -= count + new_cnt;
  1829.                     btree_insert(&a->used_space, page +
  1830.                         count * PAGE_SIZE, (void *) new_cnt,
  1831.                         leaf);
  1832.                     return 1;
  1833.                 }
  1834.             }
  1835.             return 0;
  1836.         }
  1837.     }
  1838.  
  1839. error:
  1840.     panic("Inconsistency detected while removing %" PRIc " pages of used "
  1841.         "space from %p.\n", count, page);
  1842. }
  1843.  
  1844. /** Remove reference to address space area share info.
  1845.  *
  1846.  * If the reference count drops to 0, the sh_info is deallocated.
  1847.  *
  1848.  * @param sh_info   Pointer to address space area share info.
  1849.  */
  1850. void sh_info_remove_reference(share_info_t *sh_info)
  1851. {
  1852.     bool dealloc = false;
  1853.  
  1854.     mutex_lock(&sh_info->lock);
  1855.     ASSERT(sh_info->refcount);
  1856.     if (--sh_info->refcount == 0) {
  1857.         dealloc = true;
  1858.         link_t *cur;
  1859.        
  1860.         /*
  1861.          * Now walk carefully the pagemap B+tree and free/remove
  1862.          * reference from all frames found there.
  1863.          */
  1864.         for (cur = sh_info->pagemap.leaf_head.next;
  1865.             cur != &sh_info->pagemap.leaf_head; cur = cur->next) {
  1866.             btree_node_t *node;
  1867.             unsigned int i;
  1868.            
  1869.             node = list_get_instance(cur, btree_node_t, leaf_link);
  1870.             for (i = 0; i < node->keys; i++)
  1871.                 frame_free((uintptr_t) node->value[i]);
  1872.         }
  1873.        
  1874.     }
  1875.     mutex_unlock(&sh_info->lock);
  1876.    
  1877.     if (dealloc) {
  1878.         btree_destroy(&sh_info->pagemap);
  1879.         free(sh_info);
  1880.     }
  1881. }
  1882.  
  1883. /*
  1884.  * Address space related syscalls.
  1885.  */
  1886.  
  1887. /** Wrapper for as_area_create(). */
  1888. unative_t sys_as_area_create(uintptr_t address, size_t size, int flags)
  1889. {
  1890.     if (as_area_create(AS, flags | AS_AREA_CACHEABLE, size, address,
  1891.         AS_AREA_ATTR_NONE, &anon_backend, NULL))
  1892.         return (unative_t) address;
  1893.     else
  1894.         return (unative_t) -1;
  1895. }
  1896.  
  1897. /** Wrapper for as_area_resize(). */
  1898. unative_t sys_as_area_resize(uintptr_t address, size_t size, int flags)
  1899. {
  1900.     return (unative_t) as_area_resize(AS, address, size, 0);
  1901. }
  1902.  
  1903. /** Wrapper for as_area_change_flags(). */
  1904. unative_t sys_as_area_change_flags(uintptr_t address, int flags)
  1905. {
  1906.     return (unative_t) as_area_change_flags(AS, flags, address);
  1907. }
  1908.  
  1909. /** Wrapper for as_area_destroy(). */
  1910. unative_t sys_as_area_destroy(uintptr_t address)
  1911. {
  1912.     return (unative_t) as_area_destroy(AS, address);
  1913. }
  1914.  
  1915. /** Print out information about address space.
  1916.  *
  1917.  * @param as        Address space.
  1918.  */
  1919. void as_print(as_t *as)
  1920. {
  1921.     ipl_t ipl;
  1922.    
  1923.     ipl = interrupts_disable();
  1924.     mutex_lock(&as->lock);
  1925.    
  1926.     /* print out info about address space areas */
  1927.     link_t *cur;
  1928.     for (cur = as->as_area_btree.leaf_head.next;
  1929.         cur != &as->as_area_btree.leaf_head; cur = cur->next) {
  1930.         btree_node_t *node;
  1931.        
  1932.         node = list_get_instance(cur, btree_node_t, leaf_link);
  1933.        
  1934.         unsigned int i;
  1935.         for (i = 0; i < node->keys; i++) {
  1936.             as_area_t *area = node->value[i];
  1937.        
  1938.             mutex_lock(&area->lock);
  1939.             printf("as_area: %p, base=%p, pages=%" PRIc
  1940.                 " (%p - %p)\n", area, area->base, area->pages,
  1941.                 area->base, area->base + FRAMES2SIZE(area->pages));
  1942.             mutex_unlock(&area->lock);
  1943.         }
  1944.     }
  1945.    
  1946.     mutex_unlock(&as->lock);
  1947.     interrupts_restore(ipl);
  1948. }
  1949.  
  1950. /** @}
  1951.  */
  1952.