Subversion Repositories HelenOS

Rev

Rev 2709 | Rev 2735 | Go to most recent revision | Blame | Compare with Previous | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright (c) 2008 Jakub Jermar
  3.  * All rights reserved.
  4.  *
  5.  * Redistribution and use in source and binary forms, with or without
  6.  * modification, are permitted provided that the following conditions
  7.  * are met:
  8.  *
  9.  * - Redistributions of source code must retain the above copyright
  10.  *   notice, this list of conditions and the following disclaimer.
  11.  * - Redistributions in binary form must reproduce the above copyright
  12.  *   notice, this list of conditions and the following disclaimer in the
  13.  *   documentation and/or other materials provided with the distribution.
  14.  * - The name of the author may not be used to endorse or promote products
  15.  *   derived from this software without specific prior written permission.
  16.  *
  17.  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  18.  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  19.  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  20.  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  21.  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  22.  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23.  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  26.  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27.  */
  28.  
  29. /** @addtogroup fs
  30.  * @{
  31.  */
  32.  
  33. /**
  34.  * @file    vfs_ops.c
  35.  * @brief   Operations that VFS offers to its clients.
  36.  */
  37.  
  38. #include <ipc/ipc.h>
  39. #include <async.h>
  40. #include <errno.h>
  41. #include <stdio.h>
  42. #include <stdlib.h>
  43. #include <string.h>
  44. #include <bool.h>
  45. #include <futex.h>
  46. #include <rwlock.h>
  47. #include <libadt/list.h>
  48. #include <unistd.h>
  49. #include <ctype.h>
  50. #include <fcntl.h>
  51. #include <assert.h>
  52. #include <atomic.h>
  53. #include "vfs.h"
  54.  
  55. /**
  56.  * This rwlock prevents the race between a triplet-to-VFS-node resolution and a
  57.  * concurrent VFS operation which modifies the file system namespace.
  58.  */
  59. RWLOCK_INITIALIZE(namespace_rwlock);
  60.  
  61. atomic_t rootfs_futex = FUTEX_INITIALIZER;
  62. vfs_triplet_t rootfs = {
  63.     .fs_handle = 0,
  64.     .dev_handle = 0,
  65.     .index = 0,
  66. };
  67.  
  68. static int lookup_root(int fs_handle, int dev_handle, vfs_lookup_res_t *result)
  69. {
  70.     vfs_pair_t altroot = {
  71.         .fs_handle = fs_handle,
  72.         .dev_handle = dev_handle,
  73.     };
  74.  
  75.     return vfs_lookup_internal("/", strlen("/"), L_DIRECTORY, result,
  76.         &altroot);
  77. }
  78.  
  79. void vfs_mount(ipc_callid_t rid, ipc_call_t *request)
  80. {
  81.     int dev_handle;
  82.     vfs_node_t *mp_node = NULL;
  83.  
  84.     /*
  85.      * We expect the library to do the device-name to device-handle
  86.      * translation for us, thus the device handle will arrive as ARG1
  87.      * in the request.
  88.      */
  89.     dev_handle = IPC_GET_ARG1(*request);
  90.  
  91.     /*
  92.      * For now, don't make use of ARG2 and ARG3, but they can be used to
  93.      * carry mount options in the future.
  94.      */
  95.  
  96.     ipc_callid_t callid;
  97.     size_t size;
  98.  
  99.     /*
  100.      * Now, we expect the client to send us data with the name of the file
  101.      * system.
  102.      */
  103.     if (!ipc_data_write_receive(&callid, &size)) {
  104.         ipc_answer_0(callid, EINVAL);
  105.         ipc_answer_0(rid, EINVAL);
  106.         return;
  107.     }
  108.  
  109.     /*
  110.      * Don't receive more than is necessary for storing a full file system
  111.      * name.
  112.      */
  113.     if (size < 1 || size > FS_NAME_MAXLEN) {
  114.         ipc_answer_0(callid, EINVAL);
  115.         ipc_answer_0(rid, EINVAL);
  116.         return;
  117.     }
  118.  
  119.     /* Deliver the file system name. */
  120.     char fs_name[FS_NAME_MAXLEN + 1];
  121.     (void) ipc_data_write_finalize(callid, fs_name, size);
  122.     fs_name[size] = '\0';
  123.    
  124.     /*
  125.      * Check if we know a file system with the same name as is in fs_name.
  126.      * This will also give us its file system handle.
  127.      */
  128.     int fs_handle = fs_name_to_handle(fs_name, true);
  129.     if (!fs_handle) {
  130.         ipc_answer_0(rid, ENOENT);
  131.         return;
  132.     }
  133.  
  134.     /* Now, we want the client to send us the mount point. */
  135.     if (!ipc_data_write_receive(&callid, &size)) {
  136.         ipc_answer_0(callid, EINVAL);
  137.         ipc_answer_0(rid, EINVAL);
  138.         return;
  139.     }
  140.  
  141.     /* Check whether size is reasonable wrt. the mount point. */
  142.     if (size < 1 || size > MAX_PATH_LEN) {
  143.         ipc_answer_0(callid, EINVAL);
  144.         ipc_answer_0(rid, EINVAL);
  145.         return;
  146.     }
  147.     /* Allocate buffer for the mount point data being received. */
  148.     uint8_t *buf;
  149.     buf = malloc(size);
  150.     if (!buf) {
  151.         ipc_answer_0(callid, ENOMEM);
  152.         ipc_answer_0(rid, ENOMEM);
  153.         return;
  154.     }
  155.  
  156.     /* Deliver the mount point. */
  157.     (void) ipc_data_write_finalize(callid, buf, size);
  158.  
  159.     /*
  160.      * Lookup the root node of the filesystem being mounted.
  161.      * In this case, we don't need to take the namespace_futex as the root
  162.      * node cannot be removed. However, we do take a reference to it so
  163.      * that we can track how many times it has been mounted.
  164.      */
  165.     int rc;
  166.     vfs_lookup_res_t mr_res;
  167.     rc = lookup_root(fs_handle, dev_handle, &mr_res);
  168.     if (rc != EOK) {
  169.         free(buf);
  170.         ipc_answer_0(rid, rc);
  171.         return;
  172.     }
  173.     vfs_node_t *mr_node = vfs_node_get(&mr_res);
  174.     if (!mr_node) {
  175.         free(buf);
  176.         ipc_answer_0(rid, ENOMEM);
  177.         return;
  178.     }
  179.  
  180.     /* Finally, we need to resolve the path to the mountpoint. */
  181.     vfs_lookup_res_t mp_res;
  182.     futex_down(&rootfs_futex);
  183.     if (rootfs.fs_handle) {
  184.         /* We already have the root FS. */
  185.         rwlock_write_lock(&namespace_rwlock);
  186.         rc = vfs_lookup_internal(buf, size, L_DIRECTORY, &mp_res,
  187.             NULL);
  188.         if (rc != EOK) {
  189.             /* The lookup failed for some reason. */
  190.             rwlock_write_unlock(&namespace_rwlock);
  191.             futex_up(&rootfs_futex);
  192.             vfs_node_put(mr_node);  /* failed -> drop reference */
  193.             free(buf);
  194.             ipc_answer_0(rid, rc);
  195.             return;
  196.         }
  197.         mp_node = vfs_node_get(&mp_res);
  198.         if (!mp_node) {
  199.             rwlock_write_unlock(&namespace_rwlock);
  200.             futex_up(&rootfs_futex);
  201.             vfs_node_put(mr_node);  /* failed -> drop reference */
  202.             free(buf);
  203.             ipc_answer_0(rid, ENOMEM);
  204.             return;
  205.         }
  206.         /*
  207.          * Now we hold a reference to mp_node.
  208.          * It will be dropped upon the corresponding VFS_UNMOUNT.
  209.          * This prevents the mount point from being deleted.
  210.          */
  211.         rwlock_write_unlock(&namespace_rwlock);
  212.     } else {
  213.         /* We still don't have the root file system mounted. */
  214.         if ((size == 1) && (buf[0] == '/')) {
  215.             /* For this simple, but important case, we are done. */
  216.             rootfs = mr_res.triplet;
  217.             futex_up(&rootfs_futex);
  218.             free(buf);
  219.             ipc_answer_0(rid, EOK);
  220.             return;
  221.         } else {
  222.             /*
  223.              * We can't resolve this without the root filesystem
  224.              * being mounted first.
  225.              */
  226.             futex_up(&rootfs_futex);
  227.             free(buf);
  228.             vfs_node_put(mr_node);  /* failed -> drop reference */
  229.             ipc_answer_0(rid, ENOENT);
  230.             return;
  231.         }
  232.     }
  233.     futex_up(&rootfs_futex);
  234.    
  235.     free(buf);  /* The buffer is not needed anymore. */
  236.    
  237.     /*
  238.      * At this point, we have all necessary pieces: file system and device
  239.      * handles, and we know the mount point VFS node and also the root node
  240.      * of the file system being mounted.
  241.      */
  242.  
  243.     int phone = vfs_grab_phone(mp_res.triplet.fs_handle);
  244.     /* Later we can use ARG3 to pass mode/flags. */
  245.     aid_t req1 = async_send_3(phone, VFS_MOUNT,
  246.         (ipcarg_t) mp_res.triplet.dev_handle,
  247.         (ipcarg_t) mp_res.triplet.index, 0, NULL);
  248.     /* The second call uses the same method. */
  249.     aid_t req2 = async_send_3(phone, VFS_MOUNT,
  250.         (ipcarg_t) mr_res.triplet.fs_handle,
  251.         (ipcarg_t) mr_res.triplet.dev_handle,
  252.         (ipcarg_t) mr_res.triplet.index, NULL);
  253.     vfs_release_phone(phone);
  254.  
  255.     ipcarg_t rc1;
  256.     ipcarg_t rc2;
  257.     async_wait_for(req1, &rc1);
  258.     async_wait_for(req2, &rc2);
  259.  
  260.     if ((rc1 != EOK) || (rc2 != EOK)) {
  261.         /* Mount failed, drop references to mr_node and mp_node. */
  262.         vfs_node_put(mr_node);
  263.         if (mp_node)
  264.             vfs_node_put(mp_node);
  265.     }
  266.    
  267.     if (rc2 == EOK)
  268.         ipc_answer_0(rid, rc1);
  269.     else if (rc1 == EOK)
  270.         ipc_answer_0(rid, rc2);
  271.     else
  272.         ipc_answer_0(rid, rc1);
  273. }
  274.  
  275. void vfs_open(ipc_callid_t rid, ipc_call_t *request)
  276. {
  277.     if (!vfs_files_init()) {
  278.         ipc_answer_0(rid, ENOMEM);
  279.         return;
  280.     }
  281.  
  282.     /*
  283.      * The POSIX interface is open(path, oflag, mode).
  284.      * We can receive oflags and mode along with the VFS_OPEN call; the path
  285.      * will need to arrive in another call.
  286.      *
  287.      * We also receive one private, non-POSIX set of flags called lflag
  288.      * used to pass information to vfs_lookup_internal().
  289.      */
  290.     int lflag = IPC_GET_ARG1(*request);
  291.     int oflag = IPC_GET_ARG2(*request);
  292.     int mode = IPC_GET_ARG3(*request);
  293.     size_t len;
  294.  
  295.     if (oflag & O_CREAT)
  296.         lflag |= L_CREATE;
  297.     if (oflag & O_EXCL)
  298.         lflag |= L_EXCLUSIVE;
  299.  
  300.     ipc_callid_t callid;
  301.  
  302.     if (!ipc_data_write_receive(&callid, &len)) {
  303.         ipc_answer_0(callid, EINVAL);
  304.         ipc_answer_0(rid, EINVAL);
  305.         return;
  306.     }
  307.  
  308.     /*
  309.      * Now we are on the verge of accepting the path.
  310.      *
  311.      * There is one optimization we could do in the future: copy the path
  312.      * directly into the PLB using some kind of a callback.
  313.      */
  314.     char *path = malloc(len);
  315.    
  316.     if (!path) {
  317.         ipc_answer_0(callid, ENOMEM);
  318.         ipc_answer_0(rid, ENOMEM);
  319.         return;
  320.     }
  321.  
  322.     int rc;
  323.     if ((rc = ipc_data_write_finalize(callid, path, len))) {
  324.         ipc_answer_0(rid, rc);
  325.         free(path);
  326.         return;
  327.     }
  328.    
  329.     /*
  330.      * Avoid the race condition in which the file can be deleted before we
  331.      * find/create-and-lock the VFS node corresponding to the looked-up
  332.      * triplet.
  333.      */
  334.     if (lflag & L_CREATE)
  335.         rwlock_write_lock(&namespace_rwlock);
  336.     else
  337.         rwlock_read_lock(&namespace_rwlock);
  338.  
  339.     /* The path is now populated and we can call vfs_lookup_internal(). */
  340.     vfs_lookup_res_t lr;
  341.     rc = vfs_lookup_internal(path, len, lflag, &lr, NULL);
  342.     if (rc) {
  343.         if (lflag & L_CREATE)
  344.             rwlock_write_unlock(&namespace_rwlock);
  345.         else
  346.             rwlock_read_unlock(&namespace_rwlock);
  347.         ipc_answer_0(rid, rc);
  348.         free(path);
  349.         return;
  350.     }
  351.  
  352.     /** Path is no longer needed. */
  353.     free(path);
  354.  
  355.     vfs_node_t *node = vfs_node_get(&lr);
  356.     if (lflag & L_CREATE)
  357.         rwlock_write_unlock(&namespace_rwlock);
  358.     else
  359.         rwlock_read_unlock(&namespace_rwlock);
  360.  
  361.     /*
  362.      * Get ourselves a file descriptor and the corresponding vfs_file_t
  363.      * structure.
  364.      */
  365.     int fd = vfs_fd_alloc();
  366.     if (fd < 0) {
  367.         vfs_node_put(node);
  368.         ipc_answer_0(rid, fd);
  369.         return;
  370.     }
  371.     vfs_file_t *file = vfs_file_get(fd);
  372.     file->node = node;
  373.     if (oflag & O_APPEND)
  374.         file->append = true;
  375.  
  376.     /*
  377.      * The following increase in reference count is for the fact that the
  378.      * file is being opened and that a file structure is pointing to it.
  379.      * It is necessary so that the file will not disappear when
  380.      * vfs_node_put() is called. The reference will be dropped by the
  381.      * respective VFS_CLOSE.
  382.      */
  383.     vfs_node_addref(node);
  384.     vfs_node_put(node);
  385.  
  386.     /* Success! Return the new file descriptor to the client. */
  387.     ipc_answer_1(rid, EOK, fd);
  388. }
  389.  
  390. static void vfs_rdwr(ipc_callid_t rid, ipc_call_t *request, bool read)
  391. {
  392.  
  393.     /*
  394.      * The following code strongly depends on the fact that the files data
  395.      * structure can be only accessed by a single fibril and all file
  396.      * operations are serialized (i.e. the reads and writes cannot
  397.      * interleave and a file cannot be closed while it is being read).
  398.      *
  399.      * Additional synchronization needs to be added once the table of
  400.      * open files supports parallel access!
  401.      */
  402.  
  403.     int fd = IPC_GET_ARG1(*request);
  404.  
  405.     /* Lookup the file structure corresponding to the file descriptor. */
  406.     vfs_file_t *file = vfs_file_get(fd);
  407.     if (!file) {
  408.         ipc_answer_0(rid, ENOENT);
  409.         return;
  410.     }
  411.  
  412.     /*
  413.      * Now we need to receive a call with client's
  414.      * IPC_M_DATA_READ/IPC_M_DATA_WRITE request.
  415.      */
  416.     ipc_callid_t callid;
  417.     int res;
  418.     if (read)
  419.         res = ipc_data_read_receive(&callid, NULL);
  420.     else
  421.         res = ipc_data_write_receive(&callid, NULL);
  422.     if (!res) {
  423.         ipc_answer_0(callid, EINVAL);
  424.         ipc_answer_0(rid, EINVAL);
  425.         return;
  426.     }
  427.  
  428.     /*
  429.      * Lock the open file structure so that no other thread can manipulate
  430.      * the same open file at a time.
  431.      */
  432.     futex_down(&file->lock);
  433.  
  434.     /*
  435.      * Lock the file's node so that no other client can read/write to it at
  436.      * the same time.
  437.      */
  438.     if (read)
  439.         rwlock_read_lock(&file->node->contents_rwlock);
  440.     else
  441.         rwlock_write_lock(&file->node->contents_rwlock);
  442.  
  443.     int fs_phone = vfs_grab_phone(file->node->fs_handle);  
  444.    
  445.     /* Make a VFS_READ/VFS_WRITE request at the destination FS server. */
  446.     aid_t msg;
  447.     ipc_call_t answer;
  448.     if (!read && file->append)
  449.         file->pos = file->node->size;
  450.     msg = async_send_3(fs_phone, IPC_GET_METHOD(*request),
  451.         file->node->dev_handle, file->node->index, file->pos, &answer);
  452.    
  453.     /*
  454.      * Forward the IPC_M_DATA_READ/IPC_M_DATA_WRITE request to the
  455.      * destination FS server. The call will be routed as if sent by
  456.      * ourselves. Note that call arguments are immutable in this case so we
  457.      * don't have to bother.
  458.      */
  459.     ipc_forward_fast(callid, fs_phone, 0, 0, 0, IPC_FF_ROUTE_FROM_ME);
  460.  
  461.     vfs_release_phone(fs_phone);
  462.  
  463.     /* Wait for reply from the FS server. */
  464.     ipcarg_t rc;
  465.     async_wait_for(msg, &rc);
  466.     size_t bytes = IPC_GET_ARG1(answer);
  467.  
  468.     /* Unlock the VFS node. */
  469.     if (read)
  470.         rwlock_read_unlock(&file->node->contents_rwlock);
  471.     else {
  472.         /* Update the cached version of node's size. */
  473.         if (rc == EOK)
  474.             file->node->size = IPC_GET_ARG2(answer);
  475.         rwlock_write_unlock(&file->node->contents_rwlock);
  476.     }
  477.  
  478.     /* Update the position pointer and unlock the open file. */
  479.     if (rc == EOK)
  480.         file->pos += bytes;
  481.     futex_up(&file->lock);
  482.  
  483.     /*
  484.      * FS server's reply is the final result of the whole operation we
  485.      * return to the client.
  486.      */
  487.     ipc_answer_1(rid, rc, bytes);
  488. }
  489.  
  490. void vfs_read(ipc_callid_t rid, ipc_call_t *request)
  491. {
  492.     vfs_rdwr(rid, request, true);
  493. }
  494.  
  495. void vfs_write(ipc_callid_t rid, ipc_call_t *request)
  496. {
  497.     vfs_rdwr(rid, request, false);
  498. }
  499.  
  500. void vfs_seek(ipc_callid_t rid, ipc_call_t *request)
  501. {
  502.     int fd = (int) IPC_GET_ARG1(*request);
  503.     off_t off = (off_t) IPC_GET_ARG2(*request);
  504.     int whence = (int) IPC_GET_ARG3(*request);
  505.  
  506.  
  507.     /* Lookup the file structure corresponding to the file descriptor. */
  508.     vfs_file_t *file = vfs_file_get(fd);
  509.     if (!file) {
  510.         ipc_answer_0(rid, ENOENT);
  511.         return;
  512.     }
  513.  
  514.     off_t newpos;
  515.     futex_down(&file->lock);
  516.     if (whence == SEEK_SET) {
  517.         file->pos = off;
  518.         futex_up(&file->lock);
  519.         ipc_answer_1(rid, EOK, off);
  520.         return;
  521.     }
  522.     if (whence == SEEK_CUR) {
  523.         if (file->pos + off < file->pos) {
  524.             futex_up(&file->lock);
  525.             ipc_answer_0(rid, EOVERFLOW);
  526.             return;
  527.         }
  528.         file->pos += off;
  529.         newpos = file->pos;
  530.         futex_up(&file->lock);
  531.         ipc_answer_1(rid, EOK, newpos);
  532.         return;
  533.     }
  534.     if (whence == SEEK_END) {
  535.         rwlock_read_lock(&file->node->contents_rwlock);
  536.         size_t size = file->node->size;
  537.         rwlock_read_unlock(&file->node->contents_rwlock);
  538.         if (size + off < size) {
  539.             futex_up(&file->lock);
  540.             ipc_answer_0(rid, EOVERFLOW);
  541.             return;
  542.         }
  543.         newpos = size + off;
  544.         futex_up(&file->lock);
  545.         ipc_answer_1(rid, EOK, newpos);
  546.         return;
  547.     }
  548.     futex_up(&file->lock);
  549.     ipc_answer_0(rid, EINVAL);
  550. }
  551.  
  552. void vfs_truncate(ipc_callid_t rid, ipc_call_t *request)
  553. {
  554.     int fd = IPC_GET_ARG1(*request);
  555.     size_t size = IPC_GET_ARG2(*request);
  556.     ipcarg_t rc;
  557.  
  558.     vfs_file_t *file = vfs_file_get(fd);
  559.     if (!file) {
  560.         ipc_answer_0(rid, ENOENT);
  561.         return;
  562.     }
  563.     futex_down(&file->lock);
  564.  
  565.     rwlock_write_lock(&file->node->contents_rwlock);
  566.     int fs_phone = vfs_grab_phone(file->node->fs_handle);
  567.     rc = async_req_3_0(fs_phone, VFS_TRUNCATE,
  568.         (ipcarg_t)file->node->dev_handle, (ipcarg_t)file->node->index,
  569.         (ipcarg_t)size);
  570.     vfs_release_phone(fs_phone);
  571.     if (rc == EOK)
  572.         file->node->size = size;
  573.     rwlock_write_unlock(&file->node->contents_rwlock);
  574.  
  575.     futex_up(&file->lock);
  576.     ipc_answer_0(rid, rc);
  577. }
  578.  
  579. void vfs_mkdir(ipc_callid_t rid, ipc_call_t *request)
  580. {
  581.     int mode = IPC_GET_ARG1(*request);
  582.     size_t len;
  583.  
  584.     ipc_callid_t callid;
  585.  
  586.     if (!ipc_data_write_receive(&callid, &len)) {
  587.         ipc_answer_0(callid, EINVAL);
  588.         ipc_answer_0(rid, EINVAL);
  589.         return;
  590.     }
  591.  
  592.     /*
  593.      * Now we are on the verge of accepting the path.
  594.      *
  595.      * There is one optimization we could do in the future: copy the path
  596.      * directly into the PLB using some kind of a callback.
  597.      */
  598.     char *path = malloc(len);
  599.    
  600.     if (!path) {
  601.         ipc_answer_0(callid, ENOMEM);
  602.         ipc_answer_0(rid, ENOMEM);
  603.         return;
  604.     }
  605.  
  606.     int rc;
  607.     if ((rc = ipc_data_write_finalize(callid, path, len))) {
  608.         ipc_answer_0(rid, rc);
  609.         free(path);
  610.         return;
  611.     }
  612.    
  613.     rwlock_write_lock(&namespace_rwlock);
  614.     int lflag = L_DIRECTORY | L_CREATE | L_EXCLUSIVE;
  615.     rc = vfs_lookup_internal(path, len, lflag, NULL, NULL);
  616.     rwlock_write_unlock(&namespace_rwlock);
  617.     free(path);
  618.     ipc_answer_0(rid, rc);
  619. }
  620.  
  621. /**
  622.  * @}
  623.  */
  624.