From 2a20dba0727ac9731e957a93cf69f1aab3ed506d Mon Sep 17 00:00:00 2001 From: Rune Tynan Date: Thu, 21 Nov 2024 09:48:44 -0800 Subject: [PATCH] Move FdTable to a common location and split off unix behavior. Preparation for Windows FS support --- src/shims/files.rs | 411 +++++++++++++++++++++++++++++++ src/shims/mod.rs | 4 +- src/shims/unix/fd.rs | 404 +----------------------------- src/shims/unix/fs.rs | 105 ++++---- src/shims/unix/linux/epoll.rs | 12 +- src/shims/unix/linux/eventfd.rs | 32 ++- src/shims/unix/mod.rs | 2 +- src/shims/unix/unnamed_socket.rs | 104 ++++---- 8 files changed, 560 insertions(+), 514 deletions(-) create mode 100644 src/shims/files.rs diff --git a/src/shims/files.rs b/src/shims/files.rs new file mode 100644 index 0000000000..1d533fffad --- /dev/null +++ b/src/shims/files.rs @@ -0,0 +1,411 @@ +use std::any::Any; +use std::collections::BTreeMap; +use std::io::{IsTerminal, Read, SeekFrom, Write}; +use std::ops::Deref; +use std::rc::{Rc, Weak}; +use std::{fs, io}; + +use rustc_abi::Size; + +use crate::shims::unix::UnixFileDescription; +use crate::*; + +/// Represents an open file description. +pub trait FileDescription: std::fmt::Debug + Any { + fn name(&self) -> &'static str; + + /// Reads as much as possible into the given buffer `ptr`. + /// `len` indicates how many bytes we should try to read. + /// `dest` is where the return value should be stored: number of bytes read, or `-1` in case of error. + fn read<'tcx>( + &self, + _self_ref: &FileDescriptionRef, + _communicate_allowed: bool, + _ptr: Pointer, + _len: usize, + _dest: &MPlaceTy<'tcx>, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx> { + throw_unsup_format!("cannot read from {}", self.name()); + } + + /// Writes as much as possible from the given buffer `ptr`. + /// `len` indicates how many bytes we should try to write. + /// `dest` is where the return value should be stored: number of bytes written, or `-1` in case of error. + fn write<'tcx>( + &self, + _self_ref: &FileDescriptionRef, + _communicate_allowed: bool, + _ptr: Pointer, + _len: usize, + _dest: &MPlaceTy<'tcx>, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx> { + throw_unsup_format!("cannot write to {}", self.name()); + } + + /// Seeks to the given offset (which can be relative to the beginning, end, or current position). + /// Returns the new position from the start of the stream. + fn seek<'tcx>( + &self, + _communicate_allowed: bool, + _offset: SeekFrom, + ) -> InterpResult<'tcx, io::Result> { + throw_unsup_format!("cannot seek on {}", self.name()); + } + + fn close<'tcx>( + self: Box, + _communicate_allowed: bool, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, io::Result<()>> { + throw_unsup_format!("cannot close {}", self.name()); + } + + fn metadata<'tcx>(&self) -> InterpResult<'tcx, io::Result> { + throw_unsup_format!("obtaining metadata is only supported on file-backed file descriptors"); + } + + fn is_tty(&self, _communicate_allowed: bool) -> bool { + // Most FDs are not tty's and the consequence of a wrong `false` are minor, + // so we use a default impl here. + false + } + + fn as_unix<'tcx>(&self) -> InterpResult<'tcx, &dyn UnixFileDescription> { + throw_unsup_format!("Not a unix file descriptor: {}", self.name()); + } +} + +impl dyn FileDescription { + #[inline(always)] + pub fn downcast(&self) -> Option<&T> { + (self as &dyn Any).downcast_ref() + } +} + +impl FileDescription for io::Stdin { + fn name(&self) -> &'static str { + "stdin" + } + + fn read<'tcx>( + &self, + _self_ref: &FileDescriptionRef, + communicate_allowed: bool, + ptr: Pointer, + len: usize, + dest: &MPlaceTy<'tcx>, + ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx> { + let mut bytes = vec![0; len]; + if !communicate_allowed { + // We want isolation mode to be deterministic, so we have to disallow all reads, even stdin. + helpers::isolation_abort_error("`read` from stdin")?; + } + let result = Read::read(&mut { self }, &mut bytes); + match result { + Ok(read_size) => ecx.return_read_success(ptr, &bytes, read_size, dest), + Err(e) => ecx.set_last_error_and_return(e, dest), + } + } + + fn is_tty(&self, communicate_allowed: bool) -> bool { + communicate_allowed && self.is_terminal() + } +} + +impl FileDescription for io::Stdout { + fn name(&self) -> &'static str { + "stdout" + } + + fn write<'tcx>( + &self, + _self_ref: &FileDescriptionRef, + _communicate_allowed: bool, + ptr: Pointer, + len: usize, + dest: &MPlaceTy<'tcx>, + ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx> { + let bytes = ecx.read_bytes_ptr_strip_provenance(ptr, Size::from_bytes(len))?; + // We allow writing to stderr even with isolation enabled. + let result = Write::write(&mut { self }, bytes); + // Stdout is buffered, flush to make sure it appears on the + // screen. This is the write() syscall of the interpreted + // program, we want it to correspond to a write() syscall on + // the host -- there is no good in adding extra buffering + // here. + io::stdout().flush().unwrap(); + match result { + Ok(write_size) => ecx.return_write_success(write_size, dest), + Err(e) => ecx.set_last_error_and_return(e, dest), + } + } + + fn is_tty(&self, communicate_allowed: bool) -> bool { + communicate_allowed && self.is_terminal() + } +} + +impl FileDescription for io::Stderr { + fn name(&self) -> &'static str { + "stderr" + } + + fn write<'tcx>( + &self, + _self_ref: &FileDescriptionRef, + _communicate_allowed: bool, + ptr: Pointer, + len: usize, + dest: &MPlaceTy<'tcx>, + ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx> { + let bytes = ecx.read_bytes_ptr_strip_provenance(ptr, Size::from_bytes(len))?; + // We allow writing to stderr even with isolation enabled. + // No need to flush, stderr is not buffered. + let result = Write::write(&mut { self }, bytes); + match result { + Ok(write_size) => ecx.return_write_success(write_size, dest), + Err(e) => ecx.set_last_error_and_return(e, dest), + } + } + + fn is_tty(&self, communicate_allowed: bool) -> bool { + communicate_allowed && self.is_terminal() + } +} + +/// Like /dev/null +#[derive(Debug)] +pub struct NullOutput; + +impl FileDescription for NullOutput { + fn name(&self) -> &'static str { + "stderr and stdout" + } + + fn write<'tcx>( + &self, + _self_ref: &FileDescriptionRef, + _communicate_allowed: bool, + _ptr: Pointer, + len: usize, + dest: &MPlaceTy<'tcx>, + ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx> { + // We just don't write anything, but report to the user that we did. + ecx.return_write_success(len, dest) + } +} + +/// Structure contains both the file description and its unique identifier. +#[derive(Clone, Debug)] +pub struct FileDescWithId { + id: FdId, + file_description: Box, +} + +#[derive(Clone, Debug)] +pub struct FileDescriptionRef(Rc>); + +impl Deref for FileDescriptionRef { + type Target = dyn FileDescription; + + fn deref(&self) -> &Self::Target { + &*self.0.file_description + } +} + +impl FileDescriptionRef { + pub fn new(fd: impl FileDescription, id: FdId) -> Self { + FileDescriptionRef(Rc::new(FileDescWithId { id, file_description: Box::new(fd) })) + } + + pub fn close<'tcx>( + self, + communicate_allowed: bool, + ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, io::Result<()>> { + // Destroy this `Rc` using `into_inner` so we can call `close` instead of + // implicitly running the destructor of the file description. + let id = self.get_id(); + match Rc::into_inner(self.0) { + Some(fd) => { + // Remove entry from the global epoll_event_interest table. + ecx.machine.epoll_interests.remove(id); + + fd.file_description.close(communicate_allowed, ecx) + } + None => interp_ok(Ok(())), + } + } + + pub fn downgrade(&self) -> WeakFileDescriptionRef { + WeakFileDescriptionRef { weak_ref: Rc::downgrade(&self.0) } + } + + pub fn get_id(&self) -> FdId { + self.0.id + } +} + +/// Holds a weak reference to the actual file description. +#[derive(Clone, Debug, Default)] +pub struct WeakFileDescriptionRef { + weak_ref: Weak>, +} + +impl WeakFileDescriptionRef { + pub fn upgrade(&self) -> Option { + if let Some(file_desc_with_id) = self.weak_ref.upgrade() { + return Some(FileDescriptionRef(file_desc_with_id)); + } + None + } +} + +impl VisitProvenance for WeakFileDescriptionRef { + fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { + // A weak reference can never be the only reference to some pointer or place. + // Since the actual file description is tracked by strong ref somewhere, + // it is ok to make this a NOP operation. + } +} + +/// A unique id for file descriptions. While we could use the address, considering that +/// is definitely unique, the address would expose interpreter internal state when used +/// for sorting things. So instead we generate a unique id per file description that stays +/// the same even if a file descriptor is duplicated and gets a new integer file descriptor. +#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd)] +pub struct FdId(usize); + +/// The file descriptor table +#[derive(Debug)] +pub struct FdTable { + pub fds: BTreeMap, + /// Unique identifier for file description, used to differentiate between various file description. + next_file_description_id: FdId, +} + +impl VisitProvenance for FdTable { + fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { + // All our FileDescription instances do not have any tags. + } +} + +impl FdTable { + fn new() -> Self { + FdTable { fds: BTreeMap::new(), next_file_description_id: FdId(0) } + } + pub(crate) fn init(mute_stdout_stderr: bool) -> FdTable { + let mut fds = FdTable::new(); + fds.insert_new(io::stdin()); + if mute_stdout_stderr { + assert_eq!(fds.insert_new(NullOutput), 1); + assert_eq!(fds.insert_new(NullOutput), 2); + } else { + assert_eq!(fds.insert_new(io::stdout()), 1); + assert_eq!(fds.insert_new(io::stderr()), 2); + } + fds + } + + pub fn new_ref(&mut self, fd: impl FileDescription) -> FileDescriptionRef { + let file_handle = FileDescriptionRef::new(fd, self.next_file_description_id); + self.next_file_description_id = FdId(self.next_file_description_id.0.strict_add(1)); + file_handle + } + + /// Insert a new file description to the FdTable. + pub fn insert_new(&mut self, fd: impl FileDescription) -> i32 { + let fd_ref = self.new_ref(fd); + self.insert(fd_ref) + } + + pub fn insert(&mut self, fd_ref: FileDescriptionRef) -> i32 { + self.insert_with_min_num(fd_ref, 0) + } + + /// Insert a file description, giving it a file descriptor that is at least `min_fd_num`. + pub fn insert_with_min_num(&mut self, file_handle: FileDescriptionRef, min_fd_num: i32) -> i32 { + // Find the lowest unused FD, starting from min_fd. If the first such unused FD is in + // between used FDs, the find_map combinator will return it. If the first such unused FD + // is after all other used FDs, the find_map combinator will return None, and we will use + // the FD following the greatest FD thus far. + let candidate_new_fd = + self.fds.range(min_fd_num..).zip(min_fd_num..).find_map(|((fd_num, _fd), counter)| { + if *fd_num != counter { + // There was a gap in the fds stored, return the first unused one + // (note that this relies on BTreeMap iterating in key order) + Some(counter) + } else { + // This fd is used, keep going + None + } + }); + let new_fd_num = candidate_new_fd.unwrap_or_else(|| { + // find_map ran out of BTreeMap entries before finding a free fd, use one plus the + // maximum fd in the map + self.fds.last_key_value().map(|(fd_num, _)| fd_num.strict_add(1)).unwrap_or(min_fd_num) + }); + + self.fds.try_insert(new_fd_num, file_handle).unwrap(); + new_fd_num + } + + pub fn get(&self, fd_num: i32) -> Option { + let fd = self.fds.get(&fd_num)?; + Some(fd.clone()) + } + + pub fn remove(&mut self, fd_num: i32) -> Option { + self.fds.remove(&fd_num) + } + + pub fn is_fd_num(&self, fd_num: i32) -> bool { + self.fds.contains_key(&fd_num) + } +} + +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} +pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { + /// Helper to implement `FileDescription::read`: + /// This is only used when `read` is successful. + /// `actual_read_size` should be the return value of some underlying `read` call that used + /// `bytes` as its output buffer. + /// The length of `bytes` must not exceed either the host's or the target's `isize`. + /// `bytes` is written to `buf` and the size is written to `dest`. + fn return_read_success( + &mut self, + buf: Pointer, + bytes: &[u8], + actual_read_size: usize, + dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + // If reading to `bytes` did not fail, we write those bytes to the buffer. + // Crucially, if fewer than `bytes.len()` bytes were read, only write + // that much into the output buffer! + this.write_bytes_ptr(buf, bytes[..actual_read_size].iter().copied())?; + + // The actual read size is always less than what got originally requested so this cannot fail. + this.write_int(u64::try_from(actual_read_size).unwrap(), dest)?; + interp_ok(()) + } + + /// Helper to implement `FileDescription::write`: + /// This function is only used when `write` is successful, and writes `actual_write_size` to `dest` + fn return_write_success( + &mut self, + actual_write_size: usize, + dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + // The actual write size is always less than what got originally requested so this cannot fail. + this.write_int(u64::try_from(actual_write_size).unwrap(), dest)?; + interp_ok(()) + } +} diff --git a/src/shims/mod.rs b/src/shims/mod.rs index b9317ac1a1..61681edcf7 100644 --- a/src/shims/mod.rs +++ b/src/shims/mod.rs @@ -2,6 +2,7 @@ mod alloc; mod backtrace; +mod files; #[cfg(unix)] mod native_lib; mod unix; @@ -18,7 +19,8 @@ pub mod panic; pub mod time; pub mod tls; -pub use self::unix::{DirTable, EpollInterestTable, FdTable}; +pub use self::files::FdTable; +pub use self::unix::{DirTable, EpollInterestTable}; /// What needs to be done after emulating an item (a shim or an intrinsic) is done. pub enum EmulateItemResult { diff --git a/src/shims/unix/fd.rs b/src/shims/unix/fd.rs index 27bdd508f7..b1ca503326 100644 --- a/src/shims/unix/fd.rs +++ b/src/shims/unix/fd.rs @@ -1,15 +1,12 @@ //! General management of file descriptors, and support for //! standard file descriptors (stdin/stdout/stderr). -use std::any::Any; -use std::collections::BTreeMap; -use std::io::{self, ErrorKind, IsTerminal, Read, SeekFrom, Write}; -use std::ops::Deref; -use std::rc::{Rc, Weak}; +use std::io::{self, ErrorKind}; use rustc_abi::Size; use crate::helpers::check_min_arg_count; +use crate::shims::files::FileDescription; use crate::shims::unix::linux::epoll::EpollReadyEvents; use crate::shims::unix::*; use crate::*; @@ -21,40 +18,8 @@ pub(crate) enum FlockOp { Unlock, } -/// Represents an open file description. -pub trait FileDescription: std::fmt::Debug + Any { - fn name(&self) -> &'static str; - - /// Reads as much as possible into the given buffer `ptr`. - /// `len` indicates how many bytes we should try to read. - /// `dest` is where the return value should be stored: number of bytes read, or `-1` in case of error. - fn read<'tcx>( - &self, - _self_ref: &FileDescriptionRef, - _communicate_allowed: bool, - _ptr: Pointer, - _len: usize, - _dest: &MPlaceTy<'tcx>, - _ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx> { - throw_unsup_format!("cannot read from {}", self.name()); - } - - /// Writes as much as possible from the given buffer `ptr`. - /// `len` indicates how many bytes we should try to write. - /// `dest` is where the return value should be stored: number of bytes written, or `-1` in case of error. - fn write<'tcx>( - &self, - _self_ref: &FileDescriptionRef, - _communicate_allowed: bool, - _ptr: Pointer, - _len: usize, - _dest: &MPlaceTy<'tcx>, - _ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx> { - throw_unsup_format!("cannot write to {}", self.name()); - } - +/// Represents unix-specific file descriptions. +pub trait UnixFileDescription: FileDescription { /// Reads as much as possible into the given buffer `ptr` from a given offset. /// `len` indicates how many bytes we should try to read. /// `dest` is where the return value should be stored: number of bytes read, or `-1` in case of error. @@ -69,7 +34,6 @@ pub trait FileDescription: std::fmt::Debug + Any { ) -> InterpResult<'tcx> { throw_unsup_format!("cannot pread from {}", self.name()); } - /// Writes as much as possible from the given buffer `ptr` starting at a given offset. /// `ptr` is the pointer to the user supplied read buffer. /// `len` indicates how many bytes we should try to write. @@ -86,24 +50,6 @@ pub trait FileDescription: std::fmt::Debug + Any { throw_unsup_format!("cannot pwrite to {}", self.name()); } - /// Seeks to the given offset (which can be relative to the beginning, end, or current position). - /// Returns the new position from the start of the stream. - fn seek<'tcx>( - &self, - _communicate_allowed: bool, - _offset: SeekFrom, - ) -> InterpResult<'tcx, io::Result> { - throw_unsup_format!("cannot seek on {}", self.name()); - } - - fn close<'tcx>( - self: Box, - _communicate_allowed: bool, - _ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx, io::Result<()>> { - throw_unsup_format!("cannot close {}", self.name()); - } - fn flock<'tcx>( &self, _communicate_allowed: bool, @@ -112,311 +58,12 @@ pub trait FileDescription: std::fmt::Debug + Any { throw_unsup_format!("cannot flock {}", self.name()); } - fn is_tty(&self, _communicate_allowed: bool) -> bool { - // Most FDs are not tty's and the consequence of a wrong `false` are minor, - // so we use a default impl here. - false - } - /// Check the readiness of file description. fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> { throw_unsup_format!("{}: epoll does not support this file description", self.name()); } } -impl dyn FileDescription { - #[inline(always)] - pub fn downcast(&self) -> Option<&T> { - (self as &dyn Any).downcast_ref() - } -} - -impl FileDescription for io::Stdin { - fn name(&self) -> &'static str { - "stdin" - } - - fn read<'tcx>( - &self, - _self_ref: &FileDescriptionRef, - communicate_allowed: bool, - ptr: Pointer, - len: usize, - dest: &MPlaceTy<'tcx>, - ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx> { - let mut bytes = vec![0; len]; - if !communicate_allowed { - // We want isolation mode to be deterministic, so we have to disallow all reads, even stdin. - helpers::isolation_abort_error("`read` from stdin")?; - } - let result = Read::read(&mut { self }, &mut bytes); - match result { - Ok(read_size) => ecx.return_read_success(ptr, &bytes, read_size, dest), - Err(e) => ecx.set_last_error_and_return(e, dest), - } - } - - fn is_tty(&self, communicate_allowed: bool) -> bool { - communicate_allowed && self.is_terminal() - } -} - -impl FileDescription for io::Stdout { - fn name(&self) -> &'static str { - "stdout" - } - - fn write<'tcx>( - &self, - _self_ref: &FileDescriptionRef, - _communicate_allowed: bool, - ptr: Pointer, - len: usize, - dest: &MPlaceTy<'tcx>, - ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx> { - let bytes = ecx.read_bytes_ptr_strip_provenance(ptr, Size::from_bytes(len))?; - // We allow writing to stderr even with isolation enabled. - let result = Write::write(&mut { self }, bytes); - // Stdout is buffered, flush to make sure it appears on the - // screen. This is the write() syscall of the interpreted - // program, we want it to correspond to a write() syscall on - // the host -- there is no good in adding extra buffering - // here. - io::stdout().flush().unwrap(); - match result { - Ok(write_size) => ecx.return_write_success(write_size, dest), - Err(e) => ecx.set_last_error_and_return(e, dest), - } - } - - fn is_tty(&self, communicate_allowed: bool) -> bool { - communicate_allowed && self.is_terminal() - } -} - -impl FileDescription for io::Stderr { - fn name(&self) -> &'static str { - "stderr" - } - - fn write<'tcx>( - &self, - _self_ref: &FileDescriptionRef, - _communicate_allowed: bool, - ptr: Pointer, - len: usize, - dest: &MPlaceTy<'tcx>, - ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx> { - let bytes = ecx.read_bytes_ptr_strip_provenance(ptr, Size::from_bytes(len))?; - // We allow writing to stderr even with isolation enabled. - // No need to flush, stderr is not buffered. - let result = Write::write(&mut { self }, bytes); - match result { - Ok(write_size) => ecx.return_write_success(write_size, dest), - Err(e) => ecx.set_last_error_and_return(e, dest), - } - } - - fn is_tty(&self, communicate_allowed: bool) -> bool { - communicate_allowed && self.is_terminal() - } -} - -/// Like /dev/null -#[derive(Debug)] -pub struct NullOutput; - -impl FileDescription for NullOutput { - fn name(&self) -> &'static str { - "stderr and stdout" - } - - fn write<'tcx>( - &self, - _self_ref: &FileDescriptionRef, - _communicate_allowed: bool, - _ptr: Pointer, - len: usize, - dest: &MPlaceTy<'tcx>, - ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx> { - // We just don't write anything, but report to the user that we did. - ecx.return_write_success(len, dest) - } -} - -/// Structure contains both the file description and its unique identifier. -#[derive(Clone, Debug)] -pub struct FileDescWithId { - id: FdId, - file_description: Box, -} - -#[derive(Clone, Debug)] -pub struct FileDescriptionRef(Rc>); - -impl Deref for FileDescriptionRef { - type Target = dyn FileDescription; - - fn deref(&self) -> &Self::Target { - &*self.0.file_description - } -} - -impl FileDescriptionRef { - fn new(fd: impl FileDescription, id: FdId) -> Self { - FileDescriptionRef(Rc::new(FileDescWithId { id, file_description: Box::new(fd) })) - } - - pub fn close<'tcx>( - self, - communicate_allowed: bool, - ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx, io::Result<()>> { - // Destroy this `Rc` using `into_inner` so we can call `close` instead of - // implicitly running the destructor of the file description. - let id = self.get_id(); - match Rc::into_inner(self.0) { - Some(fd) => { - // Remove entry from the global epoll_event_interest table. - ecx.machine.epoll_interests.remove(id); - - fd.file_description.close(communicate_allowed, ecx) - } - None => interp_ok(Ok(())), - } - } - - pub fn downgrade(&self) -> WeakFileDescriptionRef { - WeakFileDescriptionRef { weak_ref: Rc::downgrade(&self.0) } - } - - pub fn get_id(&self) -> FdId { - self.0.id - } -} - -/// Holds a weak reference to the actual file description. -#[derive(Clone, Debug, Default)] -pub struct WeakFileDescriptionRef { - weak_ref: Weak>, -} - -impl WeakFileDescriptionRef { - pub fn upgrade(&self) -> Option { - if let Some(file_desc_with_id) = self.weak_ref.upgrade() { - return Some(FileDescriptionRef(file_desc_with_id)); - } - None - } -} - -impl VisitProvenance for WeakFileDescriptionRef { - fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { - // A weak reference can never be the only reference to some pointer or place. - // Since the actual file description is tracked by strong ref somewhere, - // it is ok to make this a NOP operation. - } -} - -/// A unique id for file descriptions. While we could use the address, considering that -/// is definitely unique, the address would expose interpreter internal state when used -/// for sorting things. So instead we generate a unique id per file description that stays -/// the same even if a file descriptor is duplicated and gets a new integer file descriptor. -#[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd)] -pub struct FdId(usize); - -/// The file descriptor table -#[derive(Debug)] -pub struct FdTable { - pub fds: BTreeMap, - /// Unique identifier for file description, used to differentiate between various file description. - next_file_description_id: FdId, -} - -impl VisitProvenance for FdTable { - fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { - // All our FileDescription instances do not have any tags. - } -} - -impl FdTable { - fn new() -> Self { - FdTable { fds: BTreeMap::new(), next_file_description_id: FdId(0) } - } - pub(crate) fn init(mute_stdout_stderr: bool) -> FdTable { - let mut fds = FdTable::new(); - fds.insert_new(io::stdin()); - if mute_stdout_stderr { - assert_eq!(fds.insert_new(NullOutput), 1); - assert_eq!(fds.insert_new(NullOutput), 2); - } else { - assert_eq!(fds.insert_new(io::stdout()), 1); - assert_eq!(fds.insert_new(io::stderr()), 2); - } - fds - } - - pub fn new_ref(&mut self, fd: impl FileDescription) -> FileDescriptionRef { - let file_handle = FileDescriptionRef::new(fd, self.next_file_description_id); - self.next_file_description_id = FdId(self.next_file_description_id.0.strict_add(1)); - file_handle - } - - /// Insert a new file description to the FdTable. - pub fn insert_new(&mut self, fd: impl FileDescription) -> i32 { - let fd_ref = self.new_ref(fd); - self.insert(fd_ref) - } - - pub fn insert(&mut self, fd_ref: FileDescriptionRef) -> i32 { - self.insert_with_min_num(fd_ref, 0) - } - - /// Insert a file description, giving it a file descriptor that is at least `min_fd_num`. - fn insert_with_min_num(&mut self, file_handle: FileDescriptionRef, min_fd_num: i32) -> i32 { - // Find the lowest unused FD, starting from min_fd. If the first such unused FD is in - // between used FDs, the find_map combinator will return it. If the first such unused FD - // is after all other used FDs, the find_map combinator will return None, and we will use - // the FD following the greatest FD thus far. - let candidate_new_fd = - self.fds.range(min_fd_num..).zip(min_fd_num..).find_map(|((fd_num, _fd), counter)| { - if *fd_num != counter { - // There was a gap in the fds stored, return the first unused one - // (note that this relies on BTreeMap iterating in key order) - Some(counter) - } else { - // This fd is used, keep going - None - } - }); - let new_fd_num = candidate_new_fd.unwrap_or_else(|| { - // find_map ran out of BTreeMap entries before finding a free fd, use one plus the - // maximum fd in the map - self.fds.last_key_value().map(|(fd_num, _)| fd_num.strict_add(1)).unwrap_or(min_fd_num) - }); - - self.fds.try_insert(new_fd_num, file_handle).unwrap(); - new_fd_num - } - - pub fn get(&self, fd_num: i32) -> Option { - let fd = self.fds.get(&fd_num)?; - Some(fd.clone()) - } - - pub fn remove(&mut self, fd_num: i32) -> Option { - self.fds.remove(&fd_num) - } - - pub fn is_fd_num(&self, fd_num: i32) -> bool { - self.fds.contains_key(&fd_num) - } -} - impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { fn dup(&mut self, old_fd_num: i32) -> InterpResult<'tcx, Scalar> { @@ -472,7 +119,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { throw_unsup_format!("unsupported flags {:#x}", op); }; - let result = fd.flock(this.machine.communicate(), parsed_op)?; + let result = fd.as_unix()?.flock(this.machine.communicate(), parsed_op)?; drop(fd); // return `0` if flock is successful let result = result.map(|()| 0i32); @@ -602,7 +249,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let Ok(offset) = u64::try_from(offset) else { return this.set_last_error_and_return(LibcError("EINVAL"), dest); }; - fd.pread(communicate, offset, buf, count, dest, this)? + fd.as_unix()?.pread(communicate, offset, buf, count, dest, this)? } }; interp_ok(()) @@ -642,46 +289,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let Ok(offset) = u64::try_from(offset) else { return this.set_last_error_and_return(LibcError("EINVAL"), dest); }; - fd.pwrite(communicate, buf, count, offset, dest, this)? + fd.as_unix()?.pwrite(communicate, buf, count, offset, dest, this)? } }; interp_ok(()) } - - /// Helper to implement `FileDescription::read`: - /// This is only used when `read` is successful. - /// `actual_read_size` should be the return value of some underlying `read` call that used - /// `bytes` as its output buffer. - /// The length of `bytes` must not exceed either the host's or the target's `isize`. - /// `bytes` is written to `buf` and the size is written to `dest`. - fn return_read_success( - &mut self, - buf: Pointer, - bytes: &[u8], - actual_read_size: usize, - dest: &MPlaceTy<'tcx>, - ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - // If reading to `bytes` did not fail, we write those bytes to the buffer. - // Crucially, if fewer than `bytes.len()` bytes were read, only write - // that much into the output buffer! - this.write_bytes_ptr(buf, bytes[..actual_read_size].iter().copied())?; - - // The actual read size is always less than what got originally requested so this cannot fail. - this.write_int(u64::try_from(actual_read_size).unwrap(), dest)?; - interp_ok(()) - } - - /// Helper to implement `FileDescription::write`: - /// This function is only used when `write` is successful, and writes `actual_write_size` to `dest` - fn return_write_success( - &mut self, - actual_write_size: usize, - dest: &MPlaceTy<'tcx>, - ) -> InterpResult<'tcx> { - let this = self.eval_context_mut(); - // The actual write size is always less than what got originally requested so this cannot fail. - this.write_int(u64::try_from(actual_write_size).unwrap(), dest)?; - interp_ok(()) - } } diff --git a/src/shims/unix/fs.rs b/src/shims/unix/fs.rs index 091def7ac6..e9cbdcca1b 100644 --- a/src/shims/unix/fs.rs +++ b/src/shims/unix/fs.rs @@ -2,7 +2,8 @@ use std::borrow::Cow; use std::fs::{ - DirBuilder, File, FileType, OpenOptions, ReadDir, read_dir, remove_dir, remove_file, rename, + DirBuilder, File, FileType, Metadata, OpenOptions, ReadDir, read_dir, remove_dir, remove_file, + rename, }; use std::io::{self, ErrorKind, IsTerminal, Read, Seek, SeekFrom, Write}; use std::path::{Path, PathBuf}; @@ -14,8 +15,9 @@ use rustc_data_structures::fx::FxHashMap; use self::fd::FlockOp; use self::shims::time::system_time_to_duration; use crate::helpers::check_min_arg_count; +use crate::shims::files::{EvalContextExt as _, FileDescription, FileDescriptionRef}; use crate::shims::os_str::bytes_to_os_str; -use crate::shims::unix::fd::FileDescriptionRef; +use crate::shims::unix::fd::UnixFileDescription; use crate::shims::unix::*; use crate::*; @@ -66,6 +68,55 @@ impl FileDescription for FileHandle { } } + fn seek<'tcx>( + &self, + communicate_allowed: bool, + offset: SeekFrom, + ) -> InterpResult<'tcx, io::Result> { + assert!(communicate_allowed, "isolation should have prevented even opening a file"); + interp_ok((&mut &self.file).seek(offset)) + } + + fn close<'tcx>( + self: Box, + communicate_allowed: bool, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx, io::Result<()>> { + assert!(communicate_allowed, "isolation should have prevented even opening a file"); + // We sync the file if it was opened in a mode different than read-only. + if self.writable { + // `File::sync_all` does the checks that are done when closing a file. We do this to + // to handle possible errors correctly. + let result = self.file.sync_all(); + // Now we actually close the file and return the result. + drop(*self); + interp_ok(result) + } else { + // We drop the file, this closes it but ignores any errors + // produced when closing it. This is done because + // `File::sync_all` cannot be done over files like + // `/dev/urandom` which are read-only. Check + // https://github.com/rust-lang/miri/issues/999#issuecomment-568920439 + // for a deeper discussion. + drop(*self); + interp_ok(Ok(())) + } + } + + fn metadata<'tcx>(&self) -> InterpResult<'tcx, io::Result> { + interp_ok(self.file.metadata()) + } + + fn is_tty(&self, communicate_allowed: bool) -> bool { + communicate_allowed && self.file.is_terminal() + } + + fn as_unix<'tcx>(&self) -> InterpResult<'tcx, &dyn UnixFileDescription> { + interp_ok(self) + } +} + +impl UnixFileDescription for FileHandle { fn pread<'tcx>( &self, communicate_allowed: bool, @@ -128,41 +179,6 @@ impl FileDescription for FileHandle { } } - fn seek<'tcx>( - &self, - communicate_allowed: bool, - offset: SeekFrom, - ) -> InterpResult<'tcx, io::Result> { - assert!(communicate_allowed, "isolation should have prevented even opening a file"); - interp_ok((&mut &self.file).seek(offset)) - } - - fn close<'tcx>( - self: Box, - communicate_allowed: bool, - _ecx: &mut MiriInterpCx<'tcx>, - ) -> InterpResult<'tcx, io::Result<()>> { - assert!(communicate_allowed, "isolation should have prevented even opening a file"); - // We sync the file if it was opened in a mode different than read-only. - if self.writable { - // `File::sync_all` does the checks that are done when closing a file. We do this to - // to handle possible errors correctly. - let result = self.file.sync_all(); - // Now we actually close the file and return the result. - drop(*self); - interp_ok(result) - } else { - // We drop the file, this closes it but ignores any errors - // produced when closing it. This is done because - // `File::sync_all` cannot be done over files like - // `/dev/urandom` which are read-only. Check - // https://github.com/rust-lang/miri/issues/999#issuecomment-568920439 - // for a deeper discussion. - drop(*self); - interp_ok(Ok(())) - } - } - fn flock<'tcx>( &self, communicate_allowed: bool, @@ -257,10 +273,6 @@ impl FileDescription for FileHandle { compile_error!("flock is supported only on UNIX and Windows hosts"); } } - - fn is_tty(&self, communicate_allowed: bool) -> bool { - communicate_allowed && self.file.is_terminal() - } } impl<'tcx> EvalContextExtPrivate<'tcx> for crate::MiriInterpCx<'tcx> {} @@ -1675,16 +1687,7 @@ impl FileMetadata { return interp_ok(Err(LibcError("EBADF"))); }; - let file = &fd - .downcast::() - .ok_or_else(|| { - err_unsup_format!( - "obtaining metadata is only supported on file-backed file descriptors" - ) - })? - .file; - - let metadata = file.metadata(); + let metadata = fd.metadata()?; drop(fd); FileMetadata::from_meta(ecx, metadata) } diff --git a/src/shims/unix/linux/epoll.rs b/src/shims/unix/linux/epoll.rs index de108665e9..3cd4098024 100644 --- a/src/shims/unix/linux/epoll.rs +++ b/src/shims/unix/linux/epoll.rs @@ -5,8 +5,8 @@ use std::rc::{Rc, Weak}; use std::time::Duration; use crate::concurrency::VClock; -use crate::shims::unix::fd::{FdId, FileDescriptionRef, WeakFileDescriptionRef}; -use crate::shims::unix::*; +use crate::shims::files::{FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef}; +use crate::shims::unix::UnixFileDescription; use crate::*; /// An `Epoll` file descriptor connects file handles and epoll events @@ -152,8 +152,14 @@ impl FileDescription for Epoll { ) -> InterpResult<'tcx, io::Result<()>> { interp_ok(Ok(())) } + + fn as_unix<'tcx>(&self) -> InterpResult<'tcx, &dyn UnixFileDescription> { + interp_ok(self) + } } +impl UnixFileDescription for Epoll {} + /// The table of all EpollEventInterest. /// The BTreeMap key is the FdId of an active file description registered with /// any epoll instance. The value is a list of EpollEventInterest associated @@ -595,7 +601,7 @@ fn check_and_update_one_event_interest<'tcx>( ecx: &MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, bool> { // Get the bitmask of ready events for a file description. - let ready_events_bitmask = fd_ref.get_epoll_ready_events()?.get_event_bitmask(ecx); + let ready_events_bitmask = fd_ref.as_unix()?.get_epoll_ready_events()?.get_event_bitmask(ecx); let epoll_event_interest = interest.borrow(); // This checks if any of the events specified in epoll_event_interest.events // match those in ready_events. diff --git a/src/shims/unix/linux/eventfd.rs b/src/shims/unix/linux/eventfd.rs index 63b7d37b13..0f6a383fbb 100644 --- a/src/shims/unix/linux/eventfd.rs +++ b/src/shims/unix/linux/eventfd.rs @@ -4,9 +4,9 @@ use std::io; use std::io::ErrorKind; use crate::concurrency::VClock; -use crate::shims::unix::fd::{FileDescriptionRef, WeakFileDescriptionRef}; +use crate::shims::files::{FileDescription, FileDescriptionRef, WeakFileDescriptionRef}; +use crate::shims::unix::fd::UnixFileDescription; use crate::shims::unix::linux::epoll::{EpollReadyEvents, EvalContextExt as _}; -use crate::shims::unix::*; use crate::*; /// Maximum value that the eventfd counter can hold. @@ -37,17 +37,6 @@ impl FileDescription for Event { "event" } - fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> { - // We only check the status of EPOLLIN and EPOLLOUT flags for eventfd. If other event flags - // need to be supported in the future, the check should be added here. - - interp_ok(EpollReadyEvents { - epollin: self.counter.get() != 0, - epollout: self.counter.get() != MAX_COUNTER, - ..EpollReadyEvents::new() - }) - } - fn close<'tcx>( self: Box, _communicate_allowed: bool, @@ -121,6 +110,23 @@ impl FileDescription for Event { let weak_eventfd = self_ref.downgrade(); eventfd_write(num, buf_place, dest, weak_eventfd, ecx) } + + fn as_unix<'tcx>(&self) -> InterpResult<'tcx, &dyn UnixFileDescription> { + interp_ok(self) + } +} + +impl UnixFileDescription for Event { + fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> { + // We only check the status of EPOLLIN and EPOLLOUT flags for eventfd. If other event flags + // need to be supported in the future, the check should be added here. + + interp_ok(EpollReadyEvents { + epollin: self.counter.get() != 0, + epollout: self.counter.get() != MAX_COUNTER, + ..EpollReadyEvents::new() + }) + } } impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} diff --git a/src/shims/unix/mod.rs b/src/shims/unix/mod.rs index c8c25c636e..0ece24da6c 100644 --- a/src/shims/unix/mod.rs +++ b/src/shims/unix/mod.rs @@ -16,7 +16,7 @@ mod solarish; // All the Unix-specific extension traits pub use self::env::{EvalContextExt as _, UnixEnvVars}; -pub use self::fd::{EvalContextExt as _, FdTable, FileDescription}; +pub use self::fd::{EvalContextExt as _, UnixFileDescription}; pub use self::fs::{DirTable, EvalContextExt as _}; pub use self::linux::epoll::EpollInterestTable; pub use self::mem::EvalContextExt as _; diff --git a/src/shims/unix/unnamed_socket.rs b/src/shims/unix/unnamed_socket.rs index 8ccce7c198..d7348e9ea9 100644 --- a/src/shims/unix/unnamed_socket.rs +++ b/src/shims/unix/unnamed_socket.rs @@ -10,9 +10,11 @@ use std::io::{ErrorKind, Read}; use rustc_abi::Size; use crate::concurrency::VClock; -use crate::shims::unix::fd::{FileDescriptionRef, WeakFileDescriptionRef}; +use crate::shims::files::{ + EvalContextExt as _, FileDescription, FileDescriptionRef, WeakFileDescriptionRef, +}; +use crate::shims::unix::fd::UnixFileDescription; use crate::shims::unix::linux::epoll::{EpollReadyEvents, EvalContextExt as _}; -use crate::shims::unix::*; use crate::*; /// The maximum capacity of the socketpair buffer in bytes. @@ -60,52 +62,6 @@ impl FileDescription for AnonSocket { "socketpair" } - fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> { - // We only check the status of EPOLLIN, EPOLLOUT, EPOLLHUP and EPOLLRDHUP flags. - // If other event flags need to be supported in the future, the check should be added here. - - let mut epoll_ready_events = EpollReadyEvents::new(); - - // Check if it is readable. - if let Some(readbuf) = &self.readbuf { - if !readbuf.borrow().buf.is_empty() { - epoll_ready_events.epollin = true; - } - } else { - // Without a read buffer, reading never blocks, so we are always ready. - epoll_ready_events.epollin = true; - } - - // Check if is writable. - if let Some(peer_fd) = self.peer_fd().upgrade() { - if let Some(writebuf) = &peer_fd.downcast::().unwrap().readbuf { - let data_size = writebuf.borrow().buf.len(); - let available_space = MAX_SOCKETPAIR_BUFFER_CAPACITY.strict_sub(data_size); - if available_space != 0 { - epoll_ready_events.epollout = true; - } - } else { - // Without a write buffer, writing never blocks. - epoll_ready_events.epollout = true; - } - } else { - // Peer FD has been closed. This always sets both the RDHUP and HUP flags - // as we do not support `shutdown` that could be used to partially close the stream. - epoll_ready_events.epollrdhup = true; - epoll_ready_events.epollhup = true; - // Since the peer is closed, even if no data is available reads will return EOF and - // writes will return EPIPE. In other words, they won't block, so we mark this as ready - // for read and write. - epoll_ready_events.epollin = true; - epoll_ready_events.epollout = true; - // If there is data lost in peer_fd, set EPOLLERR. - if self.peer_lost_data.get() { - epoll_ready_events.epollerr = true; - } - } - interp_ok(epoll_ready_events) - } - fn close<'tcx>( self: Box, _communicate_allowed: bool, @@ -251,6 +207,58 @@ impl FileDescription for AnonSocket { ecx.return_write_success(actual_write_size, dest) } + + fn as_unix<'tcx>(&self) -> InterpResult<'tcx, &dyn UnixFileDescription> { + interp_ok(self) + } +} + +impl UnixFileDescription for AnonSocket { + fn get_epoll_ready_events<'tcx>(&self) -> InterpResult<'tcx, EpollReadyEvents> { + // We only check the status of EPOLLIN, EPOLLOUT, EPOLLHUP and EPOLLRDHUP flags. + // If other event flags need to be supported in the future, the check should be added here. + + let mut epoll_ready_events = EpollReadyEvents::new(); + + // Check if it is readable. + if let Some(readbuf) = &self.readbuf { + if !readbuf.borrow().buf.is_empty() { + epoll_ready_events.epollin = true; + } + } else { + // Without a read buffer, reading never blocks, so we are always ready. + epoll_ready_events.epollin = true; + } + + // Check if is writable. + if let Some(peer_fd) = self.peer_fd().upgrade() { + if let Some(writebuf) = &peer_fd.downcast::().unwrap().readbuf { + let data_size = writebuf.borrow().buf.len(); + let available_space = MAX_SOCKETPAIR_BUFFER_CAPACITY.strict_sub(data_size); + if available_space != 0 { + epoll_ready_events.epollout = true; + } + } else { + // Without a write buffer, writing never blocks. + epoll_ready_events.epollout = true; + } + } else { + // Peer FD has been closed. This always sets both the RDHUP and HUP flags + // as we do not support `shutdown` that could be used to partially close the stream. + epoll_ready_events.epollrdhup = true; + epoll_ready_events.epollhup = true; + // Since the peer is closed, even if no data is available reads will return EOF and + // writes will return EPIPE. In other words, they won't block, so we mark this as ready + // for read and write. + epoll_ready_events.epollin = true; + epoll_ready_events.epollout = true; + // If there is data lost in peer_fd, set EPOLLERR. + if self.peer_lost_data.get() { + epoll_ready_events.epollerr = true; + } + } + interp_ok(epoll_ready_events) + } } impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}