Skip to content

Commit

Permalink
Rework Spank logic to support a kerberos option
Browse files Browse the repository at this point in the history
  • Loading branch information
3XX0 committed Jan 17, 2025
1 parent f341816 commit 6f4a8c6
Show file tree
Hide file tree
Showing 4 changed files with 148 additions and 51 deletions.
2 changes: 1 addition & 1 deletion pkg/plugstack.conf
Original file line number Diff line number Diff line change
@@ -1 +1 @@
required spank_sybil.so min_tkt_lifetime=
required spank_sybil.so default=no min_tkt_lifetime=
172 changes: 134 additions & 38 deletions src/slurm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,10 @@ use tracing_subscriber::fmt::MakeWriter;

/// Plugin name used as an error prefix.
/// NOTE(review): the place where this prefix is applied is not visible here; confirm.
const ERROR_PREFIX: &str = "spank_sybil";

/// Plugin argument key (e.g. `default=no` in `plugstack.conf`) giving the
/// forwarding policy when neither the command line option nor the environment
/// provides one.
const OPT_FORWARD_CREDS: &str = "default";
/// Environment variable that can select the forwarding policy; it is consulted
/// before the `default` plugin argument.
const ENV_FORWARD_CREDS: &str = "SYBIL_SPANK_KERBEROS";
/// Environment variable set to `"1"` or `"0"` by `slurm_spank_init_post_opt` and
/// read back through `spank_getenv` in `slurm_spank_user_init` to decide whether
/// a deferred error must be reported.
const ENV_REQUIRE_CREDS: &CStr = c"SYBIL_SPANK_KERBEROS_ENABLED";
/// Plugin argument key holding the minimum ticket lifetime requirement.
const OPT_MIN_TKT_LIFETIME: &str = "min_tkt_lifetime";
/// Environment variable consulted for the minimum ticket lifetime before
/// falling back to the `min_tkt_lifetime` plugin argument.
const ENV_MIN_TKT_LIFETIME: &str = "SYBIL_MIN_TKT_LIFETIME";

#[no_mangle]
#[used]
Expand All @@ -55,6 +57,51 @@ thread_local! {
static SPANK_ERROR: Cell<Option<SpankError>> = const { Cell::new(None) };
}

/// Policy controlling whether Kerberos credentials are forwarded to the
/// allocated nodes, as selected by the `kerberos` option (`yes|no|auto|force`).
///
/// The variant descriptions below reflect how `slurm_spank_init_post_opt`
/// handles each value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum ForwardCredsOpt {
    /// Credentials are checked first; a failed check is fatal, otherwise they
    /// are stored.
    Yes,
    /// Nothing is checked or stored.
    No,
    /// Credentials are stored only when the check succeeds; a failed check is
    /// not treated as an error.
    Auto,
    /// Credentials are stored without running the check first.
    Force,
}

impl ForwardCredsOpt {
    /// Registers the `kerberos` command line option with Slurm.
    ///
    /// # Errors
    ///
    /// Returns [`SpankError::Fatal`] when `spank_option_register` does not
    /// report `ESPANK_SUCCESS`.
    fn register(ctx: spank::spank_t) -> Result<(), SpankError> {
        let mut opt = spank::spank_option {
            name: c"kerberos".as_ptr() as _,
            arginfo: c"[yes|no|auto|force]".as_ptr() as _,
            usage: c"[sybil] forward kerberos credentials to the allocated nodes".as_ptr() as _,
            has_arg: 1,
            val: 0,
            cb: Some(Self::callback),
        };

        // SAFETY: `opt` is a fully initialised option that lives for the whole
        // call, and every string it points to is a `'static` C string literal.
        // NOTE(review): assumes Slurm does not retain the `opt` pointer itself
        // past the call — confirm against the SPANK documentation.
        match unsafe { spank::spank_option_register(ctx, &mut opt) } {
            spank::ESPANK_SUCCESS => Ok(()),
            _ => Err(SpankError::Fatal("failed to register command line option".into())),
        }
    }

    /// Option callback: accepts only `yes`, `no`, `auto` or `force`.
    ///
    /// Returns `SLURM_SUCCESS` for a recognised value and `SLURM_ERROR` for
    /// anything else, including a missing (null) argument.
    extern "C" fn callback(_val: c_int, opt: *const c_char, _remote: c_int) -> c_int {
        // Reject a missing argument instead of dereferencing a null pointer.
        if opt.is_null() {
            return spank::SLURM_ERROR as _;
        }

        // SAFETY: `opt` is non-null (checked above) and is expected to point to
        // the NUL-terminated option argument supplied by the caller.
        let value = unsafe { CStr::from_ptr(opt) }.to_string_lossy();

        match value.as_ref() {
            "yes" | "no" | "auto" | "force" => spank::SLURM_SUCCESS as _,
            _ => spank::SLURM_ERROR as _,
        }
    }

    /// Resolves the forwarding policy from, in order of precedence:
    ///
    /// 1. the `_SLURM_SPANK_OPTION_sybil_kerberos` environment variable
    ///    (presumably how Slurm exposes the command line option — confirm),
    /// 2. the `SYBIL_SPANK_KERBEROS` environment variable,
    /// 3. the `default` plugin argument.
    ///
    /// Any missing or unrecognised value resolves to [`Self::No`].
    fn from_env(args: &SpankArgs) -> Self {
        let from_environment = env::var("_SLURM_SPANK_OPTION_sybil_kerberos")
            .ok()
            .or_else(|| env::var(ENV_FORWARD_CREDS).ok());

        match from_environment
            .as_deref()
            .or_else(|| args.get(OPT_FORWARD_CREDS))
        {
            Some("yes") => Self::Yes,
            Some("auto") => Self::Auto,
            Some("force") => Self::Force,
            // "no", unknown values and the absence of any setting all disable forwarding.
            _ => Self::No,
        }
    }
}

pub struct SpankLogger;

impl io::Write for SpankLogger {
Expand Down Expand Up @@ -184,60 +231,104 @@ pub extern "C" fn slurm_spank_init(ctx: spank::spank_t, _argc: c_int, _argv: *mu
Err(err) => return SpankError::Fatal(format!("failed to initialize runtime: {}", err)).into(),
};

if let Err(err) = ForwardCredsOpt::register(ctx) {
return err.into();
}

if unsafe { spank::spank_remote(ctx) == 0 } {
return spank::SLURM_SUCCESS as _;
}

match unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_ID, &mut job_id as *mut u32) } {
match unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_ID, &mut job_id) } {
spank::ESPANK_SUCCESS => JOB_ID.set(job_id),
_ => return SpankError::Fatal("failed to get job ID".into()).into(),
};

if unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_UID, &mut job_uid as *mut u32) }
!= spank::ESPANK_SUCCESS
{
if unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_UID, &mut job_uid) } != spank::ESPANK_SUCCESS {
return SpankError::Fatal("failed to get job UID".into()).into();
};
}
if unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_STEPID, &mut step_id) } != spank::ESPANK_SUCCESS {
return SpankError::Fatal("failed to get jobstep ID".into()).into();
}

match unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_STEPID, &mut step_id as *mut u32) } {
spank::ESPANK_SUCCESS if step_id == spank::SLURM_EXTERN_CONT => RUNTIME.with_borrow(|rt| {
rt.block_on(async {
ops::fetch_credentials(job_uid)
.await
.map_or_else(|err| err.defer().into(), |_| spank::SLURM_SUCCESS as _)
})
}),
spank::ESPANK_SUCCESS => spank::SLURM_SUCCESS as _,
_ => SpankError::Fatal("failed to get jobstep ID".into()).into(),
if step_id != spank::SLURM_EXTERN_CONT {
return spank::SLURM_SUCCESS as _;
}

RUNTIME.with_borrow(|rt| {
rt.block_on(async {
ops::fetch_credentials(job_uid)
.await
.map_or_else(|err| err.defer().into(), |_| spank::SLURM_SUCCESS as _)
})
})
}

#[no_mangle]
pub extern "C" fn slurm_spank_init_post_opt(_ctx: spank::spank_t, argc: c_int, argv: *mut *mut c_char) -> c_int {
let min_tkt_lifetime = env::var(ENV_MIN_TKT_LIFETIME)
.ok()
.or_else(|| SpankArgs::new(argc, argv).get(OPT_MIN_TKT_LIFETIME).map(String::from));

match unsafe { spank::spank_context() } {
spank::spank_context_S_CTX_LOCAL | spank::spank_context_S_CTX_ALLOCATOR => RUNTIME.with_borrow(|rt| {
rt.block_on(async {
ops::store_credentials(min_tkt_lifetime.as_deref())
.await
.map_or_else(|err| err.fatal().into(), |_| spank::SLURM_SUCCESS as _)
})
}),
_ => spank::SLURM_SUCCESS as _,
spank::spank_context_S_CTX_LOCAL | spank::spank_context_S_CTX_ALLOCATOR => (),
_ => return spank::SLURM_SUCCESS as _,
};

let require_creds = ENV_REQUIRE_CREDS.to_str().unwrap();
if env::var(require_creds).is_ok() {
return spank::SLURM_SUCCESS as _;
}

let args = SpankArgs::new(argc, argv);
let forward = ForwardCredsOpt::from_env(&args);
let lifetime = args.get(OPT_MIN_TKT_LIFETIME);

match forward {
ForwardCredsOpt::Yes => {
env::set_var(require_creds, "1");
if let Err(err) = ops::check_credentials(lifetime) {
return err.fatal().into();
}
}
ForwardCredsOpt::No => {
env::set_var(require_creds, "0");
return spank::SLURM_SUCCESS as _;
}
ForwardCredsOpt::Auto => {
if ops::check_credentials(lifetime).is_ok() {
env::set_var(require_creds, "1");
} else {
env::set_var(require_creds, "0");
return spank::SLURM_SUCCESS as _;
}
}
ForwardCredsOpt::Force => env::set_var(require_creds, "1"),
};

RUNTIME.with_borrow(|rt| {
rt.block_on(async {
ops::store_credentials()
.await
.map_or_else(|err| err.fatal().into(), |_| spank::SLURM_SUCCESS as _)
})
})
}

#[no_mangle]
pub extern "C" fn slurm_spank_user_init(_ctx: spank::spank_t, _argc: c_int, _argv: *mut *mut c_char) -> c_int {
pub extern "C" fn slurm_spank_user_init(ctx: spank::spank_t, _argc: c_int, _argv: *mut *mut c_char) -> c_int {
let mut require_creds = [0u8; 2];

unsafe {
spank::spank_getenv(
ctx,
ENV_REQUIRE_CREDS.as_ptr(),
require_creds.as_mut_ptr() as _,
require_creds.len() as _,
)
};

match SpankError::deferred_error() {
Some(err) => {
Some(err) if require_creds[0] == b'1' => {
err.log();
err.into()
}
None => spank::SLURM_SUCCESS as _,
_ => spank::SLURM_SUCCESS as _,
}
}

Expand All @@ -253,13 +344,18 @@ pub extern "C" fn slurm_spank_task_init(_ctx: spank::spank_t, _argc: c_int, _arg
pub extern "C" fn slurm_spank_exit(ctx: spank::spank_t, _argc: c_int, _argv: *mut *mut c_char) -> c_int {
let mut step_id = 0u32;

if unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_STEPID, &mut step_id as *mut u32) }
== spank::ESPANK_SUCCESS
&& step_id == spank::SLURM_EXTERN_CONT
{
RUNTIME.with_borrow(|rt| rt.block_on(async { ops::terminate().await }));
SPANK_ERROR.set(None);

if unsafe { spank::spank_remote(ctx) == 0 } {
return spank::SLURM_SUCCESS as _;
}

SPANK_ERROR.take();
if unsafe { spank::spank_get_item(ctx, spank::spank_item_S_JOB_STEPID, &mut step_id) } != spank::ESPANK_SUCCESS {
return SpankError::Fatal("failed to get jobstep ID".into()).into();
}

if step_id == spank::SLURM_EXTERN_CONT {
RUNTIME.with_borrow(|rt| rt.block_on(async { ops::cleanup().await }));
}
spank::SLURM_SUCCESS as _
}
19 changes: 12 additions & 7 deletions src/slurm/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

use crate::slurm::*;
use crate::trace::*;

use std::{cell::Cell, error::Error, result::Result};
Expand All @@ -12,21 +13,21 @@ thread_local! {
static REFRESH_PROCESS: Cell<Option<crate::PrivSepChild>> = const { Cell::new(None) };
}

pub async fn store_credentials(min_tkt_lifetime: Option<&str>) -> Result<(), Box<dyn Error>> {
pub fn check_credentials(lifetime: Option<&str>) -> Result<(), Box<dyn Error>> {
let creds = crate::krb::default_ccache()
.and_then(|ccache| crate::krb::Credentials::fetch(&ccache, Some("5m"), false))
.map_err(|err| {
tracing::error!(error = err.chain(), "could not find active credentials");
"Kerberos credentials not found, make sure that `klist` shows active tickets"
})?;

if min_tkt_lifetime.is_some_and(|l| !l.is_empty() && l != "0") {
if lifetime.is_some_and(|l| !l.is_empty() && l != "0") {
creds
.will_last_for(min_tkt_lifetime.unwrap())
.will_last_for(lifetime.unwrap())
.unwrap_or_else(|err| {
tracing::error!(
error = err.chain(),
lifetime = min_tkt_lifetime.display(),
lifetime = lifetime.display(),
"could not evaluate minimum ticket lifetime requirement"
);
false
Expand All @@ -38,13 +39,17 @@ pub async fn store_credentials(min_tkt_lifetime: Option<&str>) -> Result<(), Box
)?;
}

Ok(())
}

pub async fn store_credentials() -> Result<(), crate::Error> {
let mut client = crate::new_client(None::<String>, None, crate::DelegatePolicy::ForceDelegate).await?;
client.authenticate().await?;
client.store().await?;
Ok(())
}

pub async fn fetch_credentials(uid: u32) -> Result<(), Box<dyn Error>> {
pub async fn fetch_credentials(uid: u32) -> Result<(), crate::Error> {
let mut client = crate::new_client(None::<String>, None, crate::DelegatePolicy::None).await?;
client.authenticate().await?;

Expand All @@ -53,14 +58,14 @@ pub async fn fetch_credentials(uid: u32) -> Result<(), Box<dyn Error>> {
.await?;

if let Some(ref mut proc) = proc {
tokio::task::spawn_blocking(proc.copy_output_blocking(crate::slurm::SpankLogger.make_writer()));
tokio::task::spawn_blocking(proc.copy_output_blocking(SpankLogger.make_writer()));
}

REFRESH_PROCESS.set(proc);
Ok(())
}

pub async fn terminate() {
pub async fn cleanup() {
if let Some(mut proc) = REFRESH_PROCESS.take() {
proc.kill();
proc.wait().await;
Expand Down
6 changes: 1 addition & 5 deletions src/slurm/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,9 @@ use nix::{
unistd,
};
use std::{
ffi::CStr,
io::Error as IOError,
ops::Deref,
os::{
fd::AsRawFd,
raw::{c_char, c_void},
},
os::{fd::AsRawFd, raw::c_void},
ptr::{self, NonNull},
slice,
};
Expand Down

0 comments on commit 6f4a8c6

Please sign in to comment.