feat: add agent module with Nginx service commands and routes
- Introduced a new agent module with commands for managing Nginx configurations. - Implemented `NginxService` for handling reload, validation, and configuration writing. - Added routes for status, validation, and configuration writing using Axum. - Created necessary command files: `reload.rs`, `run.rs`, `validate.rs`, `write_config.rs`. - Updated `Cargo.toml` and `Cargo.lock` to include new dependencies. - Added `.gitignore` for the agent module. - Updated `justfile` to include OpenAPI generation for the agent.
This commit is contained in:
292
apps/agent/src/commands.rs
Normal file
292
apps/agent/src/commands.rs
Normal file
@@ -0,0 +1,292 @@
|
||||
mod reload;
|
||||
mod run;
|
||||
mod validate;
|
||||
mod write_config;
|
||||
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
sync::{
|
||||
Arc,
|
||||
atomic::{AtomicU64, Ordering},
|
||||
},
|
||||
};
|
||||
|
||||
use tokio::sync::{Mutex, RwLock};
|
||||
use tokio_cron_scheduler::{Job, JobScheduler};
|
||||
use tracing::{error, info};
|
||||
|
||||
use crate::commands::write_config::INTERNAL_CONFIG_FOLDER_NAME;
|
||||
|
||||
/// Age in seconds (one hour) past which a file's modification time makes it eligible for
/// removal by the old-config cleanup job.
const OLD_CONFIG_CLEANUP_THRESHOLD: u64 = 60 * 60;
|
||||
|
||||
pub struct NginxService {
|
||||
// lock for nginx reload, and timestamp tracking
|
||||
nginx_lock: Mutex<()>,
|
||||
last_applied: AtomicU64,
|
||||
// lock for write_config per (config_name, timestamp)
|
||||
#[allow(clippy::type_complexity)]
|
||||
write_config_lock: RwLock<HashMap<(String, u64), Arc<RwLock<()>>>>,
|
||||
// commands
|
||||
reload_cmd: Arc<reload::ReloadCommand>,
|
||||
validate_cmd: Arc<validate::ValidateCommand>,
|
||||
write_config_cmd: Arc<write_config::WriteConfigCommand>,
|
||||
}
|
||||
|
||||
impl NginxService {
|
||||
pub async fn new(
|
||||
scheduler: Arc<JobScheduler>,
|
||||
nginx_config_dir: std::path::PathBuf,
|
||||
) -> Result<Arc<Self>, Box<dyn std::error::Error + Send + Sync>> {
|
||||
let nginx_service = Arc::new(NginxService {
|
||||
nginx_lock: Mutex::new(()),
|
||||
last_applied: AtomicU64::new(0),
|
||||
write_config_lock: RwLock::new(HashMap::new()),
|
||||
// commands
|
||||
reload_cmd: Arc::new(reload::ReloadCommand::default()),
|
||||
validate_cmd: Arc::new(validate::ValidateCommand::new(nginx_config_dir.clone())),
|
||||
write_config_cmd: Arc::new(write_config::WriteConfigCommand::new(nginx_config_dir)),
|
||||
});
|
||||
let mut nginx_service_clone = nginx_service.clone();
|
||||
|
||||
scheduler
|
||||
.clone()
|
||||
// cleanup every 10 minutes
|
||||
.add(Job::new_async("0 */10 * * * *", move |_uuid, _l| {
|
||||
info!("Running nginx_service cleanup job");
|
||||
let nginx_service_clone = nginx_service_clone.clone();
|
||||
let job = Box::pin(async move {
|
||||
nginx_service_clone.cleanup_unused_lock().await;
|
||||
});
|
||||
info!("NginxService cleanup job completed");
|
||||
job
|
||||
})?)
|
||||
.await?;
|
||||
|
||||
nginx_service_clone = nginx_service.clone();
|
||||
|
||||
scheduler
|
||||
.clone()
|
||||
// cleanup every hour
|
||||
.add(Job::new_async("0 0 */1 * * *", move |_uuid, _l| {
|
||||
info!("Running nginx_service old config cleanup job");
|
||||
let nginx_service_clone = nginx_service_clone.clone();
|
||||
let job = Box::pin(async move {
|
||||
nginx_service_clone.cleanup_old_configs().await;
|
||||
});
|
||||
info!("NginxService old config cleanup job completed");
|
||||
job
|
||||
})?)
|
||||
.await?;
|
||||
|
||||
Ok(nginx_service)
|
||||
}
|
||||
|
||||
pub async fn validate_and_reload(
|
||||
&self,
|
||||
config_name: &str,
|
||||
timestamp: u64,
|
||||
) -> Result<(i32, String), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let cur = self.last_applied.load(Ordering::SeqCst);
|
||||
if cur > timestamp {
|
||||
return Err("Another operation is in progress with higher timestamp value".into());
|
||||
}
|
||||
|
||||
// acquire write lock to update nginx_lock
|
||||
let _nginx_guard = self.nginx_lock.lock().await;
|
||||
// acquire write lock for this config+timestamp
|
||||
let rw_lock = self.acquire_file_write_lock(config_name, timestamp).await;
|
||||
let _guard = rw_lock.write().await;
|
||||
|
||||
match self
|
||||
.reload_cmd
|
||||
.validate_and_reload(config_name, timestamp, self.validate_cmd.clone())
|
||||
.await
|
||||
{
|
||||
Ok((code, output)) => {
|
||||
// update last_applied
|
||||
self.last_applied.store(timestamp, Ordering::SeqCst);
|
||||
Ok((code, output))
|
||||
}
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn write_config(
|
||||
&self,
|
||||
config_name: &str,
|
||||
timestamp: u64,
|
||||
content: &str,
|
||||
) -> Result<(), Box<dyn std::error::Error + Send + Sync>> {
|
||||
let rw_lock = self.acquire_file_write_lock(config_name, timestamp).await;
|
||||
let _guard = rw_lock.write().await;
|
||||
// call the write_config command
|
||||
self.write_config_cmd
|
||||
.write_config(config_name, timestamp, content)
|
||||
.await
|
||||
}
|
||||
|
||||
pub async fn validate(
|
||||
&self,
|
||||
config_name: &str,
|
||||
timestamp: u64,
|
||||
) -> Result<(i32, String), Box<dyn std::error::Error + Send + Sync>> {
|
||||
self.validate_cmd.validate(config_name, timestamp).await
|
||||
}
|
||||
|
||||
async fn cleanup_unused_lock(&self) {
|
||||
let mut _write_lock = self.write_config_lock.write().await;
|
||||
(*_write_lock).retain(|_, lock| {
|
||||
// retain only locks that are currently held (readers or writers)
|
||||
lock.try_write().is_err()
|
||||
});
|
||||
}
|
||||
|
||||
async fn cleanup_old_configs(&self) {
|
||||
// list all files within nginx_config_dir/YANPM that is older than now - OLD_CONFIG_CLEANUP_THRESHOLD
|
||||
let cutoff = std::time::SystemTime::now()
|
||||
.duration_since(std::time::UNIX_EPOCH)
|
||||
.unwrap()
|
||||
.as_secs()
|
||||
- OLD_CONFIG_CLEANUP_THRESHOLD;
|
||||
|
||||
let nginx_config_dir = self.validate_cmd.nginx_config_dir();
|
||||
let yanpm_dir = nginx_config_dir.join(INTERNAL_CONFIG_FOLDER_NAME);
|
||||
|
||||
let read_dir = match tokio::fs::read_dir(&yanpm_dir).await {
|
||||
Ok(rd) => rd,
|
||||
Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
|
||||
// directory does not exist, nothing to clean up
|
||||
return;
|
||||
}
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Error reading {} config directory {}: {}",
|
||||
INTERNAL_CONFIG_FOLDER_NAME,
|
||||
yanpm_dir.display(),
|
||||
e
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
tokio::pin!(read_dir);
|
||||
while let Some(entry) = read_dir.next_entry().await.unwrap_or(None) {
|
||||
let metadata = match entry.metadata().await {
|
||||
Ok(md) => md,
|
||||
Err(e) => {
|
||||
error!(
|
||||
"Error getting metadata for file {}: {}",
|
||||
entry.path().display(),
|
||||
e
|
||||
);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Ok(modified) = metadata.modified()
|
||||
&& let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH)
|
||||
{
|
||||
let mtime_secs = duration.as_secs();
|
||||
if mtime_secs < cutoff {
|
||||
// file is older than cutoff, remove it
|
||||
if let Err(e) = tokio::fs::remove_file(entry.path()).await {
|
||||
error!(
|
||||
"Error removing old config file {}: {}",
|
||||
entry.path().display(),
|
||||
e
|
||||
);
|
||||
} else {
|
||||
info!("Removed old config file {}", entry.path().display());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
async fn acquire_file_write_lock(&self, config_name: &str, timestamp: u64) -> Arc<RwLock<()>> {
|
||||
let mut write_lock = self.write_config_lock.write().await;
|
||||
write_lock
|
||||
.entry((config_name.to_string(), timestamp))
|
||||
.or_insert_with(|| Arc::new(RwLock::new(())))
|
||||
.clone()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use std::error::Error;
|
||||
use std::sync::Arc as StdArc;
|
||||
use tokio::time::{Duration, sleep};
|
||||
|
||||
impl NginxService {
|
||||
// Test helper that simulates a long-running reload without invoking external commands.
|
||||
pub async fn test_simulated_reload(
|
||||
&self,
|
||||
config_name: &str,
|
||||
timestamp: u64,
|
||||
delay_ms: u64,
|
||||
) -> Result<(), Box<dyn Error + Send + Sync>> {
|
||||
// pre-check
|
||||
let cur = self.last_applied.load(Ordering::SeqCst);
|
||||
if cur >= timestamp {
|
||||
return Err("stale".into());
|
||||
}
|
||||
|
||||
// acquire exclusive lock and re-check
|
||||
let _nginx_guard = self.nginx_lock.lock().await;
|
||||
let cur2 = self.last_applied.load(Ordering::SeqCst);
|
||||
if cur2 >= timestamp {
|
||||
return Err("stale".into());
|
||||
}
|
||||
|
||||
// per-file lock
|
||||
let rw_lock = self.acquire_file_write_lock(config_name, timestamp).await;
|
||||
let _guard = rw_lock.write().await;
|
||||
|
||||
// simulate operation
|
||||
sleep(Duration::from_millis(delay_ms)).await;
|
||||
|
||||
// on success update last_applied
|
||||
let mut prev = self.last_applied.load(Ordering::SeqCst);
|
||||
while prev < timestamp {
|
||||
match self.last_applied.compare_exchange(
|
||||
prev,
|
||||
timestamp,
|
||||
Ordering::SeqCst,
|
||||
Ordering::SeqCst,
|
||||
) {
|
||||
Ok(_) => break,
|
||||
Err(next) => prev = next,
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test]
|
||||
async fn concurrent_stale_is_rejected() {
|
||||
let scheduler = StdArc::new(JobScheduler::new().await.unwrap());
|
||||
let svc = NginxService::new(scheduler.clone(), std::env::temp_dir())
|
||||
.await
|
||||
.unwrap();
|
||||
|
||||
let s1 = svc.clone();
|
||||
let h1 = tokio::spawn(async move { s1.test_simulated_reload("cfg", 2, 200).await });
|
||||
|
||||
// let second start shortly after first so it will wait for the mutex
|
||||
sleep(Duration::from_millis(20)).await;
|
||||
|
||||
let s2 = svc.clone();
|
||||
let h2 = tokio::spawn(async move { s2.test_simulated_reload("cfg", 1, 10).await });
|
||||
|
||||
let r1 = h1.await.unwrap();
|
||||
assert!(r1.is_ok(), "first (newer) task should succeed");
|
||||
|
||||
let r2 = h2.await.unwrap();
|
||||
assert!(
|
||||
r2.is_err(),
|
||||
"second (older) task should be rejected as stale"
|
||||
);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user