diff --git a/codex-rs/tui/src/app.rs b/codex-rs/tui/src/app.rs index f1bfb3f006..99250cf68c 100644 --- a/codex-rs/tui/src/app.rs +++ b/codex-rs/tui/src/app.rs @@ -388,9 +388,21 @@ impl App { mode, include_paths, scope_prompt, + force_new, + } => { + self.chat_widget.start_security_review( + mode, + include_paths, + scope_prompt, + force_new, + ); + } + AppEvent::ResumeSecurityReview { + output_root, + metadata, } => { self.chat_widget - .start_security_review(mode, include_paths, scope_prompt); + .resume_security_review(output_root, metadata); } AppEvent::SecurityReviewAutoScopeConfirm { mode, diff --git a/codex-rs/tui/src/app_event.rs b/codex-rs/tui/src/app_event.rs index 24be16a800..d921b18b52 100644 --- a/codex-rs/tui/src/app_event.rs +++ b/codex-rs/tui/src/app_event.rs @@ -13,6 +13,7 @@ use tokio::sync::oneshot; use crate::bottom_pane::ApprovalRequest; use crate::history_cell::HistoryCell; use crate::security_review::SecurityReviewFailure; +use crate::security_review::SecurityReviewMetadata; use crate::security_review::SecurityReviewMode; use crate::security_review::SecurityReviewResult; @@ -114,6 +115,13 @@ pub(crate) enum AppEvent { mode: SecurityReviewMode, include_paths: Vec, scope_prompt: Option, + force_new: bool, + }, + + /// Resume a previously generated security review from disk. + ResumeSecurityReview { + output_root: PathBuf, + metadata: SecurityReviewMetadata, }, /// Prompt the user to confirm auto-detected scope selections. diff --git a/codex-rs/tui/src/bottom_pane/mod.rs b/codex-rs/tui/src/bottom_pane/mod.rs index 423c41a905..8b3f4d3997 100644 --- a/codex-rs/tui/src/bottom_pane/mod.rs +++ b/codex-rs/tui/src/bottom_pane/mod.rs @@ -72,6 +72,8 @@ pub(crate) struct BottomPane { status: Option, /// Queued user messages to show under the status indicator. queued_user_messages: Vec, + /// Recent log messages shown beneath the status header. 
+ status_logs: Vec<String>, context_window_percent: Option, } @@ -104,6 +106,7 @@ impl BottomPane { ctrl_c_quit_hint: false, status: None, queued_user_messages: Vec::new(), + status_logs: Vec::new(), esc_backtrack_hint: false, context_window_percent: None, } @@ -295,6 +298,7 @@ impl BottomPane { } pub(crate) fn update_status_snapshot(&mut self, snapshot: StatusSnapshot) { + self.status_logs = snapshot.logs.clone(); if let Some(status) = self.status.as_mut() { status.update_snapshot(snapshot); } else { @@ -302,6 +306,13 @@ impl BottomPane { } } + pub(crate) fn update_status_logs(&mut self, logs: Vec<String>) { + self.status_logs = logs.clone(); + if let Some(status) = self.status.as_mut() { + status.set_logs(logs); + } + } + pub(crate) fn show_ctrl_c_quit_hint(&mut self) { self.ctrl_c_quit_hint = true; self.composer @@ -345,6 +356,7 @@ impl BottomPane { if running { if self.status.is_none() { + self.status_logs.clear(); self.status = Some(StatusIndicatorWidget::new( self.app_event_tx.clone(), self.frame_requester.clone(), @@ -352,11 +364,13 @@ impl BottomPane { } if let Some(status) = self.status.as_mut() { status.set_queued_messages(self.queued_user_messages.clone()); + status.set_logs(self.status_logs.clone()); } self.request_redraw(); } else { // Hide the status indicator when a task completes, but keep other modal views. 
self.hide_status_indicator(); + self.status_logs.clear(); } } diff --git a/codex-rs/tui/src/chatwidget.rs b/codex-rs/tui/src/chatwidget.rs index 5542ed0bcd..d639b2a873 100644 --- a/codex-rs/tui/src/chatwidget.rs +++ b/codex-rs/tui/src/chatwidget.rs @@ -1,5 +1,8 @@ use std::collections::HashMap; use std::collections::VecDeque; +use std::env; +use std::fs; +use std::path::Path; use std::path::PathBuf; use std::sync::Arc; use std::time::Instant; @@ -50,6 +53,7 @@ use crossterm::event::KeyCode; use crossterm::event::KeyEvent; use crossterm::event::KeyEventKind; use crossterm::event::KeyModifiers; +use dirs::home_dir; use rand::Rng; use ratatui::buffer::Buffer; use ratatui::layout::Constraint; @@ -94,10 +98,12 @@ use crate::history_cell::PlainHistoryCell; use crate::markdown::append_markdown; use crate::security_review::SECURITY_REVIEW_FOLLOW_UP_MARKER; use crate::security_review::SecurityReviewFailure; +use crate::security_review::SecurityReviewMetadata; use crate::security_review::SecurityReviewMode; use crate::security_review::SecurityReviewRequest; use crate::security_review::SecurityReviewResult; use crate::security_review::build_follow_up_user_prompt; +use crate::security_review::read_security_review_metadata; use crate::security_review::run_security_review; use crate::slash_command::SlashCommand; use crate::status::RateLimitSnapshotDisplay; @@ -112,7 +118,6 @@ use self::agent::spawn_agent_from_existing; mod session_header; use self::session_header::SessionHeader; use crate::streaming::controller::StreamController; -use std::path::Path; use chrono::Local; use codex_common::approval_presets::ApprovalPreset; @@ -338,6 +343,148 @@ fn annotate_scope_prompt(prompt: &str) -> String { ) } +fn strip_progress_prefix(text: &str) -> &str { + let trimmed = text.trim_start(); + let mut parts = trimmed.splitn(3, ' '); + let first = parts.next().unwrap_or_default(); + let second = parts.next(); + let rest = parts.next(); + + if let (Some(second), Some(rest)) = (second, rest) + 
&& let Some(digits) = first.strip_suffix('%') + && !digits.is_empty() + && digits.chars().all(|c| c.is_ascii_digit()) + && second.chars().all(|c| c == '█' || c == '░') + { + return rest.trim_start(); + } + + text +} + +fn parse_progress_suffix(text: &str) -> Option<(u8, &str)> { + let trimmed = text.trim(); + let trimmed = trimmed.strip_suffix('.').unwrap_or(trimmed); + let idx = trimmed.rfind(" - ")?; + let tail = trimmed[idx + 3..].trim(); + let percent_str = tail.strip_suffix('%')?; + if percent_str.is_empty() || !percent_str.chars().all(|c| c.is_ascii_digit()) { + return None; + } + let percent = percent_str.parse::<u32>().ok()?.min(100) as u8; + let core = trimmed[..idx].trim_end(); + Some((percent, core)) +} + +fn build_percent_prefix(percent: u8) -> String { + let pct = percent.min(100); + let width = 10usize; + let filled = usize::from(pct) * width / 100; + let mut bar = String::with_capacity(width); + if filled > 0 { + bar.push_str(&"█".repeat(filled)); + } + if width > filled { + bar.push_str(&"░".repeat(width - filled)); + } + format!("{pct}% {bar} ") +} + +#[derive(Clone)] +struct SecurityReviewResumeCandidate { + folder_name: String, + output_root: PathBuf, + metadata: SecurityReviewMetadata, +} + +fn sanitize_repo_slug(repo_path: &Path) -> String { + let raw = repo_path + .file_name() + .and_then(|name| name.to_str()) + .map(std::string::ToString::to_string) + .unwrap_or_else(|| repo_path.to_string_lossy().into_owned()); + let mut slug = String::with_capacity(raw.len()); + for ch in raw.chars() { + let mapped = if ch.is_ascii_alphanumeric() { + ch.to_ascii_lowercase() + } else if matches!(ch, '-' | '_') { + '-' + } else { + '-' + }; + if mapped == '-' { + if !slug.ends_with('-') { + slug.push(mapped); + } + } else { + slug.push(mapped); + } + } + let trimmed = slug.trim_matches('-').to_string(); + if trimmed.is_empty() { + "repository".to_string() + } else { + trimmed + } +} + +fn security_review_storage_root(repo_path: &Path) -> PathBuf { + let base = 
env::var_os("CODEX_HOME") + .map(PathBuf::from) + .or_else(|| home_dir().map(|dir| dir.join(".codex"))) + .unwrap_or_else(|| repo_path.to_path_buf()); + base.join("appsec_review") + .join(sanitize_repo_slug(repo_path)) +} + +fn latest_security_review_candidate(storage_root: &Path) -> Option<SecurityReviewResumeCandidate> { + let entries = fs::read_dir(storage_root).ok()?; + let mut candidates: Vec<(String, SecurityReviewResumeCandidate)> = Vec::new(); + for entry in entries.flatten() { + let path = entry.path(); + if !path.is_dir() { + continue; + } + let metadata_path = path.join("metadata.json"); + let snapshot_path = path.join("context").join("bugs_snapshot.json"); + if !metadata_path.exists() || !snapshot_path.exists() { + continue; + } + let metadata = match read_security_review_metadata(&metadata_path) { + Ok(meta) => meta, + Err(_) => continue, + }; + let folder_name = entry.file_name().into_string().unwrap_or_else(|_| { + path.file_name() + .and_then(|name| name.to_str()) + .unwrap_or("latest") + .to_string() + }); + candidates.push(( + folder_name.clone(), + SecurityReviewResumeCandidate { + folder_name, + output_root: path, + metadata, + }, + )); + } + if candidates.is_empty() { + return None; + } + candidates.sort_by(|a, b| b.0.cmp(&a.0)); + let mut candidate = candidates.into_iter().next().map(|(_, c)| c)?; + if candidate.folder_name.is_empty() { + candidate.folder_name = candidate + .output_root + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("latest") + .to_string(); + } + Some(candidate) +} + struct SecurityReviewContext { mode: SecurityReviewMode, include_paths: Vec<String>, @@ -348,6 +495,8 @@ struct SecurityReviewContext { started_at: Instant, last_log: Option<String>, thinking_lines: Vec<String>, + log_lines: Vec<String>, + progress_percent: Option<u8>, } struct SecurityReviewFollowUpState { @@ -365,6 +514,10 @@ struct SecurityReviewArtifactsState { bugs_path: PathBuf, report_path: Option<PathBuf>, report_html_path: Option<PathBuf>, + metadata_path: PathBuf, + api_overview_path: Option<PathBuf>, + classification_json_path: 
Option, + classification_table_path: Option, } impl ChatWidget { @@ -378,6 +531,181 @@ impl ChatWidget { } } + fn prompt_security_review_resume( + &mut self, + mode: SecurityReviewMode, + include_paths: Vec, + scope_prompt: Option, + candidate: SecurityReviewResumeCandidate, + ) { + let repo_path = self.config.cwd.clone(); + let display_path = display_path_for(&candidate.output_root, &repo_path); + let scope_summary = if candidate.metadata.scope_paths.is_empty() { + "entire repository".to_string() + } else { + candidate.metadata.scope_paths.join(", ") + }; + + let mut items: Vec = Vec::new(); + let resume_candidate = candidate; + items.push(SelectionItem { + name: format!("Resume latest review ({})", resume_candidate.folder_name), + description: Some(format!( + "Mode: {} • Scope: {}", + resume_candidate.metadata.mode.as_str(), + scope_summary + )), + actions: vec![Box::new(move |tx: &AppEventSender| { + tx.send(AppEvent::ResumeSecurityReview { + output_root: resume_candidate.output_root.clone(), + metadata: resume_candidate.metadata.clone(), + }); + })], + dismiss_on_select: true, + search_value: Some(display_path.clone()), + ..Default::default() + }); + + let include_paths_for_retry = include_paths; + let scope_prompt_for_retry = scope_prompt; + items.push(SelectionItem { + name: "Start a new security review".to_string(), + description: Some("Create a fresh report".to_string()), + actions: vec![Box::new(move |tx: &AppEventSender| { + tx.send(AppEvent::StartSecurityReview { + mode, + include_paths: include_paths_for_retry.clone(), + scope_prompt: scope_prompt_for_retry.clone(), + force_new: true, + }); + })], + dismiss_on_select: true, + ..Default::default() + }); + + self.bottom_pane.show_selection_view(SelectionViewParams { + title: Some("Existing security review found".to_string()), + subtitle: Some(format!("Latest output at {display_path}")), + footer_hint: Some(standard_popup_hint_line()), + items, + ..Default::default() + }); + } + + pub(crate) fn 
resume_security_review( + &mut self, + output_root: PathBuf, + metadata: SecurityReviewMetadata, + ) { + if self.bottom_pane.is_task_running() || self.security_review_context.is_some() { + self.add_error_message( + "Cannot resume while a security review is running. Wait for it to finish first." + .to_string(), + ); + return; + } + + let repo_path = self.config.cwd.clone(); + if !output_root.exists() { + self.add_error_message(format!( + "Security review output path {} no longer exists.", + output_root.display() + )); + return; + } + + let context_dir = output_root.join("context"); + let bugs_path = context_dir.join("bugs.md"); + let snapshot_path = context_dir.join("bugs_snapshot.json"); + let metadata_path = output_root.join("metadata.json"); + + if !bugs_path.exists() { + self.add_error_message(format!( + "Cannot resume; expected bugs markdown missing at {}.", + bugs_path.display() + )); + return; + } + if !snapshot_path.exists() { + self.add_error_message(format!( + "Cannot resume; expected snapshot missing at {}.", + snapshot_path.display() + )); + return; + } + if !metadata_path.exists() { + self.add_error_message(format!( + "Cannot resume; expected metadata missing at {}.", + metadata_path.display() + )); + return; + } + + let report_path = output_root.join("report.md"); + let report_html_path = output_root.join("report.html"); + let report_path = report_path.exists().then_some(report_path); + let report_html_path = report_html_path.exists().then_some(report_html_path); + let api_candidate = context_dir.join("apis.md"); + let api_overview_path = api_candidate.exists().then_some(api_candidate); + let classification_json_candidate = context_dir.join("classification.jsonl"); + let classification_json_path = classification_json_candidate + .exists() + .then_some(classification_json_candidate); + let classification_table_candidate = context_dir.join("classification.md"); + let classification_table_path = classification_table_candidate + .exists() + 
.then_some(classification_table_candidate); + + self.clear_security_review_follow_up(); + self.security_review_task = None; + self.bottom_pane.set_task_running(false); + self.security_review_artifacts = Some(SecurityReviewArtifactsState { + repo_root: repo_path.clone(), + snapshot_path, + bugs_path: bugs_path.clone(), + report_path: report_path.clone(), + report_html_path: report_html_path.clone(), + metadata_path, + api_overview_path, + classification_json_path, + classification_table_path, + }); + + let follow_up_path = match metadata.mode { + SecurityReviewMode::Full => report_path + .clone() + .or(report_html_path.clone()) + .unwrap_or(bugs_path), + SecurityReviewMode::Bugs => bugs_path, + }; + let has_report = report_path.is_some() || report_html_path.is_some(); + let follow_up_label = if metadata.mode == SecurityReviewMode::Full && has_report { + "Report".to_string() + } else { + "Bugs".to_string() + }; + + self.security_review_follow_up = Some(SecurityReviewFollowUpState { + repo_root: repo_path.clone(), + scope_paths: metadata.scope_paths.clone(), + mode: metadata.mode, + follow_up_path: follow_up_path.clone(), + follow_up_label, + }); + self.bottom_pane + .set_placeholder_text("Ask a security review follow-up question".to_string()); + + let follow_up_display = display_path_for(&follow_up_path, &repo_path); + self.add_info_message( + format!( + "Resumed security review (mode: {}) — follow-up context loaded from {}.", + metadata.mode.as_str(), + follow_up_display + ), + None, + ); + } + fn flush_answer_stream_with_separator(&mut self) { if let Some(mut controller) = self.stream_controller.take() && let Some(cell) = controller.finalize() @@ -424,6 +752,7 @@ impl ChatWidget { progress: self.status_progress, thinking: self.status_thinking_lines.clone(), tool_calls, + logs: Vec::new(), }; self.bottom_pane.update_status_snapshot(snapshot); } @@ -2280,6 +2609,7 @@ impl ChatWidget { mode: SecurityReviewMode::Full, include_paths: Vec::new(), scope_prompt: None, + 
force_new: false, }); })], dismiss_on_select: true, @@ -2294,6 +2624,7 @@ impl ChatWidget { mode: SecurityReviewMode::Bugs, include_paths: Vec::new(), scope_prompt: None, + force_new: false, }); })], dismiss_on_select: true, @@ -2336,6 +2667,7 @@ impl ChatWidget { mode: SecurityReviewMode, include_paths: Vec, scope_prompt: Option, + force_new: bool, ) { if self.bottom_pane.is_task_running() || self.security_review_context.is_some() { self.add_error_message( @@ -2345,8 +2677,6 @@ impl ChatWidget { return; } - self.clear_security_review_follow_up(); - let repo_path = self.config.cwd.clone(); if !repo_path.exists() { self.add_error_message(format!( @@ -2356,6 +2686,14 @@ impl ChatWidget { return; } + let storage_root = security_review_storage_root(&repo_path); + if !force_new && let Some(candidate) = latest_security_review_candidate(&storage_root) { + self.prompt_security_review_resume(mode, include_paths, scope_prompt, candidate); + return; + } + + self.clear_security_review_follow_up(); + let mut resolved_paths: Vec = Vec::new(); let mut display_paths: Vec = Vec::new(); @@ -2399,8 +2737,29 @@ impl ChatWidget { let context_paths = display_paths.clone(); // Do not echo the auto-scope prompt into the scope list; keep the header concise. 
+ if let Err(err) = fs::create_dir_all(&storage_root) { + self.add_error_message(format!( + "Failed to prepare security review output directory {}: {err}", + storage_root.display() + )); + return; + } + let timestamp = Utc::now().format("%Y%m%d-%H%M%S").to_string(); - let output_root = repo_path.join("appsec_review").join(timestamp); + let mut output_root = storage_root.join(×tamp); + let mut collision_counter: usize = 1; + while output_root.exists() { + let candidate = format!("{timestamp}-{collision_counter:02}"); + output_root = storage_root.join(candidate); + collision_counter = collision_counter.saturating_add(1); + } + if let Err(err) = fs::create_dir_all(&output_root) { + self.add_error_message(format!( + "Failed to create security review output directory {}: {err}", + output_root.display() + )); + return; + } self.bottom_pane.set_task_running(true); self.bottom_pane @@ -2427,7 +2786,7 @@ impl ChatWidget { self.security_review_context = Some(SecurityReviewContext { mode, - include_paths: context_paths, + include_paths: context_paths.clone(), output_root: output_root.clone(), repo_path: repo_path.clone(), model: self.config.model.clone(), @@ -2435,6 +2794,8 @@ impl ChatWidget { started_at: Instant::now(), last_log: None, thinking_lines: Vec::new(), + log_lines: Vec::new(), + progress_percent: Some(0), }); let annotated_scope_prompt = if matches!(mode, SecurityReviewMode::Bugs) { @@ -2448,6 +2809,7 @@ impl ChatWidget { let request = SecurityReviewRequest { repo_path, include_paths: resolved_paths, + scope_display_paths: context_paths, output_root, mode, include_spec_in_bug_analysis: true, @@ -2527,6 +2889,7 @@ impl ChatWidget { mode, include_paths, scope_prompt: scope_prompt_override, + force_new: false, }); }), ); @@ -2556,36 +2919,40 @@ impl ChatWidget { if message.starts_with("Still waiting for bug analysis response from model") { return; } + let previous_percent = ctx.progress_percent; // Extract trailing percent in the form " - NN%" and move it to the 
front. // Enhance with a small 10-slot progress bar. let mut percent_prefix = String::new(); let mut core = message.as_str(); - if let Some(idx) = message.rfind(" - ") { - let tail = &message[idx + 3..]; - if let Some(no_dot) = tail - .strip_suffix('.') - .or_else(|| Some(tail).filter(|t| !t.is_empty())) - && let Some(num_str) = no_dot.strip_suffix('%').filter(|s| !s.is_empty()) - && num_str.chars().all(|c| c.is_ascii_digit()) - { - let pct: usize = num_str.parse::().unwrap_or(0).min(100); - let width = 10usize; - let filled = (pct * width) / 100; - let mut bar = String::new(); - if filled > 0 { - bar.push_str(&"█".repeat(filled)); + let progress_changed; + if let Some((percent, trimmed_core)) = parse_progress_suffix(message.as_str()) { + ctx.progress_percent = Some(percent); + percent_prefix = build_percent_prefix(percent); + core = trimmed_core; + progress_changed = previous_percent != Some(percent); + } else { + ctx.progress_percent = None; + progress_changed = previous_percent.is_some(); + } + + let mut added_to_log = false; + if !message.starts_with("Model reasoning:") { + let trimmed_message = strip_progress_prefix(message.as_str()).trim(); + let is_explicit_progress = trimmed_message.starts_with("File triage progress:") + || trimmed_message.starts_with("Bug analysis progress:"); + if !trimmed_message.is_empty() && !is_explicit_progress { + ctx.log_lines.push(truncate_text(trimmed_message, 160)); + if ctx.log_lines.len() > 5 { + let excess = ctx.log_lines.len() - 5; + ctx.log_lines.drain(0..excess); } - if width > filled { - bar.push_str(&"░".repeat(width - filled)); - } - percent_prefix = format!("{pct}% {bar} "); - core = &message[..idx]; + added_to_log = true; } } ctx.last_log = Some(message.clone()); // Compact known progress messages: show counts succinctly. 
- let mut display_core = core.trim(); + let mut display_core = strip_progress_prefix(core).trim(); if let Some(rest) = display_core .strip_prefix("File triage progress:") .or_else(|| display_core.strip_prefix("Bug analysis progress:")) @@ -2625,10 +2992,14 @@ impl ChatWidget { progress: None, thinking: ctx.thinking_lines.clone(), tool_calls: Vec::new(), + logs: ctx.log_lines.clone(), }, ); } else { - self.bottom_pane.update_status_header(header); + self.bottom_pane.update_status_logs(ctx.log_lines.clone()); + if !added_to_log || !percent_prefix.is_empty() || progress_changed { + self.bottom_pane.update_status_header(header); + } } } } @@ -2647,16 +3018,30 @@ impl ChatWidget { SecurityReviewCommandState::NoMatches => "no matches", SecurityReviewCommandState::Error => "error", }; - let mut text = summary; + let mut header_text = summary.clone(); if let Some(first) = preview.first().map(|s| s.trim()).filter(|s| !s.is_empty()) { - text = format!("{text} — {first}"); + header_text = format!("{header_text} — {first}"); } - let truncated = truncate_text(&text, 96); - self.bottom_pane.update_status_header(format!( - "Security review ({}) — [{state_label}] {truncated}", - ctx.mode.as_str() - )); - ctx.last_log = Some(text); + let truncated = truncate_text(&header_text, 96); + // Also surface a sub-section under the status with the tool call and a few preview lines. 
+ let mut tool_calls: Vec = Vec::new(); + tool_calls.push(format!("• {summary}")); + for line in &preview { + tool_calls.push(format!(" {line}")); + } + self.bottom_pane.update_status_snapshot( + crate::status_indicator_widget::StatusSnapshot { + header: format!( + "Security review ({}) — [{state_label}] {truncated}", + ctx.mode.as_str() + ), + progress: None, + thinking: ctx.thinking_lines.clone(), + tool_calls, + logs: ctx.log_lines.clone(), + }, + ); + ctx.last_log = Some(header_text); } } @@ -2666,6 +3051,20 @@ impl ChatWidget { .update_status_header(String::from("Working")); self.security_review_task = None; + // Merge security review token usage into the session total so the + // CLI exit summary reflects it, matching regular Codex sessions. + if !result.token_usage.is_zero() { + if let Some(info) = self.token_info.as_mut() { + info.append_last_usage(&result.token_usage); + } else { + self.token_info = codex_core::protocol::TokenUsageInfo::new_or_append( + &None, + &Some(result.token_usage.clone()), + self.config.model_context_window, + ); + } + } + let context = self.security_review_context.take(); let (mode, scope_paths, _output_root, repo_path, model, provider, started_at, last_log) = if let Some(ctx) = context { @@ -2727,6 +3126,13 @@ impl ChatWidget { ] .into(), ); + if !result.token_usage.is_zero() { + let usage_line = format!( + "{}", + codex_core::protocol::FinalOutput::from(result.token_usage.clone()) + ); + summary_lines.push(vec![" • ".into(), usage_line.into()].into()); + } summary_lines.push( vec![ " • ".into(), @@ -2753,6 +3159,45 @@ impl ChatWidget { ); } summary_lines.extend(artifact_lines); + if let Some(api_path) = result + .api_overview_path + .as_ref() + .map(|path| display_path_for(path, &repo_path)) + { + summary_lines.push( + vec![ + " • ".into(), + format!("API entry points: {}", api_path.as_str()).into(), + ] + .into(), + ); + } + if let Some(class_json) = result + .classification_json_path + .as_ref() + .map(|path| 
display_path_for(path, &repo_path)) + { + summary_lines.push( + vec![ + " • ".into(), + format!("Data classification (JSONL): {}", class_json.as_str()).into(), + ] + .into(), + ); + } + if let Some(class_table) = result + .classification_table_path + .as_ref() + .map(|path| display_path_for(path, &repo_path)) + { + summary_lines.push( + vec![ + " • ".into(), + format!("Data classification (markdown): {}", class_table.as_str()).into(), + ] + .into(), + ); + } if let Some(last_log) = last_log { summary_lines .push(vec![" • ".into(), format!("Last update: {last_log}").dim()].into()); @@ -2783,6 +3228,10 @@ impl ChatWidget { bugs_path: result.bugs_path.clone(), report_path: result.report_path.clone(), report_html_path: result.report_html_path.clone(), + metadata_path: result.metadata_path.clone(), + api_overview_path: result.api_overview_path.clone(), + classification_json_path: result.classification_json_path.clone(), + classification_table_path: result.classification_table_path.clone(), }); let follow_up_path = match mode { diff --git a/codex-rs/tui/src/security_prompts.rs b/codex-rs/tui/src/security_prompts.rs index 945d13d76a..b1313584de 100644 --- a/codex-rs/tui/src/security_prompts.rs +++ b/codex-rs/tui/src/security_prompts.rs @@ -20,6 +20,7 @@ You are assisting with an application security review. Identify the minimal set {conversation} # Available tools +- SEARCH: respond with `SEARCH: literal:` or `SEARCH: regex:` to run ripgrep over the repository root (returns colored matches with line numbers). - GREP_FILES: respond with `GREP_FILES: {"pattern":"needle","include":"*.rs","path":"subdir","limit":200}` to list files whose contents match. Fields: - pattern: regex string (required) - include: optional glob filter (ripgrep --glob) @@ -34,6 +35,7 @@ Issue at most one tool command per message and wait for the tool output before c - Return directories (not files). 
Use the highest level that contains the relevant implementation; avoid returning both a parent and its child. - Skip tests, docs, vendored dependencies, caches, build artefacts, editor configuration, or directories that do not exist. - Limit to the most relevant 3–8 directories when possible. +- Before including a directory, confirm it clearly relates to {user_query}; use SEARCH or READ to look for matching terminology (README, module names, config files) when uncertain. # Output format Return JSON Lines: each line must be a single JSON object with keys {"path", "include", "reason"}. Omit fences and additional commentary. If unsure, set include=false and explain in reason. Output `ALL` alone on one line to include the entire repository. @@ -58,11 +60,36 @@ Output format: JSON Lines, each {{"keyword": ""}}. Do not add commentary o // Spec generation prompts pub(crate) const SPEC_SYSTEM_PROMPT: &str = "You are an application security engineer documenting how a project is built. Produce an architecture specification that focuses on components, flows, and controls. Stay within the provided code locations and keep the output in markdown."; -pub(crate) const SPEC_COMBINE_SYSTEM_PROMPT: &str = "You are consolidating multiple specification drafts into a single, cohesive project specification. Merge overlapping content, keep terminology consistent, and follow the supplied template. Preserve important security-relevant details while avoiding repetition."; +pub(crate) const SPEC_COMBINE_SYSTEM_PROMPT: &str = "You are consolidating multiple specification drafts into a single, cohesive project specification. Merge overlapping content, keep terminology consistent, and follow the supplied template. 
Preserve every security-relevant detail; when in doubt, include rather than summarize away content."; pub(crate) const SPEC_PROMPT_TEMPLATE: &str = "You have access to the source code inside the following locations:\n{project_locations}\n\nFocus on {target_label}.\nGenerate a security-focused project specification. Parallelize discovery when enumerating files and avoid spending time on tests, vendored dependencies, or build artefacts. Follow the template exactly and return only markdown.\n\nTemplate:\n{spec_template}\n"; +pub(crate) const CONVERT_CLASSIFICATION_TO_JSON_PROMPT_TEMPLATE: &str = r#" +Read the project specification below and extract a normalized Data Classification list. + + +{spec_markdown} + + +# Goal +Produce newline-delimited JSON (NDJSON), one object per classified data type with keys: +- data_type (string — e.g., PII, PHI, PCI, credentials, secrets, telemetry) +- sensitivity (exactly one of: high, medium, low) +- storage_location (string) +- retention (short policy or duration) +- encryption_at_rest (string; use "unknown" if not stated) +- in_transit (string; use "unknown" if not stated) +- accessed_by (string describing services/roles/users) + +# Guidance +- Prefer the specification's Data Classification section; infer from context when necessary. +- Merge duplicate data types, choosing the strictest sensitivity. +- Keep values concise and human-readable. + +# Output +Emit only NDJSON lines. Each JSON object must contain exactly the keys listed above (no arrays, extra keys, or prose). 
+"#; pub(crate) const MARKDOWN_OUTPUT_GUARD: &str = "\n# Output Guard (strict)\n - Output only the final markdown content requested.\n - Do not include goal, analysis, planning, chain-of-thought, or step lists.\n - Do not echo prompt sections like \"Task\", \"Steps\", \"Output\", or \"Important\".\n - Do not include any XML/angle-bracket blocks (e.g., <...> inputs) in the output.\n - Do not wrap the entire response in code fences; use code fences only for code snippets.\n - Do not include apologies, disclaimers, or references to being an AI model.\n"; pub(crate) const MARKDOWN_FIX_SYSTEM_PROMPT: &str = "You are a meticulous technical editor. Polish markdown formatting while preserving the original security analysis content. Focus on fixing numbering, bullet spacing, code fences, and diagram syntax without adding or removing information."; -pub(crate) const SPEC_COMBINE_PROMPT_TEMPLATE: &str = "You previously generated specification drafts for the following code locations:\n{project_locations}\n\nDraft content:\n{spec_drafts}\n\nTask: merge these drafts into one comprehensive specification that describes the entire project. Remove duplication, keep terminology consistent, and ensure the final document reads as a single report. Follow the template exactly and return only markdown.\n\nTemplate:\n{combined_template}\n"; +pub(crate) const SPEC_COMBINE_PROMPT_TEMPLATE: &str = "You previously generated specification drafts for the following code locations:\n{project_locations}\n\nDraft content (each draft may include an \"API Entry Points\" section summarizing externally exposed interfaces):\n{spec_drafts}\n\nTask: merge these drafts into one comprehensive specification that describes the entire project. Remove duplication, keep terminology consistent, and ensure the final document reads as a single report that preserves API coverage. 
Follow the template exactly and return only markdown.\n\nNon-negotiable requirements:\n- Carry forward every concrete security-relevant fact, list, table, code block, and data classification entry from the drafts unless it is an exact duplicate.\n- When multiple drafts contribute to the same template section, include the union of their paragraphs and bullet points. If details differ, keep both and attribute them with inline labels such as `(from {location_label})` rather than dropping information.\n- Preserve API entry points verbatim (including tables) and incorporate them into the appropriate section without shortening columns.\n- Keep all identifiers (component names, queue names, environment variables, secrets, external services, metric names) exactly as written; do not rename or generalize.\n- Follow the template's structure exactly: populate every section, create the requested subsections, and include the explicit `Sources:` lines and bullet styles. Do not leave the instructional text in place or drop mandatory sections.\n- Populate the \"Relevant Source Files\" section with bullet points that reference each draft's location label and any concrete file paths mentioned in the drafts.\n- Ensure the \"Data Classification\" section exists even when the drafts were sparse; aggregate and preserve every classification detail there.\n- If multiple drafts contain tabular data (APIs, components, data classification), merge rows from all drafts and maintain duplicates when the sources disagree so the consumer can reconcile manually.\n- Do not introduce new speculation or remove nuance from mitigations, caveats, or risk descriptions provided in the drafts. Err on the side of length; the final document should be at least as detailed as the most verbose draft.\n\n# Available tools\n- READ: respond with `READ: #Lstart-Lend` (range optional) to open code or draft files. 
Use paths relative to the repository root.\n- GREP_FILES: respond with `GREP_FILES: {\"pattern\": \"...\", \"include\": \"*.rs\", \"path\": \"subdir\", \"limit\": 200}` to list files whose contents match.\nEmit at most one tool command in a single message and wait for the tool output before continuing. Prefer READ for prose context; SEARCH is not available during this step.\n\nTemplate:\n{combined_template}\n"; pub(crate) const SPEC_DIR_FILTER_SYSTEM_PROMPT: &str = r#" You triage directories for a security review specification. Only choose directories that hold core product or security-relevant code. - Prefer application source directories (services, packages, libs). @@ -70,8 +97,45 @@ You triage directories for a security review specification. Only choose director - Limit the selection to the most critical directories (ideally 3-8). Respond with a newline-separated list containing only the directory paths chosen from the provided list. Respond with `ALL` if every directory should be included. Do not add quotes or extra commentary. "#; -pub(crate) const SPEC_MARKDOWN_TEMPLATE: &str = "# Project Specification\n- Location: {target_label}\n- Prepared by: {model_name}\n- Date: {date}\n- In-scope paths:\n```\n{project_locations}\n```\n\n## Overview\nSummarize the product or service, primary users, and the business problem it solves. Highlight the most security relevant entry points.\n\n## Architecture Summary\nDescribe the high-level system architecture, major services, data stores, and external integrations. Include a concise mermaid flowchart when it improves clarity.\n\n## Components\nList 5-8 major components. For each, note the role, responsibilities, key dependencies, and security-critical behavior.\n\n## Business Flows\nDocument up to 5 important flows (CRUD, external integrations, workflow orchestration). For each flow capture triggers, main steps, data touched, and security notes. 
Include a short mermaid sequence diagram if helpful.\n\n## Authentication\nExplain how principals authenticate, token lifecycles, libraries used, and how secrets are managed.\n\n## Authorization\nDescribe the authorization model, enforcement points, privileged roles, and escalation paths.\n\n## Data Classification\nIdentify sensitive data types handled by the project and where they are stored or transmitted.\n\n## Infrastructure and Deployment\nSummarize infrastructure-as-code, runtime platforms, and configuration or secret handling that affects security posture.\n"; -pub(crate) const SPEC_COMBINED_MARKDOWN_TEMPLATE: &str = "# Project Specification\n## Executive Overview\nProvide a concise overview of the system, its primary entry points, and the highest-value assets.\n\n## Architecture\nDescribe the overall architecture, including diagrams (mermaid flowchart) where they add clarity. Call out trust boundaries and external dependencies.\n\n## Components\nSummarize each major component grouped by domain (frontend, API, workers, data stores, external integrations). For each component include responsibilities, key dependencies, and notable security considerations.\n\n## Business Flows\nDocument 3-6 critical flows (CRUD, integrations, orchestrations). Explain inputs, key steps, data touched, and defensive controls. 
Include concise mermaid sequence diagrams when useful.\n\n## Authentication\nDocument authentication methods, token lifecycles, libraries, and secret storage.\n\n## Authorization\nDescribe the authorization model, enforcement mechanisms, privilege boundaries, and escalation paths.\n\n## Data Classification\nSummarize sensitive data handled by the system and where it resides.\n\n## Infrastructure and Deployment\nHighlight infrastructure-as-code, runtime platforms, and configuration or secret delivery mechanisms that influence security posture.\n"; +pub(crate) const SPEC_MARKDOWN_TEMPLATE: &str = "# Project Specification\n- Location: {target_label}\n- Prepared by: {model_name}\n- Date: {date}\n- In-scope paths:\n```\n{project_locations}\n```\n\n## Overview\nSummarize the product or service, primary users, and the business problem it solves. Highlight the most security relevant entry points.\n\n## Architecture Summary\nDescribe the high-level system architecture, major services, data stores, and external integrations. Include a concise mermaid flowchart when it improves clarity.\n\n## Components\nList 5-8 major components. For each, note the role, responsibilities, key dependencies, and security-critical behavior.\n\n## Business Flows\nDocument up to 5 important flows (CRUD, external integrations, workflow orchestration). For each flow capture triggers, main steps, data touched, and security notes. Include a short mermaid sequence diagram if helpful.\n\n## Tech Stack\nCapture languages, frameworks, and infrastructure used by each major component. 
Tabulate runtimes, key libraries, storage technologies, and deployment targets.\n\n## Authentication\nExplain how principals authenticate, token lifecycles, libraries used, and how secrets are managed.\n\n## Authorization\nDescribe the authorization model, enforcement points, privileged roles, and escalation paths.\n\n## Data Classification\nIdentify sensitive data types handled by the project and where they are stored or transmitted.\n\n## Infrastructure and Deployment\nSummarize infrastructure-as-code, runtime platforms, and configuration or secret handling that affects security posture.\n\n## API Entry Points\nList externally reachable interfaces (HTTP/gRPC endpoints, message queues, CLIs, SDK methods) and how they handle security.\n\n### Server APIs\nProvide a markdown table with the exact columns:\n- endpoint path\n- authN method\n- authZ type\n- request parameters\n- example request (params, body, or method)\n- code location\n- parsing/validation logic\nIf the project exposes no server APIs, write `- None identified.` instead of a table.\n\n### Client APIs (optional)\nInclude a markdown table when the project ships an SDK, CLI, or other callable client surface. Columns:\n- api name (module.func or Class.method)\n- module/package\n- summary\n- parameters (omit if noisy)\n- returns (omit if noisy)\n- stability (public/official/internal)\n- code location\nIf there is no public client surface, state `- None.`\n"; +pub(crate) const SPEC_COMBINED_MARKDOWN_TEMPLATE: &str = r#"# Project Specification +Provide a 2–3 sentence executive overview summarizing the system's purpose, primary users, and the highest-value assets or flows that matter for security. + +## Relevant Source Files +List bullet points for the key files and directories covered by the drafts. Use inline code formatting for paths (for example, `src/service.rs`) and briefly note what each covers. Ensure every draft's location label appears at least once. 
+ +## Architecture Components and Flow +Provide a concise overview of how control and data move through the system, highlighting major services, external dependencies, and trust boundaries. +Include exactly one overarching mermaid diagram here that captures the end-to-end flow (no per-component or sequence diagrams in this section). +Move any detailed or per-component diagrams to the relevant component subsections below. +End with a `Sources:` line enumerating the files or modules that support this description. + +## Core Components +Create `### ` subsections for the 4–8 major components, using sensible parent folder or service names (for example, `service-a/`, `packages/foo`, or `cometset-gateway/cometset_gateway`). Avoid file- or module-level subsections and do not title components after specific file paths. Do not create separate subsections for generic concepts like "Data Models" or individual routers/controllers; fold such details into the relevant component's bullets if truly necessary. +Within each subsection, provide bullet points covering: +- Role or responsibility +- Key dependencies and integrations +- Security-relevant behavior or controls +Place any detailed flows or sequence diagrams for that component here (not in the Architecture section) when they clarify behavior. +End every subsection with a line that starts with `Sources:` referencing the supporting directories (prefer directories over individual file paths). + +## External Interfaces +Detail HTTP/gRPC endpoints, CLI commands, message queues, or other integration points. Use markdown tables when listing multiple endpoints and note required authentication/authorization and input validation. +Include a `Sources:` line referencing the defining modules. + +## Data Classification +Summarize sensitive data types, storage locations, retention policies, and encryption/transport guarantees. Prefer markdown tables that consolidate the drafts' entries when possible. 
+Include a `Sources:` line showing where each data entry was documented. + +## Security Controls +Organize subsections as `### Authentication`, `### Authorization`, `### Secrets`, and `### Auditing & Observability` when applicable. For each, explain mechanisms, critical libraries, enforcement points, and failure handling. +Each subsection must end with a `Sources:` line citing the relevant files. + +## Operational Considerations +Discuss deployment topology, runtime dependencies, background jobs, scaling, resiliency patterns, and monitoring or alerting hooks. Call out infrastructure-as-code or runtime configuration that affects security posture. +Include a `Sources:` line referencing infrastructure or operational files. + +"#; // Threat model prompts pub(crate) const THREAT_MODEL_SYSTEM_PROMPT: &str = "You are a senior application security engineer preparing a threat model. Use the provided architecture specification and repository summary to enumerate realistic threats, prioritised by risk."; @@ -90,6 +154,8 @@ Evaluate the project for concrete, exploitable security vulnerabilities. Prefer Follow these rules: - Read this file in full and review the provided context to understand intended behavior before judging safety. +- Start locally: prefer `READ` to open the current file and its immediate neighbors (imports, same directory/module, referenced configs) before using `GREP_FILES`. Use `GREP_FILES` only when you need to locate unknown files across the repository. +- When you reference a function, method, or class, look up its definition and usages across files: search by the identifier, then open the definition and a few call sites to verify behavior end-to-end. - Use the search tools below to inspect additional in-scope files when tracing data flows or confirming a hypothesis; cite the relevant variables, functions, and any validation or sanitization steps you discover. - Trace attacker-controlled inputs through the call graph to the ultimate sink. 
Highlight any sanitization or missing validation along the way. - Ignore unit tests, example scripts, or tooling unless they ship to production in this repo. @@ -97,8 +163,9 @@ Follow these rules: - Quote code snippets and locations using GitHub-style ranges (e.g. `src/service.rs#L10-L24`). Include git blame details when you have them: ` L-L`. - Keep all output in markdown and avoid generic disclaimers. - If you need more repository context, request it explicitly while staying within the provided scope: - - Emit `GREP_FILES: {"pattern":"needle","include":"*.rs","path":"subdir","limit":200}` to list files whose contents match, ordered by modification time. Prefer searching for meaningful identifiers. - - If you need a specific file by path, prefer `READ: ` directly. + - Prefer `READ: ` to inspect specific files (start with the current file and immediate neighbors). + - Use `SEARCH: literal:` or `SEARCH: regex:` to locate definitions and call sites across files; then `READ` the most relevant results to confirm the dataflow. + - Use `GREP_FILES: {"pattern":"needle","include":"*.rs","path":"subdir","limit":200}` to discover candidate locations across the repository; prefer meaningful identifiers over generic terms. # Output format For each vulnerability, emit a markdown block: diff --git a/codex-rs/tui/src/security_report_assets/script.js b/codex-rs/tui/src/security_report_assets/script.js index 7d19a16c62..858751ba86 100644 --- a/codex-rs/tui/src/security_report_assets/script.js +++ b/codex-rs/tui/src/security_report_assets/script.js @@ -897,7 +897,24 @@ if (node.nodeType === 1) { const tag = node.tagName ? 
node.tagName.toLowerCase() : ''; if (/^h[1-6]$/.test(tag)) break; - parts.push(node.innerText || node.textContent || ''); + if (node.classList && node.classList.contains('ticket-box')) { + node = node.nextSibling; + continue; + } + if (tag === 'button' || tag === 'input' || tag === 'select' || tag === 'textarea') { + node = node.nextSibling; + continue; + } + if (tag === 'pre') { + const code = node.querySelector('code'); + const codeText = code ? code.textContent || '' : node.textContent || ''; + const trimmed = codeText.trimEnd(); + if (trimmed) parts.push(trimmed); + node = node.nextSibling; + continue; + } + const text = (node.innerText || node.textContent || '').trim(); + if (text) parts.push(text); } else if (node.nodeType === 3) { const t = (node.textContent || '').trim(); if (t) parts.push(t); diff --git a/codex-rs/tui/src/security_review.rs b/codex-rs/tui/src/security_review.rs index 331160abf3..53c715e99f 100644 --- a/codex-rs/tui/src/security_review.rs +++ b/codex-rs/tui/src/security_review.rs @@ -17,6 +17,7 @@ use codex_core::config::GPT_5_CODEX_MEDIUM_MODEL; use codex_core::default_retry_backoff; use codex_core::git_info::collect_git_info; use codex_core::git_info::get_git_repo_root; +use codex_core::protocol::TokenUsage; use futures::stream::FuturesUnordered; use futures::stream::StreamExt; use pathdiff::diff_paths; @@ -74,17 +75,22 @@ const MAX_FILE_ANALYSIS_ATTEMPTS: usize = 2; // Number of full passes over the triaged files during bug finding. // Not related to per-file search/tool attempts. Defaults to 3. 
const BUG_FINDING_PASSES: usize = 1; +const BUG_POLISH_CONCURRENCY: usize = 8; const COMMAND_PREVIEW_MAX_LINES: usize = 2; const COMMAND_PREVIEW_MAX_GRAPHEMES: usize = 96; const MODEL_REASONING_LOG_MAX_GRAPHEMES: usize = 240; const AUTO_SCOPE_MODEL: &str = "gpt-5-codex"; const SPEC_GENERATION_MODEL: &str = "gpt-5-codex"; +const THREAT_MODEL_MODEL: &str = "gpt-5-codex"; +const CLASSIFICATION_PROMPT_SPEC_LIMIT: usize = 16_000; // prompts moved to `security_prompts.rs` const BUG_RERANK_CHUNK_SIZE: usize = 1; const BUG_RERANK_MAX_CONCURRENCY: usize = 32; const BUG_RERANK_CONTEXT_MAX_CHARS: usize = 2000; const BUG_RERANK_MAX_TOOL_ROUNDS: usize = 4; const BUG_RERANK_MAX_COMMAND_ERRORS: usize = 5; +const SPEC_COMBINE_MAX_TOOL_ROUNDS: usize = 6; +const SPEC_COMBINE_MAX_COMMAND_ERRORS: usize = 5; // see BUG_RERANK_PROMPT_TEMPLATE in security_prompts const SPEC_DIR_FILTER_TARGET: usize = 8; // see SPEC_DIR_FILTER_SYSTEM_PROMPT in security_prompts @@ -92,6 +98,7 @@ const SPEC_DIR_FILTER_TARGET: usize = 8; const AUTO_SCOPE_MAX_PATHS: usize = 20; const AUTO_SCOPE_MAX_KEYWORDS: usize = 6; const AUTO_SCOPE_MAX_AGENT_STEPS: usize = 10; +const AUTO_SCOPE_INITIAL_KEYWORD_PROBES: usize = 4; const AUTO_SCOPE_DEFAULT_READ_WINDOW: usize = 120; const AUTO_SCOPE_KEYWORD_STOPWORDS: &[&str] = &[ "the", "and", "for", "with", "that", "this", "from", "into", "when", "where", "which", "while", @@ -144,7 +151,7 @@ static EXCLUDED_DIR_NAMES: [&str; 13] = [ "target", ]; -#[derive(Clone, Copy, Debug, PartialEq, Eq, Default)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Default, Serialize, Deserialize)] pub(crate) enum SecurityReviewMode { #[default] Full, @@ -173,6 +180,7 @@ impl SecurityReviewMode { pub(crate) struct SecurityReviewRequest { pub repo_path: PathBuf, pub include_paths: Vec, + pub scope_display_paths: Vec, pub output_root: PathBuf, pub mode: SecurityReviewMode, pub include_spec_in_bug_analysis: bool, @@ -195,7 +203,12 @@ pub(crate) struct SecurityReviewResult { pub report_path: 
Option, pub report_html_path: Option, pub snapshot_path: PathBuf, + pub metadata_path: PathBuf, + pub api_overview_path: Option, + pub classification_json_path: Option, + pub classification_table_path: Option, pub logs: Vec, + pub token_usage: TokenUsage, } #[derive(Clone, Debug)] @@ -204,6 +217,19 @@ pub(crate) struct SecurityReviewFailure { pub logs: Vec, } +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(crate) struct SecurityReviewMetadata { + pub mode: SecurityReviewMode, + #[serde(default)] + pub scope_paths: Vec, +} + +pub(crate) fn read_security_review_metadata(path: &Path) -> Result { + let bytes = fs::read(path).map_err(|e| format!("Failed to read {}: {e}", path.display()))?; + serde_json::from_slice::(&bytes) + .map_err(|e| format!("Failed to parse {}: {e}", path.display())) +} + #[derive(Clone)] struct FileSnippet { relative_path: PathBuf, @@ -235,6 +261,13 @@ struct ReviewMetrics { read_calls: AtomicUsize, git_blame_calls: AtomicUsize, command_seq: AtomicU64, + + // Aggregated token usage across all model calls + input_tokens: std::sync::atomic::AtomicU64, + cached_input_tokens: std::sync::atomic::AtomicU64, + output_tokens: std::sync::atomic::AtomicU64, + reasoning_output_tokens: std::sync::atomic::AtomicU64, + total_tokens: std::sync::atomic::AtomicU64, } #[derive(Clone, Copy)] @@ -313,6 +346,46 @@ impl ReviewMetrics { .fetch_add(1, Ordering::Relaxed) .wrapping_add(1) } + + fn record_usage(&self, usage: &TokenUsage) { + use std::sync::atomic::Ordering::Relaxed; + self.input_tokens.fetch_add(usage.input_tokens, Relaxed); + self.cached_input_tokens + .fetch_add(usage.cached_input_tokens, Relaxed); + self.output_tokens.fetch_add(usage.output_tokens, Relaxed); + self.reasoning_output_tokens + .fetch_add(usage.reasoning_output_tokens, Relaxed); + self.total_tokens.fetch_add(usage.total_tokens, Relaxed); + } + + fn record_usage_raw( + &self, + input_tokens: u64, + cached_input_tokens: u64, + output_tokens: u64, + reasoning_output_tokens: u64, + 
total_tokens: u64, + ) { + let usage = TokenUsage { + input_tokens, + cached_input_tokens, + output_tokens, + reasoning_output_tokens, + total_tokens, + }; + self.record_usage(&usage); + } + + fn snapshot_usage(&self) -> TokenUsage { + use std::sync::atomic::Ordering::Relaxed; + TokenUsage { + input_tokens: self.input_tokens.load(Relaxed), + cached_input_tokens: self.cached_input_tokens.load(Relaxed), + output_tokens: self.output_tokens.load(Relaxed), + reasoning_output_tokens: self.reasoning_output_tokens.load(Relaxed), + total_tokens: self.total_tokens.load(Relaxed), + } + } } struct FileBugResult { @@ -352,18 +425,41 @@ struct FileTriageChunkResult { struct SpecEntry { location_label: String, markdown: String, + raw_path: PathBuf, + api_markdown: Option, +} + +#[derive(Clone)] +struct ApiEntry { + location_label: String, + markdown: String, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +struct DataClassificationRow { + data_type: String, + sensitivity: String, + storage_location: String, + retention: String, + encryption_at_rest: String, + in_transit: String, + accessed_by: String, } struct SpecGenerationOutcome { combined_markdown: String, locations: Vec, logs: Vec, + api_entries: Vec, + classification_rows: Vec, + classification_table: Option, } struct AutoScopeSelection { abs_path: PathBuf, display_path: String, reason: Option, + is_dir: bool, } fn truncate_auto_scope_selections( @@ -373,11 +469,68 @@ fn truncate_auto_scope_selections( if selections.len() > AUTO_SCOPE_MAX_PATHS { selections.truncate(AUTO_SCOPE_MAX_PATHS); logs.push(format!( - "Auto scope limited to the first {AUTO_SCOPE_MAX_PATHS} directories returned by the model." + "Auto scope limited to the first {AUTO_SCOPE_MAX_PATHS} paths returned by the model." )); } } +fn prune_auto_scope_parent_child_overlaps( + selections: &mut Vec, + logs: &mut Vec, +) { + if selections.len() <= 1 { + return; + } + + // Only prune parent directories when a child directory is already included. 
+ let mut directory_indices: Vec = selections + .iter() + .enumerate() + .filter_map(|(idx, sel)| sel.is_dir.then_some(idx)) + .collect(); + if directory_indices.len() <= 1 { + return; + } + + directory_indices.sort_by(|&a, &b| { + let da = selections[a].abs_path.components().count(); + let db = selections[b].abs_path.components().count(); + db.cmp(&da) + }); + + let mut kept_dirs: Vec = Vec::new(); + let mut pruned_indices: HashSet = HashSet::new(); + + for idx in directory_indices { + let current = &selections[idx]; + if kept_dirs + .iter() + .any(|kept| kept.starts_with(current.abs_path.as_path())) + { + pruned_indices.insert(idx); + } else { + kept_dirs.push(current.abs_path.clone()); + } + } + + if pruned_indices.is_empty() { + return; + } + + let pruned = pruned_indices.len(); + let mut filtered: Vec = Vec::with_capacity(selections.len() - pruned); + for (idx, sel) in selections.drain(..).enumerate() { + if pruned_indices.contains(&idx) { + continue; + } + filtered.push(sel); + } + *selections = filtered; + logs.push(format!( + "Auto scope pruned {pruned} parent directories due to overlap." + )); +} + fn summarize_top_level(repo_root: &Path) -> String { let mut directories: Vec = Vec::new(); let mut files: Vec = Vec::new(); @@ -693,8 +846,6 @@ async fn run_grep_files( } } -// SEARCH_FILES is deprecated; map to content search earlier. 
- async fn execute_auto_scope_read( repo_root: &Path, command_path: &Path, @@ -870,6 +1021,10 @@ struct PersistedArtifacts { snapshot_path: PathBuf, report_path: Option, report_html_path: Option, + metadata_path: PathBuf, + api_overview_path: Option, + classification_json_path: Option, + classification_table_path: Option, } struct BugCommandPlan { @@ -996,7 +1151,7 @@ fn build_bug_records( (bugs, snapshots) } -fn render_bug_sections(snapshots: &[BugSnapshot]) -> String { +fn render_bug_sections(snapshots: &[BugSnapshot], git_link_info: Option<&GitLinkInfo>) -> String { let mut sections: Vec = Vec::new(); for snapshot in snapshots { let base = snapshot.original_markdown.trim(); @@ -1006,10 +1161,10 @@ fn render_bug_sections(snapshots: &[BugSnapshot]) -> String { let mut composed = String::new(); let anchor_snippet = format!("\n", snapshot.bug.summary_id)); - composed.push_str(base); + composed.push_str(&linkify_file_lines(base, git_link_info)); } if !matches!(snapshot.bug.validation.status, BugValidationStatus::Pending) { composed.push_str("\n\n#### Validation\n"); @@ -1055,6 +1210,134 @@ fn render_bug_sections(snapshots: &[BugSnapshot]) -> String { sections.join("\n\n") } +fn linkify_file_lines(markdown: &str, git_link_info: Option<&GitLinkInfo>) -> String { + if git_link_info.is_none() { + return markdown.to_string(); + } + let info = git_link_info.unwrap(); + let mut out_lines: Vec = Vec::new(); + for raw in markdown.lines() { + let trimmed = raw.trim_start(); + if let Some(rest) = trimmed.strip_prefix("- **File & Lines:**") { + let value = rest.trim().trim_matches('`'); + if value.is_empty() { + out_lines.push(raw.to_string()); + continue; + } + let pairs = parse_location_item(value, info); + if pairs.is_empty() { + out_lines.push(raw.to_string()); + continue; + } + // If ranges exist for a path, drop the bare link for that path + let filtered = filter_location_pairs(pairs); + let mut links: Vec = Vec::new(); + for (rel, frag) in filtered { + let mut url = 
format!("{}{}", info.github_prefix, rel); + let mut text = rel; + if let Some(f) = frag.as_ref() { + url.push('#'); + url.push_str(f); + text.push('#'); + text.push_str(f); + } + links.push(format!("[{text}]({url})")); + } + let rebuilt = format!("- **File & Lines:** {}", links.join(", ")); + // Preserve original indentation + let indent_len = raw.len().saturating_sub(trimmed.len()); + let indent = &raw[..indent_len]; + out_lines.push(format!("{indent}{rebuilt}")); + } else { + out_lines.push(raw.to_string()); + } + } + out_lines.join("\n") +} + +async fn polish_bug_markdowns( + client: &Client, + provider: &ModelProviderInfo, + auth: &Option, + summaries: &mut [BugSummary], + details: &mut [BugDetail], + metrics: Arc, +) -> Result, String> { + if summaries.is_empty() { + return Ok(Vec::new()); + } + + let mut detail_index: HashMap = HashMap::new(); + for (idx, detail) in details.iter().enumerate() { + detail_index.insert(detail.summary_id, idx); + } + + struct BugPolishUpdate { + id: usize, + markdown: String, + logs: Vec, + } + + let mut updates: HashMap = HashMap::new(); + let mut combined_logs: Vec = Vec::new(); + + let work_items: Vec<(usize, String)> = summaries + .iter() + .map(|summary| (summary.id, summary.markdown.clone())) + .collect(); + + let mut stream = futures::stream::iter(work_items.into_iter().map(|(bug_id, content)| { + let metrics = metrics.clone(); + async move { + if content.trim().is_empty() { + return Ok(BugPolishUpdate { + id: bug_id, + markdown: content, + logs: Vec::new(), + }); + } + let outcome = polish_markdown_block(client, provider, auth, metrics, &content, None) + .await + .map_err(|err| format!("Bug {bug_id}: {err}"))?; + let polished = fix_mermaid_blocks(&outcome.text); + let logs = outcome + .reasoning_logs + .into_iter() + .map(|line| format!("Bug {bug_id}: {line}")) + .collect(); + Ok(BugPolishUpdate { + id: bug_id, + markdown: polished, + logs, + }) + } + })) + .buffer_unordered(BUG_POLISH_CONCURRENCY); + + while let 
Some(result) = stream.next().await { + match result { + Ok(update) => { + combined_logs.extend(update.logs.iter().cloned()); + updates.insert(update.id, update); + } + Err(err) => return Err(err), + } + } + + drop(stream); + + for summary in summaries.iter_mut() { + if let Some(update) = updates.get(&summary.id) { + summary.markdown = update.markdown.clone(); + if let Some(&idx) = detail_index.get(&summary.id) { + details[idx].original_markdown = update.markdown.clone(); + } + } + } + + Ok(combined_logs) +} + fn snapshot_bugs(snapshot: &SecurityReviewSnapshot) -> Vec { snapshot .bugs @@ -1111,6 +1394,7 @@ pub(crate) async fn run_security_review( let metrics = Arc::new(ReviewMetrics::default()); let client = Client::new(); let overall_start = Instant::now(); + let mut record = |line: String| { if let Some(tx) = progress_sender.as_ref() { tx.send(AppEvent::SecurityReviewLog(line.clone())); @@ -1177,11 +1461,13 @@ pub(crate) async fn run_security_review( abs_path, display_path, reason, + is_dir, } = selection; + let kind = if is_dir { "directory" } else { "file" }; let message = if let Some(reason) = reason.as_ref() { - format!("Auto scope included {display_path} — {reason}") + format!("Auto scope included {kind} {display_path} — {reason}") } else { - format!("Auto scope included {display_path}") + format!("Auto scope included {kind} {display_path}") }; record(message); resolved_paths.push(abs_path); @@ -1456,7 +1742,7 @@ pub(crate) async fn run_security_review( &client, &request.provider, &request.auth, - &request.model, + THREAT_MODEL_MODEL, &repository_summary, &request.repo_path, spec, @@ -1710,6 +1996,39 @@ pub(crate) async fn run_security_review( record(msg.clone()); aggregated_logs.push(msg); } + + normalize_bug_identifiers(&mut all_summaries, &mut all_details); + } + + if !all_summaries.is_empty() { + let polish_message = format!( + "Polishing markdown for {} bug finding(s).", + all_summaries.len() + ); + record(polish_message.clone()); + 
aggregated_logs.push(polish_message); + let polish_logs = match polish_bug_markdowns( + &client, + &request.provider, + &request.auth, + &mut all_summaries, + &mut all_details, + metrics.clone(), + ) + .await + { + Ok(logs) => logs, + Err(err) => { + return Err(SecurityReviewFailure { + message: format!("Failed to polish bug markdown: {err}"), + logs: logs.clone(), + }); + } + }; + for line in polish_logs { + record(line.clone()); + aggregated_logs.push(line); + } } let allowed_paths: HashSet = all_summaries @@ -1759,46 +2078,6 @@ pub(crate) async fn run_security_review( bugs_markdown = format!("{table}\n\n{bugs_markdown}"); } bugs_markdown = fix_mermaid_blocks(&bugs_markdown); - if !bugs_markdown.trim().is_empty() { - record("Polishing bug markdown formatting.".to_string()); - let fix_prompt = build_fix_markdown_prompt(&bugs_markdown, None); - let polished_response = match call_model( - &client, - &request.provider, - &request.auth, - MARKDOWN_FIX_MODEL, - MARKDOWN_FIX_SYSTEM_PROMPT, - &fix_prompt, - metrics.clone(), - 0.0, - ) - .await - { - Ok(output) => { - if let Some(reasoning) = output.reasoning.as_ref() { - for line in reasoning - .lines() - .map(str::trim) - .filter(|line| !line.is_empty()) - { - let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); - let msg = format!("Model reasoning: {truncated}"); - record(msg.clone()); - } - } - output.text - } - Err(err) => { - let message = format!("Failed to polish bug markdown: {err}"); - record(message.clone()); - return Err(SecurityReviewFailure { - message, - logs: logs.clone(), - }); - } - }; - bugs_markdown = fix_mermaid_blocks(&polished_response); - } let mut report_sections_prefix: Vec = Vec::new(); if matches!(request.mode, SecurityReviewMode::Full) { @@ -1822,7 +2101,7 @@ pub(crate) async fn run_security_review( } else { Some(format!("# Security Findings\n\n{}", bugs_markdown.trim())) }; - let mut report_markdown = match request.mode { + let report_markdown = match request.mode { 
SecurityReviewMode::Full => { let mut sections = report_sections_prefix.clone(); if let Some(section) = findings_section.clone() { @@ -1850,47 +2129,6 @@ pub(crate) async fn run_security_review( } }; - if let Some(current) = report_markdown.clone() { - record("Polishing final report markdown formatting.".to_string()); - let fix_prompt = build_fix_markdown_prompt(¤t, None); - let polished_response = match call_model( - &client, - &request.provider, - &request.auth, - MARKDOWN_FIX_MODEL, - MARKDOWN_FIX_SYSTEM_PROMPT, - &fix_prompt, - metrics.clone(), - 0.0, - ) - .await - { - Ok(output) => { - if let Some(reasoning) = output.reasoning.as_ref() { - for line in reasoning - .lines() - .map(str::trim) - .filter(|line| !line.is_empty()) - { - let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); - let msg = format!("Model reasoning: {truncated}"); - record(msg.clone()); - } - } - output.text - } - Err(err) => { - let message = format!("Failed to polish final security report: {err}"); - record(message.clone()); - return Err(SecurityReviewFailure { - message, - logs: logs.clone(), - }); - } - }; - report_markdown = Some(fix_mermaid_blocks(&polished_response)); - } - let snapshot = SecurityReviewSnapshot { generated_at: OffsetDateTime::now_utc(), findings_summary: findings_summary.clone(), @@ -1899,10 +2137,29 @@ pub(crate) async fn run_security_review( }; // Intentionally avoid logging the output path pre-write to keep logs concise. 
+ let metadata = SecurityReviewMetadata { + mode: request.mode, + scope_paths: request.scope_display_paths.clone(), + }; + let api_entries_for_persist = spec_generation + .as_ref() + .map(|spec| spec.api_entries.clone()) + .unwrap_or_default(); + let classification_rows_for_persist = spec_generation + .as_ref() + .map(|spec| spec.classification_rows.clone()) + .unwrap_or_default(); + let classification_table_for_persist = spec_generation + .as_ref() + .and_then(|spec| spec.classification_table.clone()); let artifacts = match persist_artifacts( &request.output_root, &request.repo_path, + &metadata, &bugs_markdown, + &api_entries_for_persist, + &classification_rows_for_persist, + classification_table_for_persist.as_deref(), report_markdown.as_deref(), &snapshot, ) @@ -1948,7 +2205,12 @@ pub(crate) async fn run_security_review( report_path: artifacts.report_path, report_html_path: artifacts.report_html_path, snapshot_path: artifacts.snapshot_path, + metadata_path: artifacts.metadata_path, + api_overview_path: artifacts.api_overview_path, + classification_json_path: artifacts.classification_json_path, + classification_table_path: artifacts.classification_table_path, logs, + token_usage: metrics.snapshot_usage(), }) } @@ -2128,6 +2390,22 @@ async fn triage_files_for_bug_analysis( let total_chunks = total.div_ceil(FILE_TRIAGE_CHUNK_SIZE.max(1)); let concurrency = FILE_TRIAGE_CONCURRENCY.min(total_chunks.max(1)); + // Emit a brief, on-screen preview of the parallel task and sample tool calls. 
+ if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(format!( + " └ Launching parallel file triage ({concurrency} workers)" + ))); + tx.send(AppEvent::SecurityReviewLog( + " Sample tool calls (simulated):".to_string(), + )); + tx.send(AppEvent::SecurityReviewLog( + " └ SEARCH literal:'auth'".to_string(), + )); + tx.send(AppEvent::SecurityReviewLog( + " GREP_FILES: {\"pattern\": \"token|password\", \"include\": \"*.rs\"}".to_string(), + )); + } + for _ in 0..concurrency { if let Some(request) = remaining.next() { in_flight.push(triage_chunk( @@ -2434,6 +2712,11 @@ async fn generate_specs( { Ok(result) => result, Err(err) => { + if let Some(tx) = progress_sender.as_ref() { + for line in &err.logs { + tx.send(AppEvent::SecurityReviewLog(line.clone())); + } + } logs.extend(err.logs); let message = format!( "Directory filter failed; using all directories. {}", @@ -2475,6 +2758,7 @@ async fn generate_specs( let specs_root = output_root.join("specs"); let raw_dir = specs_root.join("raw"); let combined_dir = specs_root.join("combined"); + let apis_dir = specs_root.join("apis"); tokio_fs::create_dir_all(&raw_dir) .await @@ -2488,6 +2772,12 @@ async fn generate_specs( message: format!("Failed to create {}: {e}", combined_dir.display()), logs: Vec::new(), })?; + tokio_fs::create_dir_all(&apis_dir) + .await + .map_err(|e| SecurityReviewFailure { + message: format!("Failed to create {}: {e}", apis_dir.display()), + logs: Vec::new(), + })?; let mut in_flight: FuturesUnordered<_> = FuturesUnordered::new(); for path in normalized { @@ -2527,24 +2817,132 @@ async fn generate_specs( return Ok(None); } + let mut api_entries: Vec = Vec::new(); + for entry in spec_entries.iter_mut() { + let location_label = entry.location_label.clone(); + if let Some(markdown) = entry + .api_markdown + .as_ref() + .map(|s| s.trim()) + .filter(|s| !s.is_empty()) + .map(str::to_string) + { + let slug = slugify_label(&location_label); + let api_path = 
apis_dir.join(format!("{slug}_apis.md")); + match tokio_fs::write(&api_path, markdown.as_bytes()).await { + Ok(()) => { + let msg = format!( + "API entry points for {location_label} saved to {}.", + api_path.display() + ); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + api_entries.push(ApiEntry { + location_label, + markdown, + }); + } + Err(err) => { + let msg = format!( + "Failed to write API entry points for {location_label} to {}: {err}", + api_path.display() + ); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + } + } + } else { + let msg = + format!("Specification for {location_label} did not include API entry points."); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + } + } + let combined_path = combined_dir.join("combined_specification.md"); - let (combined_markdown, mut combine_logs) = combine_spec_markdown( + let (mut combined_markdown, mut combine_logs) = combine_spec_markdown( client, provider, auth, &display_locations, &spec_entries, &combined_path, + repo_root, progress_sender.clone(), - metrics, + metrics.clone(), ) .await?; logs.append(&mut combine_logs); + let mut classification_rows: Vec = Vec::new(); + let mut classification_table: Option = None; + match extract_data_classification(client, provider, auth, &combined_markdown, metrics.clone()) + .await + { + Ok(Some(extraction)) => { + for line in extraction.reasoning_logs { + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(line.clone())); + } + logs.push(line); + } + classification_rows = extraction.rows.clone(); + classification_table = Some(extraction.table_markdown.clone()); + let injected = + inject_data_classification_section(&combined_markdown, &extraction.table_markdown); + combined_markdown = fix_mermaid_blocks(&injected); + let msg 
= format!( + "Injected data classification table with {} entr{} into combined specification.", + extraction.rows.len(), + if extraction.rows.len() == 1 { + "y" + } else { + "ies" + } + ); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + if let Err(err) = tokio_fs::write(&combined_path, combined_markdown.as_bytes()).await { + let warn = format!( + "Failed to update combined specification with data classification table: {err}" + ); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(warn.clone())); + } + logs.push(warn); + } + } + Ok(None) => { + let msg = "Data classification extraction produced no entries.".to_string(); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + } + Err(err) => { + let msg = format!("Failed to extract data classification: {err}"); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + } + } + Ok(Some(SpecGenerationOutcome { combined_markdown, locations: display_locations, logs, + api_entries, + classification_rows, + classification_table, })) } @@ -2952,10 +3350,91 @@ async fn auto_detect_scope( } } - let repo_overview = summarize_top_level(repo_root); let mut conversation: Vec = Vec::new(); let mut tool_rounds = 0usize; + if !keywords.is_empty() { + let mut initial_search_count = 0usize; + for keyword in keywords.iter().take(AUTO_SCOPE_INITIAL_KEYWORD_PROBES) { + let trimmed = keyword.trim(); + if trimmed.is_empty() { + continue; + } + let term = trimmed.to_string(); + initial_search_count += 1; + logs.push(format!( + "Auto scope executing initial SEARCH literal:{term}." 
+ )); + conversation.push(format!("Assistant:\nSEARCH: literal:{term}")); + let (log_line, output) = + execute_auto_scope_search_content(repo_root, &term, SearchMode::Literal, &metrics) + .await; + logs.push(log_line); + conversation.push(format!("Tool SEARCH `{term}`:\n{output}")); + let preview = command_preview_snippets(&output); + if !preview.is_empty() { + logs.push(format!( + "Auto scope SEARCH literal:{term} preview:\n{}", + preview.join("\n") + )); + } + + let pattern_str = term.clone(); + let grep_limit = Some(200usize); + let grep_args = GrepFilesArgs { + pattern: pattern_str.clone(), + include: None, + path: None, + limit: grep_limit, + }; + let mut shown = serde_json::json!({ "pattern": pattern_str }); + if let Some(limit) = grep_limit { + shown["limit"] = serde_json::Value::Number(serde_json::Number::from(limit as u64)); + } + let shown_text = shown.to_string(); + logs.push(format!( + "Auto scope executing initial GREP_FILES {shown_text}." + )); + conversation.push(format!("Assistant:\nGREP_FILES: {shown_text}")); + let (grep_log, grep_output) = + match run_grep_files(repo_root, &grep_args, &metrics).await { + SearchResult::Matches(out) => ( + format!("Auto scope file search `{term}` returned matching files."), + out, + ), + SearchResult::NoMatches => ( + format!("Auto scope file search `{term}` returned no matches."), + "No matches found.".to_string(), + ), + SearchResult::Error(err) => ( + format!("Auto scope file search `{term}` failed: {err}"), + format!("Search error: {err}"), + ), + }; + logs.push(grep_log); + conversation.push(format!("Tool GREP_FILES {shown_text}:\n{grep_output}")); + let preview = command_preview_snippets(&grep_output); + if !preview.is_empty() { + logs.push(format!( + "Auto scope GREP_FILES {shown_text} preview:\n{}", + preview.join("\n") + )); + } + } + if initial_search_count > 0 { + let label = if initial_search_count == 1 { + "search" + } else { + "searches" + }; + logs.push(format!( + "Auto scope seeded the agent loop 
with {initial_search_count} initial keyword {label} (content + file matches)." + )); + } + } + + let repo_overview = summarize_top_level(repo_root); + loop { if tool_rounds >= AUTO_SCOPE_MAX_AGENT_STEPS { return Err(SecurityReviewFailure { @@ -3105,6 +3584,7 @@ async fn auto_detect_scope( display_path: display_path_for(&canonical, repo_root), abs_path: canonical, reason: Some("LLM requested full repository".to_string()), + is_dir: true, }], logs, )); @@ -3133,9 +3613,17 @@ async fn auto_detect_scope( Ok(path) => path, Err(_) => continue, }; - if !canonical.starts_with(repo_root) || !canonical.is_dir() { + if !canonical.starts_with(repo_root) { continue; } + let metadata = match fs::metadata(&canonical) { + Ok(metadata) => metadata, + Err(_) => continue, + }; + if !(metadata.is_dir() || metadata.is_file()) { + continue; + } + let is_dir = metadata.is_dir(); if !seen.insert(canonical.clone()) { continue; } @@ -3143,6 +3631,7 @@ async fn auto_detect_scope( display_path: display_path_for(&canonical, repo_root), abs_path: canonical, reason: raw.reason, + is_dir, }); } @@ -3153,6 +3642,8 @@ async fn auto_detect_scope( }); } + // Prefer specific children over broad parents, then cap to max. 
+ prune_auto_scope_parent_child_overlaps(&mut selections, &mut logs); truncate_auto_scope_selections(&mut selections, &mut logs); return Ok((selections, logs)); @@ -3161,6 +3652,128 @@ async fn auto_detect_scope( } } +#[cfg(test)] +mod data_classification_tests { + use super::*; + use pretty_assertions::assert_eq; + + #[test] + fn build_table_sorts_by_sensitivity_then_name() { + let rows = vec![ + DataClassificationRow { + data_type: "Session Tokens".to_string(), + sensitivity: "high".to_string(), + storage_location: "redis".to_string(), + retention: "7 days".to_string(), + encryption_at_rest: "aes-256".to_string(), + in_transit: "tls 1.3".to_string(), + accessed_by: "web app".to_string(), + }, + DataClassificationRow { + data_type: "API Keys".to_string(), + sensitivity: "high".to_string(), + storage_location: "secrets manager".to_string(), + retention: "rotate quarterly".to_string(), + encryption_at_rest: "kms".to_string(), + in_transit: "tls 1.3".to_string(), + accessed_by: "deployment pipeline".to_string(), + }, + DataClassificationRow { + data_type: "Audit Logs".to_string(), + sensitivity: "medium".to_string(), + storage_location: "s3".to_string(), + retention: "13 months".to_string(), + encryption_at_rest: "aes-256".to_string(), + in_transit: "tls".to_string(), + accessed_by: "security team".to_string(), + }, + ]; + + let table = build_data_classification_table(&rows).expect("expected table output"); + let expected = ["## Data Classification", + "", + "| Data Type | Sensitivity | Storage Location | Retention | Encryption At Rest | In Transit | Accessed By |", + "|---|---|---|---|---|---|---|", + "| API Keys | high | secrets manager | rotate quarterly | kms | tls 1.3 | deployment pipeline |", + "| Session Tokens | high | redis | 7 days | aes-256 | tls 1.3 | web app |", + "| Audit Logs | medium | s3 | 13 months | aes-256 | tls | security team |", + ""] + .join("\n"); + assert_eq!(table, expected); + + assert_eq!(build_data_classification_table(&[]), None); + } + 
+ #[test] + fn inject_replaces_existing_section() { + let spec = "\ +# Project Specification + +## Data Classification +Legacy content to be replaced. + +## Authentication +Existing auth details. +"; + let table_markdown = "\ +## Data Classification + +| Data Type | Sensitivity | Storage Location | Retention | Encryption At Rest | In Transit | Accessed By | +|---|---|---|---|---|---|---| +| Customer PII | high | postgres | 90 days | aes-256 | tls 1.2+ | support portal | + +"; + + let updated = inject_data_classification_section(spec, table_markdown); + let expected = "\ +# Project Specification + +## Data Classification + +| Data Type | Sensitivity | Storage Location | Retention | Encryption At Rest | In Transit | Accessed By | +|---|---|---|---|---|---|---| +| Customer PII | high | postgres | 90 days | aes-256 | tls 1.2+ | support portal | + + +## Authentication +Existing auth details."; + assert_eq!(updated, expected); + } + + #[test] + fn inject_appends_section_when_missing() { + let spec = "\ +# Project Specification + +## Overview +System overview text. +"; + let table_markdown = "\ +## Data Classification + +| Data Type | Sensitivity | Storage Location | Retention | Encryption At Rest | In Transit | Accessed By | +|---|---|---|---|---|---|---| +| Billing Data | high | stripe | 7 years | provider-managed | tls 1.2+ | finance team | + +"; + let updated = inject_data_classification_section(spec, table_markdown); + let expected = "\ +# Project Specification + +## Overview +System overview text. 
+ +## Data Classification + +| Data Type | Sensitivity | Storage Location | Retention | Encryption At Rest | In Transit | Accessed By | +|---|---|---|---|---|---|---| +| Billing Data | high | stripe | 7 years | provider-managed | tls 1.2+ | finance team | + +"; + assert_eq!(updated, expected); + } +} + #[cfg(test)] mod auto_scope_tests { use super::*; @@ -3427,7 +4040,29 @@ async fn generate_spec_for_location( logs.push(msg); } } - let sanitized = fix_mermaid_blocks(&response.text); + let mut sanitized = fix_mermaid_blocks(&response.text); + + if !sanitized.trim().is_empty() { + let polish_message = format!("Polishing specification markdown for {location_label}."); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(polish_message.clone())); + } + logs.push(polish_message); + let outcome = + polish_markdown_block(&client, &provider, &auth, metrics.clone(), &sanitized, None) + .await + .map_err(|err| SecurityReviewFailure { + message: format!("Failed to polish specification for {location_label}: {err}"), + logs: Vec::new(), + })?; + if let Some(tx) = progress_sender.as_ref() { + for line in &outcome.reasoning_logs { + tx.send(AppEvent::SecurityReviewLog(line.clone())); + } + } + logs.extend(outcome.reasoning_logs.clone()); + sanitized = fix_mermaid_blocks(&outcome.text); + } let slug = slugify_label(&location_label); let file_path = raw_dir.join(format!("{slug}.md")); @@ -3448,10 +4083,14 @@ async fn generate_spec_for_location( } logs.push(done_message); + let api_markdown = extract_api_markdown(&sanitized); + Ok(( SpecEntry { location_label, markdown: sanitized, + raw_path: file_path, + api_markdown, }, logs, )) @@ -3603,6 +4242,39 @@ async fn generate_threat_model( } } + if !sanitized_response.trim().is_empty() { + let polish_message = "Polishing threat model markdown formatting.".to_string(); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(polish_message.clone())); + } + 
logs.push(polish_message); + let outcome = match polish_markdown_block( + client, + provider, + auth, + metrics.clone(), + &sanitized_response, + None, + ) + .await + { + Ok(outcome) => outcome, + Err(err) => { + return Err(SecurityReviewFailure { + message: format!("Failed to polish threat model: {err}"), + logs: logs.clone(), + }); + } + }; + if let Some(tx) = progress_sender.as_ref() { + for line in &outcome.reasoning_logs { + tx.send(AppEvent::SecurityReviewLog(line.clone())); + } + } + logs.extend(outcome.reasoning_logs.clone()); + sanitized_response = fix_mermaid_blocks(&outcome.text); + } + let threat_file = threats_dir.join("threat_model.md"); tokio_fs::write(&threat_file, sanitized_response.as_bytes()) .await @@ -3636,6 +4308,7 @@ async fn combine_spec_markdown( project_locations: &[String], specs: &[SpecEntry], combined_path: &Path, + repo_root: &Path, progress_sender: Option, metrics: Arc, ) -> Result<(String, Vec), SecurityReviewFailure> { @@ -3649,44 +4322,237 @@ async fn combine_spec_markdown( } logs.push(message); - let prompt = build_combine_specs_prompt(project_locations, specs); - let response = match call_model( - client, - provider, - auth, - SPEC_GENERATION_MODEL, - SPEC_COMBINE_SYSTEM_PROMPT, - &prompt, - metrics.clone(), - 0.1, - ) - .await - { - Ok(output) => { - if let Some(reasoning) = output.reasoning.as_ref() { - for line in reasoning - .lines() - .map(str::trim) - .filter(|line| !line.is_empty()) - { - let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); - let msg = format!("Model reasoning: {truncated}"); - if let Some(tx) = progress_sender.as_ref() { - tx.send(AppEvent::SecurityReviewLog(msg.clone())); - } - logs.push(msg); - } - } - output.text - } - Err(err) => { + let base_prompt = build_combine_specs_prompt(project_locations, specs); + let mut conversation: Vec = Vec::new(); + let mut seen_search_requests: HashSet = HashSet::new(); + let mut seen_read_requests: HashSet = HashSet::new(); + let mut tool_rounds = 
0usize; + let mut command_error_count = 0usize; + + let combined_raw = loop { + if tool_rounds > SPEC_COMBINE_MAX_TOOL_ROUNDS { return Err(SecurityReviewFailure { - message: format!("Failed to combine specifications: {err}"), + message: format!("Spec merge exceeded {SPEC_COMBINE_MAX_TOOL_ROUNDS} tool rounds."), logs, }); } + + let mut prompt = base_prompt.clone(); + if !conversation.is_empty() { + prompt.push_str("\n\n# Conversation history\n"); + prompt.push_str(&conversation.join("\n\n")); + } + + let response = match call_model( + client, + provider, + auth, + SPEC_GENERATION_MODEL, + SPEC_COMBINE_SYSTEM_PROMPT, + &prompt, + metrics.clone(), + 0.0, + ) + .await + { + Ok(output) => output, + Err(err) => { + return Err(SecurityReviewFailure { + message: format!("Failed to combine specifications: {err}"), + logs, + }); + } + }; + + if let Some(reasoning) = response.reasoning.as_ref() { + for line in reasoning + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + { + let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); + let msg = format!("Spec merge reasoning: {truncated}"); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(msg.clone())); + } + logs.push(msg); + } + } + + let assistant_reply = response.text.trim().to_string(); + if assistant_reply.is_empty() { + conversation.push("Assistant:".to_string()); + } else { + conversation.push(format!("Assistant:\n{assistant_reply}")); + } + + let (after_read, read_requests) = extract_read_requests(&response.text); + let (cleaned_text, search_requests) = parse_search_requests(&after_read); + + let mut executed_command = false; + + for request in read_requests { + let key = request.dedupe_key(); + if !seen_read_requests.insert(key) { + let msg = format!( + "Spec merge READ `{}` skipped (already provided).", + request.path.display() + ); + logs.push(msg.clone()); + conversation.push(format!( + "Tool READ `{}` already provided earlier.", + 
request.path.display() + )); + executed_command = true; + continue; + } + + executed_command = true; + match execute_auto_scope_read( + repo_root, + &request.path, + request.start, + request.end, + metrics.as_ref(), + ) + .await + { + Ok(output) => { + logs.push(format!( + "Spec merge READ `{}` returned content.", + request.path.display() + )); + conversation.push(format!( + "Tool READ `{}`:\n{}", + request.path.display(), + output + )); + } + Err(err) => { + logs.push(format!( + "Spec merge READ `{}` failed: {err}", + request.path.display() + )); + conversation.push(format!( + "Tool READ `{}` error: {err}", + request.path.display() + )); + command_error_count += 1; + } + } + } + + for request in search_requests { + let key = request.dedupe_key(); + if !seen_search_requests.insert(key) { + match &request { + ToolRequest::Content { term, mode, .. } => { + let display_term = summarize_search_term(term, 80); + let msg = format!( + "Spec merge SEARCH `{display_term}` ({}) skipped (already provided).", + mode.as_str() + ); + logs.push(msg.clone()); + conversation.push(format!( + "Tool SEARCH `{display_term}` ({}) already provided earlier.", + mode.as_str() + )); + } + ToolRequest::GrepFiles { args, .. } => { + let mut shown = serde_json::json!({ "pattern": args.pattern }); + if let Some(ref inc) = args.include { + shown["include"] = serde_json::Value::String(inc.clone()); + } + if let Some(ref path) = args.path { + shown["path"] = serde_json::Value::String(path.clone()); + } + if let Some(limit) = args.limit { + shown["limit"] = + serde_json::Value::Number(serde_json::Number::from(limit as u64)); + } + let msg = + format!("Spec merge GREP_FILES {shown} skipped (already provided)."); + logs.push(msg.clone()); + conversation + .push(format!("Tool GREP_FILES {shown} already provided earlier.")); + } + } + executed_command = true; + continue; + } + + executed_command = true; + match request { + ToolRequest::Content { term, mode, .. 
} => { + let display_term = summarize_search_term(&term, 80); + let msg = format!( + "Spec merge SEARCH `{display_term}` ({}) skipped; SEARCH is disabled for this step.", + mode.as_str() + ); + logs.push(msg); + conversation.push(format!( + "Tool SEARCH `{display_term}` ({}) error: SEARCH is disabled during spec merge. Use READ (and optionally GREP_FILES) to gather context.", + mode.as_str() + )); + } + ToolRequest::GrepFiles { args, .. } => { + let mut shown = serde_json::json!({ "pattern": args.pattern }); + if let Some(ref inc) = args.include { + shown["include"] = serde_json::Value::String(inc.clone()); + } + if let Some(ref path) = args.path { + shown["path"] = serde_json::Value::String(path.clone()); + } + if let Some(limit) = args.limit { + shown["limit"] = + serde_json::Value::Number(serde_json::Number::from(limit as u64)); + } + logs.push(format!("Spec merge GREP_FILES {shown} executing.")); + match run_grep_files(repo_root, &args, &metrics).await { + SearchResult::Matches(output) => { + conversation.push(format!("Tool GREP_FILES {shown}:\n{output}")); + } + SearchResult::NoMatches => { + let message = "No matches found.".to_string(); + logs.push(format!( + "Spec merge GREP_FILES {shown} returned no matches." 
+ )); + conversation.push(format!("Tool GREP_FILES {shown}:\n{message}")); + } + SearchResult::Error(err) => { + logs.push(format!("Spec merge GREP_FILES {shown} failed: {err}")); + conversation.push(format!("Tool GREP_FILES {shown} error: {err}")); + command_error_count += 1; + } + } + } + } + } + + if command_error_count >= SPEC_COMBINE_MAX_COMMAND_ERRORS { + return Err(SecurityReviewFailure { + message: format!("Spec merge hit {SPEC_COMBINE_MAX_COMMAND_ERRORS} tool errors."), + logs, + }); + } + + if executed_command { + tool_rounds = tool_rounds.saturating_add(1); + continue; + } + + let final_text = cleaned_text.trim(); + if final_text.is_empty() { + return Err(SecurityReviewFailure { + message: "Spec merge produced an empty response.".to_string(), + logs, + }); + } + + break final_text.to_string(); }; - let sanitized = fix_mermaid_blocks(&response); + + let sanitized = fix_mermaid_blocks(&combined_raw); let polish_message = "Polishing combined specification markdown formatting.".to_string(); if let Some(tx) = progress_sender.as_ref() { @@ -3715,7 +4581,7 @@ async fn combine_spec_markdown( .filter(|line| !line.is_empty()) { let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); - let msg = format!("Model reasoning: {truncated}"); + let msg = format!("Spec merge polish reasoning: {truncated}"); if let Some(tx) = progress_sender.as_ref() { tx.send(AppEvent::SecurityReviewLog(msg.clone())); } @@ -3781,6 +4647,28 @@ fn build_spec_prompt_text( .replace("{spec_template}", &template_body) } +fn extract_api_markdown(spec_markdown: &str) -> Option { + let heading = "## API Entry Points"; + let start = spec_markdown.find(heading)?; + let after_heading = &spec_markdown[start + heading.len()..]; + let after_trimmed = after_heading.trim_start_matches(['\n', '\r']); + if after_trimmed.is_empty() { + return None; + } + let next_heading_offset = after_trimmed.find("\n## "); + let content = if let Some(idx) = next_heading_offset { + &after_trimmed[..idx] + } 
else { + after_trimmed + }; + let trimmed = content.trim(); + if trimmed.is_empty() { + None + } else { + Some(trimmed.to_string()) + } +} + fn build_combine_specs_prompt(project_locations: &[String], specs: &[SpecEntry]) -> String { let locations_block = if project_locations.is_empty() { "repository root".to_string() @@ -3848,6 +4736,261 @@ Some common issues to fix:\n\ prompt } +fn clamp_prompt_text(input: &str, max_chars: usize) -> String { + let mut out = String::with_capacity(input.len().min(max_chars) + 32); + let mut count = 0usize; + for ch in input.chars() { + if count >= max_chars { + out.push_str("\n… (truncated)"); + break; + } + out.push(ch); + count += 1; + } + if count < input.chars().count() && !out.ends_with('\n') { + out.push('\n'); + } + out +} + +#[derive(Clone)] +struct ClassificationExtraction { + rows: Vec, + table_markdown: String, + reasoning_logs: Vec, +} + +fn build_data_classification_prompt(spec_markdown: &str) -> String { + CONVERT_CLASSIFICATION_TO_JSON_PROMPT_TEMPLATE.replace("{spec_markdown}", spec_markdown) +} + +fn sensitivity_rank(value: &str) -> usize { + match value.trim().to_ascii_lowercase().as_str() { + "high" => 0, + "medium" => 1, + "low" => 2, + _ => 3, + } +} + +fn build_data_classification_table(rows: &[DataClassificationRow]) -> Option { + if rows.is_empty() { + return None; + } + let mut sorted = rows.to_vec(); + sorted.sort_by(|a, b| { + let rank_a = sensitivity_rank(&a.sensitivity); + let rank_b = sensitivity_rank(&b.sensitivity); + rank_a.cmp(&rank_b).then_with(|| { + a.data_type + .to_ascii_lowercase() + .cmp(&b.data_type.to_ascii_lowercase()) + }) + }); + + let mut lines: Vec = Vec::new(); + lines.push("## Data Classification".to_string()); + lines.push(String::new()); + lines.push("| Data Type | Sensitivity | Storage Location | Retention | Encryption At Rest | In Transit | Accessed By |".to_string()); + lines.push("|---|---|---|---|---|---|---|".to_string()); + for row in &sorted { + lines.push(format!( + "| {} 
| {} | {} | {} | {} | {} | {} |", + row.data_type, + row.sensitivity, + row.storage_location, + row.retention, + row.encryption_at_rest, + row.in_transit, + row.accessed_by + )); + } + lines.push(String::new()); + Some(lines.join("\n")) +} + +fn inject_data_classification_section(spec_markdown: &str, table_markdown: &str) -> String { + let lines: Vec<&str> = spec_markdown.lines().collect(); + let mut output: Vec = Vec::new(); + let mut i = 0usize; + let mut replaced = false; + while i < lines.len() { + let line = lines[i]; + if line + .trim() + .to_ascii_lowercase() + .starts_with("## data classification") + { + replaced = true; + output.push(table_markdown.to_string()); + i += 1; + while i < lines.len() { + let candidate = lines[i]; + if candidate.starts_with("## ") + && !candidate + .trim() + .eq_ignore_ascii_case("## Data Classification") + { + break; + } + if candidate.starts_with("# ") + && !candidate + .trim() + .eq_ignore_ascii_case("# Project Specification") + { + break; + } + i += 1; + } + continue; + } + output.push(line.to_string()); + i += 1; + } + + if !replaced { + if let Some(last) = output.last() + && !last.trim().is_empty() + { + output.push(String::new()); + } + output.push(table_markdown.to_string()); + } + + output.join("\n") +} + +async fn extract_data_classification( + client: &Client, + provider: &ModelProviderInfo, + auth: &Option, + spec_markdown: &str, + metrics: Arc, +) -> Result, String> { + let trimmed = spec_markdown.trim(); + if trimmed.is_empty() { + return Ok(None); + } + + let truncated_spec = clamp_prompt_text(trimmed, CLASSIFICATION_PROMPT_SPEC_LIMIT); + let prompt = build_data_classification_prompt(&truncated_spec); + + let output = call_model( + client, + provider, + auth, + SPEC_GENERATION_MODEL, + SPEC_SYSTEM_PROMPT, + &prompt, + metrics, + 0.0, + ) + .await?; + + let mut reasoning_logs: Vec = Vec::new(); + if let Some(reasoning) = output.reasoning.as_ref() { + for line in reasoning + .lines() + .map(str::trim) + 
.filter(|line| !line.is_empty()) + { + let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); + reasoning_logs.push(format!("Model reasoning: {truncated}")); + } + } + + let mut rows: Vec = Vec::new(); + for raw in output.text.lines() { + let trimmed = raw.trim(); + if trimmed.is_empty() { + continue; + } + match serde_json::from_str::(trimmed) { + Ok(mut row) => { + row.sensitivity = row.sensitivity.trim().to_ascii_lowercase(); + if row.sensitivity != "high" + && row.sensitivity != "medium" + && row.sensitivity != "low" + { + row.sensitivity = "unknown".to_string(); + } + rows.push(row); + } + Err(err) => { + reasoning_logs.push(format!( + "Skipping invalid classification line: {trimmed} ({err})" + )); + } + } + } + + if rows.is_empty() { + return Ok(None); + } + + let table_markdown = match build_data_classification_table(&rows) { + Some(table) => table, + None => return Ok(None), + }; + + Ok(Some(ClassificationExtraction { + rows, + table_markdown, + reasoning_logs, + })) +} + +struct MarkdownPolishOutcome { + text: String, + reasoning_logs: Vec, +} + +async fn polish_markdown_block( + client: &Client, + provider: &ModelProviderInfo, + auth: &Option, + metrics: Arc, + original_content: &str, + template_hint: Option<&str>, +) -> Result { + if original_content.trim().is_empty() { + return Ok(MarkdownPolishOutcome { + text: original_content.to_string(), + reasoning_logs: Vec::new(), + }); + } + + let fix_prompt = build_fix_markdown_prompt(original_content, template_hint); + let output = call_model( + client, + provider, + auth, + MARKDOWN_FIX_MODEL, + MARKDOWN_FIX_SYSTEM_PROMPT, + &fix_prompt, + metrics, + 0.0, + ) + .await?; + + let mut reasoning_logs: Vec = Vec::new(); + if let Some(reasoning) = output.reasoning.as_ref() { + for line in reasoning + .lines() + .map(str::trim) + .filter(|line| !line.is_empty()) + { + let truncated = truncate_text(line, MODEL_REASONING_LOG_MAX_GRAPHEMES); + reasoning_logs.push(format!("Model reasoning: 
{truncated}")); + } + } + + Ok(MarkdownPolishOutcome { + text: output.text, + reasoning_logs, + }) +} + async fn analyze_files_individually( client: &Client, provider: &ModelProviderInfo, @@ -3872,6 +5015,20 @@ async fn analyze_files_individually( let mut completed_files: usize = 0; let concurrency = MAX_CONCURRENT_FILE_ANALYSIS.min(snippets.len()); + if let Some(tx) = progress_sender.as_ref() { + tx.send(AppEvent::SecurityReviewLog(format!( + " └ Launching parallel bug analysis ({concurrency} workers)" + ))); + tx.send(AppEvent::SecurityReviewLog( + " Sample tool calls (simulated):".to_string(), + )); + tx.send(AppEvent::SecurityReviewLog( + " └ READ: src/main.rs#L1-L120".to_string(), + )); + tx.send(AppEvent::SecurityReviewLog( + " SEARCH regex:'(?i)api_key|secret|token'".to_string(), + )); + } for _ in 0..concurrency { if let Some((index, snippet)) = remaining.next() { in_flight.push(analyze_single_file( @@ -3933,6 +5090,11 @@ async fn analyze_files_individually( } } Err(failure) => { + if let Some(tx) = progress_sender.as_ref() { + for line in &failure.logs { + tx.send(AppEvent::SecurityReviewLog(line.clone())); + } + } let mut combined_logs = aggregated_logs; combined_logs.extend(failure.logs); return Err(SecurityReviewFailure { @@ -3976,6 +5138,11 @@ async fn analyze_files_individually( { let blame_logs = enrich_bug_summaries_with_blame(&mut bug_summaries, info, metrics.clone()).await; + if let Some(tx) = progress_sender.as_ref() { + for line in &blame_logs { + tx.send(AppEvent::SecurityReviewLog(line.clone())); + } + } aggregated_logs.extend(blame_logs); } @@ -4123,6 +5290,8 @@ async fn analyze_files_individually( if (before - after) == 1 { "" } else { "s" } )); } + + normalize_bug_identifiers(&mut bug_summaries, &mut bug_details); } snippets_with_findings.sort_by_key(|(index, _)| *index); @@ -4935,6 +6104,57 @@ fn dedupe_bug_summaries( (summaries, new_details, removed) } +fn bug_summary_cmp(a: &BugSummary, b: &BugSummary) -> CmpOrdering { + match 
(a.risk_rank, b.risk_rank) { + (Some(ra), Some(rb)) => ra.cmp(&rb), + (Some(_), None) => CmpOrdering::Less, + (None, Some(_)) => CmpOrdering::Greater, + _ => severity_rank(&a.severity) + .cmp(&severity_rank(&b.severity)) + .then_with(|| a.id.cmp(&b.id)), + } +} + +fn normalize_bug_identifiers(summaries: &mut Vec, details: &mut Vec) { + if summaries.is_empty() { + details.clear(); + return; + } + + let mut sorted: Vec = std::mem::take(summaries); + sorted.sort_by(bug_summary_cmp); + + let mut detail_lookup: HashMap = details + .iter() + .map(|detail| (detail.summary_id, detail.original_markdown.clone())) + .collect(); + let mut new_details: Vec = Vec::with_capacity(sorted.len()); + + for (index, summary) in sorted.iter_mut().enumerate() { + let new_id = index + 1; + let old_id = summary.id; + summary.id = new_id; + if let Some(updated) = + rewrite_bug_markdown_heading_id(summary.markdown.as_str(), summary.id) + { + summary.markdown = updated; + } + + let base_markdown = detail_lookup + .remove(&old_id) + .unwrap_or_else(|| summary.markdown.clone()); + let normalized_detail = rewrite_bug_markdown_heading_id(base_markdown.as_str(), summary.id) + .unwrap_or(base_markdown); + new_details.push(BugDetail { + summary_id: summary.id, + original_markdown: normalized_detail, + }); + } + + *summaries = sorted; + *details = new_details; +} + fn format_findings_summary(findings: usize, files_with_findings: usize) -> String { if findings == 0 { return "No findings identified.".to_string(); @@ -5017,8 +6237,8 @@ fn make_bug_summary_table(bugs: &[BugSummary]) -> Option { }); let mut table = String::new(); - table.push_str("| # | Severity | Title | Validation | Impact | Recommendation |\n"); - table.push_str("| --- | --- | --- | --- | --- | --- |\n"); + table.push_str("| # | Severity | Title | Validation | Impact |\n"); + table.push_str("| --- | --- | --- | --- | --- |\n"); for (display_idx, bug) in ordered.iter().enumerate() { let id = display_idx + 1; let anchor_id = bug.id; @@ 
-5048,12 +6268,11 @@ fn make_bug_summary_table(bugs: &[BugSummary]) -> Option { } let validation = validation_display(&bug.validation); table.push_str(&format!( - "| {id} | {} | {} | {} | {} | {} |\n", + "| {id} | {} | {} | {} | {} |\n", sanitize_table_field(&bug.severity), title_cell, sanitize_table_field(&validation), sanitize_table_field(&bug.impact), - sanitize_table_field(&bug.recommendation), )); } Some(table) @@ -5074,8 +6293,8 @@ fn make_bug_summary_table_from_bugs(bugs: &[SecurityReviewBug]) -> Option Option Option { async fn persist_artifacts( output_root: &Path, repo_path: &Path, + metadata: &SecurityReviewMetadata, bugs_markdown: &str, + api_entries: &[ApiEntry], + classification_rows: &[DataClassificationRow], + classification_table: Option<&str>, report_markdown: Option<&str>, snapshot: &SecurityReviewSnapshot, ) -> Result { @@ -6903,6 +8125,52 @@ async fn persist_artifacts( .await .map_err(|e| format!("Failed to write {}: {e}", snapshot_path.display()))?; + let mut api_overview_path: Option = None; + if !api_entries.is_empty() { + let mut content = String::new(); + for entry in api_entries { + if entry.markdown.trim().is_empty() { + continue; + } + content.push_str(&format!("## {}\n\n", entry.location_label)); + content.push_str(entry.markdown.trim()); + content.push_str("\n\n"); + } + if !content.trim().is_empty() { + let path = context_dir.join("apis.md"); + tokio_fs::write(&path, fix_mermaid_blocks(&content).as_bytes()) + .await + .map_err(|e| format!("Failed to write {}: {e}", path.display()))?; + api_overview_path = Some(path); + } + } + + let mut classification_json_path: Option = None; + let mut classification_table_path: Option = None; + if !classification_rows.is_empty() { + let mut json_lines: Vec = Vec::with_capacity(classification_rows.len()); + for row in classification_rows { + let line = serde_json::to_string(row) + .map_err(|e| format!("Failed to serialize classification row: {e}"))?; + json_lines.push(line); + } + let json_path = 
context_dir.join("classification.jsonl"); + tokio_fs::write(&json_path, json_lines.join("\n").as_bytes()) + .await + .map_err(|e| format!("Failed to write {}: {e}", json_path.display()))?; + classification_json_path = Some(json_path); + + if let Some(table) = classification_table + && !table.trim().is_empty() + { + let table_path = context_dir.join("classification.md"); + tokio_fs::write(&table_path, table.as_bytes()) + .await + .map_err(|e| format!("Failed to write {}: {e}", table_path.display()))?; + classification_table_path = Some(table_path); + } + } + let mut report_html_path = None; let sanitized_report = report_markdown.map(fix_mermaid_blocks); let report_path = if let Some(report) = sanitized_report.as_ref() { @@ -6926,11 +8194,22 @@ async fn persist_artifacts( None }; + let metadata_path = output_root.join("metadata.json"); + let metadata_bytes = serde_json::to_vec_pretty(metadata) + .map_err(|e| format!("Failed to serialize metadata: {e}"))?; + tokio_fs::write(&metadata_path, metadata_bytes) + .await + .map_err(|e| format!("Failed to write {}: {e}", metadata_path.display()))?; + Ok(PersistedArtifacts { bugs_path, snapshot_path, report_path, report_html_path, + metadata_path, + api_overview_path, + classification_json_path, + classification_table_path, }) } @@ -6966,14 +8245,14 @@ fn summarize_process_output(success: bool, stdout: &str, stderr: &str) -> String fn build_bugs_markdown( snapshot: &SecurityReviewSnapshot, - _git_link_info: Option<&GitLinkInfo>, + git_link_info: Option<&GitLinkInfo>, ) -> String { let bugs = snapshot_bugs(snapshot); let mut sections: Vec = Vec::new(); if let Some(table) = make_bug_summary_table_from_bugs(&bugs) { sections.push(table); } - let details = render_bug_sections(&snapshot.bugs); + let details = render_bug_sections(&snapshot.bugs, git_link_info); if !details.trim().is_empty() { sections.push(details); } @@ -7297,6 +8576,7 @@ async fn call_model( system_prompt, user_prompt, temperature, + metrics.clone(), ) .await { @@ 
-7330,6 +8610,7 @@ async fn call_model_attempt( system_prompt: &str, user_prompt: &str, temperature: f32, + metrics: Arc, ) -> Result { match provider.wire_api { WireApi::Responses => { @@ -7379,17 +8660,19 @@ async fn call_model_attempt( system_prompt, user_prompt, temperature, + metrics.clone(), ) .await; } return Err(format!("Model request failed with status {status}: {body}")); } - match parse_responses_stream_output(&body) { + match parse_responses_stream_output(&body, &metrics) { Ok(output) => Ok(output), Err(err) => { let snippet = truncate_text(&body, 400); if let Ok(value) = serde_json::from_str::(&body) { + // parse_responses_output may not include usage; nothing extra to record here. parse_responses_output(value).map_err(|fallback_err| { format!( "{err}; fallback parse failed: {fallback_err}. Response snippet: {snippet}" @@ -7412,6 +8695,7 @@ async fn call_model_attempt( system_prompt, user_prompt, temperature, + metrics, ) .await } @@ -7426,6 +8710,7 @@ async fn send_chat_request( system_prompt: &str, user_prompt: &str, temperature: f32, + metrics: Arc, ) -> Result { let builder = provider .create_request_builder(client, auth) @@ -7469,6 +8754,61 @@ async fn send_chat_request( } }; + // Try to record token usage if present in chat response + if let Some(usage) = value.get("usage") { + let input_tokens = usage + .get("input_tokens") + .and_then(serde_json::Value::as_u64) + .or_else(|| { + usage + .get("prompt_tokens") + .and_then(serde_json::Value::as_u64) + }) + .unwrap_or(0); + let cached_input_tokens = usage + .get("input_tokens_details") + .and_then(|d| d.get("cached_tokens").and_then(serde_json::Value::as_u64)) + .or_else(|| { + usage + .get("prompt_tokens_details") + .and_then(|d| d.get("cached_tokens").and_then(serde_json::Value::as_u64)) + }) + .unwrap_or(0); + let output_tokens = usage + .get("output_tokens") + .and_then(serde_json::Value::as_u64) + .or_else(|| { + usage + .get("completion_tokens") + .and_then(serde_json::Value::as_u64) + }) + 
.unwrap_or(0); + let reasoning_output_tokens = usage + .get("output_tokens_details") + .and_then(|d| { + d.get("reasoning_tokens") + .and_then(serde_json::Value::as_u64) + }) + .or_else(|| { + usage.get("completion_tokens_details").and_then(|d| { + d.get("reasoning_tokens") + .and_then(serde_json::Value::as_u64) + }) + }) + .unwrap_or(0); + let total_tokens = usage + .get("total_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(input_tokens.saturating_add(output_tokens)); + metrics.record_usage_raw( + input_tokens, + cached_input_tokens, + output_tokens, + reasoning_output_tokens, + total_tokens, + ); + } + parse_chat_output(value).map_err(|err| { let snippet = truncate_text(&body_text, 400); format!("{err}; response snippet: {snippet}") @@ -7484,7 +8824,10 @@ fn sanitize_model_error(error: &str) -> String { trimmed.split_whitespace().collect::>().join(" ") } -fn parse_responses_stream_output(body: &str) -> Result { +fn parse_responses_stream_output( + body: &str, + metrics: &ReviewMetrics, +) -> Result { let mut combined = String::new(); let mut reasoning = String::new(); let mut fallback: Option = None; @@ -7509,6 +8852,7 @@ fn parse_responses_stream_output(body: &str) -> Result &mut fallback, &mut failed_error, &mut last_parse_error, + metrics, ); data_buffer.clear(); } @@ -7522,6 +8866,7 @@ fn parse_responses_stream_output(body: &str) -> Result &mut fallback, &mut failed_error, &mut last_parse_error, + metrics, ); } @@ -7554,6 +8899,7 @@ fn handle_responses_event( fallback: &mut Option, failed_error: &mut Option, last_parse_error: &mut Option, + metrics: &ReviewMetrics, ) { let trimmed = data.trim(); if trimmed.is_empty() || trimmed == "[DONE]" { @@ -7599,6 +8945,61 @@ fn handle_responses_event( "response.completed" => { if let Some(resp) = event.get("response") { *fallback = Some(resp.clone()); + if let Some(usage) = resp.get("usage") + && let Some(input_tokens) = usage + .get("input_tokens") + .and_then(serde_json::Value::as_u64) + .or_else(|| { + 
usage + .get("prompt_tokens") + .and_then(serde_json::Value::as_u64) + }) + { + let cached_input_tokens = usage + .get("input_tokens_details") + .and_then(|d| { + d.get("cached_tokens").and_then(serde_json::Value::as_u64) + }) + .or_else(|| { + usage.get("prompt_tokens_details").and_then(|d| { + d.get("cached_tokens").and_then(serde_json::Value::as_u64) + }) + }) + .unwrap_or(0); + let output_tokens = usage + .get("output_tokens") + .and_then(serde_json::Value::as_u64) + .or_else(|| { + usage + .get("completion_tokens") + .and_then(serde_json::Value::as_u64) + }) + .unwrap_or(0); + let reasoning_output_tokens = usage + .get("output_tokens_details") + .and_then(|d| { + d.get("reasoning_tokens") + .and_then(serde_json::Value::as_u64) + }) + .or_else(|| { + usage.get("completion_tokens_details").and_then(|d| { + d.get("reasoning_tokens") + .and_then(serde_json::Value::as_u64) + }) + }) + .unwrap_or(0); + let total_tokens = usage + .get("total_tokens") + .and_then(serde_json::Value::as_u64) + .unwrap_or(input_tokens.saturating_add(output_tokens)); + metrics.record_usage_raw( + input_tokens, + cached_input_tokens, + output_tokens, + reasoning_output_tokens, + total_tokens, + ); + } } } "response.failed" => { diff --git a/codex-rs/tui/src/status_indicator_widget.rs b/codex-rs/tui/src/status_indicator_widget.rs index 8d9c247ea4..55e6299e90 100644 --- a/codex-rs/tui/src/status_indicator_widget.rs +++ b/codex-rs/tui/src/status_indicator_widget.rs @@ -26,6 +26,7 @@ pub(crate) struct StatusSnapshot { pub(crate) progress: Option, pub(crate) thinking: Vec, pub(crate) tool_calls: Vec, + pub(crate) logs: Vec, } pub(crate) struct StatusIndicatorWidget { @@ -37,6 +38,8 @@ pub(crate) struct StatusIndicatorWidget { thinking_lines: Vec, /// Labels of in-flight tool calls. tool_calls: Vec, + /// Recent log messages emitted by long-running tasks. + logs: Vec, /// Queued user messages to display under the status line. 
queued_messages: Vec, @@ -71,6 +74,7 @@ impl StatusIndicatorWidget { progress: None, thinking_lines: Vec::new(), tool_calls: Vec::new(), + logs: Vec::new(), queued_messages: Vec::new(), elapsed_running: Duration::ZERO, last_resume_at: Instant::now(), @@ -88,11 +92,12 @@ impl StatusIndicatorWidget { let mut total: u16 = 1; // status line total = total.saturating_add(self.thinking_lines.len().saturating_sub(1) as u16); total = total.saturating_add(self.tool_calls.len().saturating_sub(1) as u16); - if !self.queued_messages.is_empty() { - total = total.saturating_add(1); // blank line between supplemental and queued messages - } let text_width = inner_width.saturating_sub(3); // account for " ↳ " prefix if text_width > 0 { + for log in &self.logs { + let wrapped = textwrap::wrap(log, text_width); + total = total.saturating_add(wrapped.len() as u16); + } for q in &self.queued_messages { let wrapped = textwrap::wrap(q, text_width); let lines = wrapped.len().min(3) as u16; @@ -105,9 +110,15 @@ impl StatusIndicatorWidget { total = total.saturating_add(1); // keybind hint line } } else { + total = total.saturating_add(self.logs.len() as u16); // At least one line per message if width is extremely narrow total = total.saturating_add(self.queued_messages.len() as u16); } + if !self.logs.is_empty() && !self.queued_messages.is_empty() { + total = total.saturating_add(1); // blank line between logs and queued messages + } else if !self.queued_messages.is_empty() { + total = total.saturating_add(1); // blank line between supplemental and queued messages + } total.saturating_add(1) // spacer line } @@ -132,6 +143,7 @@ impl StatusIndicatorWidget { self.progress = snapshot.progress; self.thinking_lines = snapshot.thinking; self.tool_calls = snapshot.tool_calls; + self.logs = snapshot.logs; self.frame_requester.schedule_frame(); } @@ -142,6 +154,11 @@ impl StatusIndicatorWidget { self.frame_requester.schedule_frame(); } + pub(crate) fn set_logs(&mut self, logs: Vec) { + self.logs = 
logs; + self.frame_requester.schedule_frame(); + } + pub(crate) fn pause_timer(&mut self) { self.pause_timer_at(Instant::now()); } @@ -246,11 +263,26 @@ impl WidgetRef for StatusIndicatorWidget { lines.push(vec![" ↳ ".cyan(), call.clone().cyan()].into()); } } + let text_width = area.width.saturating_sub(3); // " ↳ " prefix + if !self.logs.is_empty() { + if text_width > 0 { + for log in &self.logs { + let wrapped = textwrap::wrap(log, text_width as usize); + for (i, piece) in wrapped.iter().enumerate() { + let prefix = if i == 0 { " ↳ ".dim() } else { " ".dim() }; + lines.push(vec![prefix, piece.to_string().into()].into()); + } + } + } else { + for log in &self.logs { + lines.push(vec![" ↳ ".dim(), log.clone().into()].into()); + } + } + } if !self.queued_messages.is_empty() { lines.push(Line::from("")); } // Wrap queued messages using textwrap and show up to the first 3 lines per message. - let text_width = area.width.saturating_sub(3); // " ↳ " prefix for q in &self.queued_messages { let wrapped = textwrap::wrap(q, text_width as usize); for (i, piece) in wrapped.iter().take(3).enumerate() {