mirror of
https://github.com/openai/codex.git
synced 2026-05-01 20:02:05 +03:00
Fix: proactive auth refresh to reload guarded disk state first (#15357)
## Summary

Fix a managed ChatGPT auth bug where a stale Codex process could proactively refresh using an old in-memory refresh token even after another process had already rotated auth on disk.

This changes the proactive `AuthManager::auth()` path to reuse the existing guarded `refresh_token()` flow instead of calling the refresh endpoint directly from cached auth state.

## Original Issue

Users reported repeated `codexd` log lines like:

```text
ERROR codex_core::auth: Failed to refresh token: error sending request for url (https://auth.openai.com/oauth/token)
```

In practice this showed up most often when multiple `codexd` processes were left running. Killing the extra processes stopped the noise, which suggested the issue was caused by stale auth state across processes rather than invalid user credentials.

## Diagnosis

The bug was in the proactive refresh path used by `AuthManager::auth()`:

- Process A could refresh successfully, rotate refresh token `R0` to `R1`, and persist the updated auth state plus `last_refresh` to disk.
- Process B could keep an older auth snapshot cached in memory, still holding `R0` and the old `last_refresh`.
- Later, when Process B called `auth()`, it checked staleness from its cached in-memory auth instead of first reloading from disk.
- Because that cached `last_refresh` was stale, Process B would proactively call `/oauth/token` with stale refresh token `R0`.
- On failure, `auth()` logged the refresh error but kept returning the same stale cached auth, so repeated `auth()` calls could keep retrying with dead state.

This differed from the existing unauthorized-recovery flow, which already did the safer thing: guarded reload from disk first, then refresh only if the on-disk auth was unchanged.

## What Changed

- Switched proactive refresh in `AuthManager::auth()` to:
  - do a pure staleness check on cached auth
  - call `refresh_token()` when stale
  - return the original cached auth on genuine refresh failure, preserving existing outward behavior
- Removed the direct proactive refresh-from-cached-state path
- Added regression tests covering:
  - stale cached auth with newer same-account auth already on disk
  - the same scenario even when the refresh endpoint would fail if called

## Why This Fix

`refresh_token()` already contains the right cross-process safety behavior:

- guarded reload from disk
- same-account verification
- skip-refresh when another process already changed auth

Reusing that path makes proactive refresh consistent with unauthorized recovery and prevents stale processes from trying to refresh already-rotated tokens.

## Testing

Test shape:

- create a fresh temp `CODEX_HOME` from `~/.codex/auth.json`
- force `last_refresh` to an old timestamp so proactive refresh is required
- start two long-lived helper processes against the same auth file
- start `B` first so it caches stale auth and sleeps
- start `A` second so it refreshes first
- point both at a local mock `/oauth/token` server
- inspect whether `B` makes a second refresh request with the stale in-memory token, or reloads the rotated token from disk

### Before the fix

The repro showed the bug clearly: the mock server saw two refreshes with the same stale token, `A` rotated to a new token, and `B` still returned the stale token instead of reloading from disk.

```text
POST /oauth/token refresh_token=rt_j6s0...
POST /oauth/token refresh_token=rt_j6s0...
B:cached_before=rt_j6s0...
B:cached_after=rt_j6s0...
B:returned=rt_j6s0...
A:cached_before=rt_j6s0...
A:cached_after=rotated-refresh-token-logged-run-v2
A:returned=rotated-refresh-token-logged-run-v2
```

### After the fix

After the fix, the mock server saw only one refresh request. `A` refreshed once, and `B` started with the stale token but reloaded and returned the rotated token.

```text
POST /oauth/token refresh_token=rt_j6s0...
B:cached_before=rt_j6s0...
B:cached_after=rotated-refresh-token-fix-branch
B:returned=rotated-refresh-token-fix-branch
A:cached_before=rt_j6s0...
A:cached_after=rotated-refresh-token-fix-branch
A:returned=rotated-refresh-token-fix-branch
```

This shows the new behavior: `A` refreshes once, then `B` reuses the updated auth from disk instead of making a second refresh request with the stale token.
This commit is contained in:
@@ -883,6 +883,24 @@ mod tests {
|
||||
account_id: Option<&str>,
|
||||
access_token: &str,
|
||||
refresh_token: &str,
|
||||
) -> serde_json::Value {
|
||||
chatgpt_auth_json_with_last_refresh(
|
||||
plan_type,
|
||||
chatgpt_user_id,
|
||||
account_id,
|
||||
access_token,
|
||||
refresh_token,
|
||||
"2025-01-01T00:00:00Z",
|
||||
)
|
||||
}
|
||||
|
||||
fn chatgpt_auth_json_with_last_refresh(
|
||||
plan_type: &str,
|
||||
chatgpt_user_id: Option<&str>,
|
||||
account_id: Option<&str>,
|
||||
access_token: &str,
|
||||
refresh_token: &str,
|
||||
last_refresh: &str,
|
||||
) -> serde_json::Value {
|
||||
chatgpt_auth_json_with_mode(
|
||||
plan_type,
|
||||
@@ -890,6 +908,7 @@ mod tests {
|
||||
account_id,
|
||||
access_token,
|
||||
refresh_token,
|
||||
last_refresh,
|
||||
None,
|
||||
)
|
||||
}
|
||||
@@ -900,6 +919,7 @@ mod tests {
|
||||
account_id: Option<&str>,
|
||||
access_token: &str,
|
||||
refresh_token: &str,
|
||||
last_refresh: &str,
|
||||
auth_mode: Option<&str>,
|
||||
) -> serde_json::Value {
|
||||
let header = json!({ "alg": "none", "typ": "JWT" });
|
||||
@@ -925,7 +945,7 @@ mod tests {
|
||||
"refresh_token": refresh_token,
|
||||
"account_id": account_id,
|
||||
},
|
||||
"last_refresh": "2025-01-01T00:00:00Z",
|
||||
"last_refresh": last_refresh,
|
||||
});
|
||||
if let Some(auth_mode) = auth_mode {
|
||||
auth_json["auth_mode"] = serde_json::Value::String(auth_mode.to_string());
|
||||
@@ -1262,24 +1282,43 @@ enabled = false
|
||||
|
||||
#[tokio::test]
|
||||
async fn fetch_cloud_requirements_recovers_after_unauthorized_reload() {
|
||||
let auth = managed_auth_context(
|
||||
"business",
|
||||
Some("user-12345"),
|
||||
Some("account-12345"),
|
||||
"stale-access-token",
|
||||
"test-refresh-token",
|
||||
);
|
||||
let auth_home = tempdir().expect("tempdir");
|
||||
write_auth_json(
|
||||
auth._home.path(),
|
||||
chatgpt_auth_json(
|
||||
auth_home.path(),
|
||||
chatgpt_auth_json_with_last_refresh(
|
||||
"business",
|
||||
Some("user-12345"),
|
||||
Some("account-12345"),
|
||||
"stale-access-token",
|
||||
"test-refresh-token",
|
||||
// Keep auth "fresh" so the first request hits unauthorized recovery
|
||||
// instead of AuthManager::auth() proactively reloading from disk.
|
||||
"3025-01-01T00:00:00Z",
|
||||
),
|
||||
)
|
||||
.expect("write initial auth");
|
||||
let auth_manager = Arc::new(AuthManager::new(
|
||||
auth_home.path().to_path_buf(),
|
||||
false,
|
||||
AuthCredentialsStoreMode::File,
|
||||
));
|
||||
|
||||
write_auth_json(
|
||||
auth_home.path(),
|
||||
chatgpt_auth_json_with_last_refresh(
|
||||
"business",
|
||||
Some("user-12345"),
|
||||
Some("account-12345"),
|
||||
"fresh-access-token",
|
||||
"test-refresh-token",
|
||||
"3025-01-01T00:00:00Z",
|
||||
),
|
||||
)
|
||||
.expect("write refreshed auth");
|
||||
let auth = ManagedAuthContext {
|
||||
_home: auth_home,
|
||||
manager: auth_manager,
|
||||
};
|
||||
|
||||
let fetcher = Arc::new(TokenFetcher {
|
||||
expected_token: "fresh-access-token".to_string(),
|
||||
@@ -1314,24 +1353,41 @@ enabled = false
|
||||
|
||||
#[tokio::test]
|
||||
async fn fetch_cloud_requirements_recovers_after_unauthorized_reload_updates_cache_identity() {
|
||||
let auth = managed_auth_context(
|
||||
"business",
|
||||
Some("user-12345"),
|
||||
Some("account-12345"),
|
||||
"stale-access-token",
|
||||
"test-refresh-token",
|
||||
);
|
||||
let auth_home = tempdir().expect("tempdir");
|
||||
write_auth_json(
|
||||
auth._home.path(),
|
||||
chatgpt_auth_json(
|
||||
auth_home.path(),
|
||||
chatgpt_auth_json_with_last_refresh(
|
||||
"business",
|
||||
Some("user-12345"),
|
||||
Some("account-12345"),
|
||||
"stale-access-token",
|
||||
"test-refresh-token",
|
||||
"3025-01-01T00:00:00Z",
|
||||
),
|
||||
)
|
||||
.expect("write initial auth");
|
||||
let auth_manager = Arc::new(AuthManager::new(
|
||||
auth_home.path().to_path_buf(),
|
||||
false,
|
||||
AuthCredentialsStoreMode::File,
|
||||
));
|
||||
|
||||
write_auth_json(
|
||||
auth_home.path(),
|
||||
chatgpt_auth_json_with_last_refresh(
|
||||
"business",
|
||||
Some("user-99999"),
|
||||
Some("account-12345"),
|
||||
"fresh-access-token",
|
||||
"test-refresh-token",
|
||||
"3025-01-01T00:00:00Z",
|
||||
),
|
||||
)
|
||||
.expect("write refreshed auth");
|
||||
let auth = ManagedAuthContext {
|
||||
_home: auth_home,
|
||||
manager: auth_manager,
|
||||
};
|
||||
|
||||
let fetcher = Arc::new(TokenFetcher {
|
||||
expected_token: "fresh-access-token".to_string(),
|
||||
@@ -1432,6 +1488,7 @@ enabled = false
|
||||
Some("account-12345"),
|
||||
"test-access-token",
|
||||
"test-refresh-token",
|
||||
"2025-01-01T00:00:00Z",
|
||||
Some("chatgptAuthTokens"),
|
||||
),
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user