Skip to main content

tower/circuit_breaker/
policy.rs

1use std::time::{Duration, Instant};
2
/// Determines when a [`CircuitBreaker`] should open, probe, and close.
///
/// Implement this trait to create custom circuit-breaking strategies —
/// for example latency-based triggers, error-rate thresholds, or a
/// manual operator-driven switch.  The built-in [`ConsecutiveFailures`]
/// policy is a good starting point for most use cases.
///
/// # Lifecycle
///
/// Each hook is tied to one state of the circuit.  The table summarises
/// when a hook's return value is acted upon and which transition a `true`
/// result requests:
///
/// | Hook | Acted upon while | `true` requests |
/// |---|---|---|
/// | [`on_failure`](Self::on_failure) | `Closed` | `Closed` → `Open` |
/// | [`should_probe`](Self::should_probe) | `Open` | `Open` → `HalfOpen` |
/// | [`on_success`](Self::on_success) | `HalfOpen` | `HalfOpen` → `Closed` |
///
/// [`on_half_open`](Self::on_half_open) returns nothing: it is a
/// notification that the `Open` → `HalfOpen` transition has happened.
///
/// # Thread safety
///
/// `CircuitPolicy` does **not** require [`Send`] or [`Sync`] as supertraits,
/// so single-threaded or `!Send` implementations are valid.  However, because
/// the policy is stored inside `Arc<Mutex<…>>` within [`CircuitBreaker`],
/// the compiler will automatically require `P: Send` whenever
/// `CircuitBreaker<S, P>` is sent across threads (e.g. handed to
/// `tokio::spawn` or used with a multi-threaded runtime).  `P: Sync` is
/// **not** needed — `Mutex` provides the necessary exclusion.
///
/// In practice, any policy that holds only owned data will be `Send`
/// automatically.  If you store a raw pointer or `Rc` in your policy, it will
/// not be usable in a multi-threaded context — the compiler will tell you so
/// at the call site.
///
/// # Relationship to [`retry::budget`][budget]
///
/// [`Budget`][budget] governs *retry worthiness*: it caps the ratio of
/// retried requests to original requests, preventing retry amplification.
/// A circuit breaker governs *traffic admission*: it gates **all**
/// requests (including first attempts) when a backend is known to be
/// unhealthy.
///
/// The two compose naturally:
///
/// - A budget limits how aggressively clients retry individual requests.
/// - A circuit breaker stops all traffic once failure is systemic, giving
///   the backend time to recover without being drowned in retried load.
///
/// Using a circuit breaker *without* a budget still exposes you to retry
/// amplification from layers above; the combination of both provides full
/// protection against retry storms.
///
/// ```text
/// ┌──────────────────────────────────────┐
/// │           ServiceBuilder             │
/// │  .layer(CircuitBreakerLayer::…)  ◄── gates all traffic when open
/// │  .layer(RetryLayer::new(policy)) ◄── budget inside policy caps retries
/// │  .service_fn(my_backend)             │
/// └──────────────────────────────────────┘
/// ```
///
/// [budget]: crate::retry::budget
/// [`CircuitBreaker`]: super::service::CircuitBreaker
pub trait CircuitPolicy {
    /// Called after a **successful** response from the inner service.
    ///
    /// Takes `&mut self` so the implementation can record the outcome
    /// before deciding.
    ///
    /// Return `true` to signal that the circuit should close.  This is
    /// acted upon only while the circuit is
    /// [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen];
    /// returning `true` from a [`Closed`][crate::circuit_breaker::CircuitStatus::Closed]
    /// state is a no-op.
    fn on_success(&mut self) -> bool;

    /// Called after a **failed** response from the inner service.
    ///
    /// Takes `&mut self` so the implementation can record the outcome
    /// before deciding.
    ///
    /// Return `true` to signal that the circuit should open.  This is
    /// acted upon when the circuit is
    /// [`Closed`][crate::circuit_breaker::CircuitStatus::Closed].
    ///
    /// Any failure while the circuit is
    /// [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen]
    /// always reopens it, regardless of the return value — the probe
    /// failed, so the backend is not yet ready.
    ///
    /// Because that reopening does not depend on the return value, an
    /// implementation that times its own cool-down must treat the first
    /// failure after [`on_half_open`](Self::on_half_open) as a fresh
    /// opening; otherwise [`should_probe`](Self::should_probe) keeps
    /// measuring from the previous, already-expired opening.
    fn on_failure(&mut self) -> bool;

    /// Called while the circuit is [`Open`][crate::circuit_breaker::CircuitStatus::Open].
    ///
    /// Return `true` to allow a probe request through (transitions the
    /// circuit to [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen]).
    ///
    /// This method takes `&self`, so the decision has to be derived from
    /// state recorded by the other hooks; bookkeeping for the transition
    /// itself belongs in [`on_half_open`](Self::on_half_open).
    fn should_probe(&self) -> bool;

    /// Called immediately after the circuit transitions to
    /// [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen].
    ///
    /// Use this hook to reset per-window counters so that the recovery
    /// success rate is measured only from post-recovery probes, not from
    /// stale pre-outage history.
    fn on_half_open(&mut self);
}
90
91// ---------------------------------------------------------------------------
92// ConsecutiveFailures — the built-in policy
93// ---------------------------------------------------------------------------
94
95/// A [`CircuitPolicy`] that opens the circuit after *N* consecutive failures
96/// and closes it again once a sufficient fraction of probes succeed.
97///
98/// # Parameters
99///
100/// | Parameter | Description |
101/// |---|---|
102/// | `failure_threshold` | Number of consecutive failures needed to open the circuit. |
103/// | `success_threshold` | Fraction of HalfOpen probes (0.0–1.0) that must succeed to close. |
104/// | `timeout` | How long to stay Open before sending the first probe. |
105///
106/// # Example
107///
108/// ```rust,ignore
109/// use tower::circuit_breaker::{CircuitBreakerLayer, ConsecutiveFailures};
110/// use std::time::Duration;
111///
112/// let policy = ConsecutiveFailures::new(5, 0.8, Duration::from_secs(30));
113/// let layer  = CircuitBreakerLayer::with_policy(policy);
114/// ```
115#[derive(Clone, Debug)]
116pub struct ConsecutiveFailures {
117    failure_threshold: usize,
118    success_threshold: f64,
119    timeout: Duration,
120    consecutive_failures: usize,
121    /// Set when the circuit opens; used by `should_probe`.
122    open_since: Option<Instant>,
123    /// Sliding window of outcomes during HalfOpen (max 100 entries).
124    window: Vec<bool>,
125}
126
127impl ConsecutiveFailures {
128    /// Create a new [`ConsecutiveFailures`] policy.
129    pub fn new(failure_threshold: usize, success_threshold: f64, timeout: Duration) -> Self {
130        Self {
131            failure_threshold,
132            success_threshold,
133            timeout,
134            consecutive_failures: 0,
135            open_since: None,
136            window: Vec::with_capacity(32),
137        }
138    }
139}
140
141impl CircuitPolicy for ConsecutiveFailures {
142    fn on_success(&mut self) -> bool {
143        self.consecutive_failures = 0;
144        self.window.push(true);
145        if self.window.len() > 100 {
146            self.window.remove(0);
147        }
148        let rate = self.window.iter().filter(|&&v| v).count() as f64
149            / self.window.len() as f64;
150        rate >= self.success_threshold
151    }
152
153    fn on_failure(&mut self) -> bool {
154        self.consecutive_failures += 1;
155        self.window.push(false);
156        if self.window.len() > 100 {
157            self.window.remove(0);
158        }
159        let should_open = self.consecutive_failures >= self.failure_threshold;
160        if should_open {
161            self.open_since = Some(Instant::now());
162        }
163        should_open
164    }
165
166    fn should_probe(&self) -> bool {
167        self.open_since
168            .map(|t| t.elapsed() >= self.timeout)
169            .unwrap_or(false)
170    }
171
172    fn on_half_open(&mut self) {
173        self.window.clear();
174        self.consecutive_failures = 0;
175    }
176}