tower/circuit_breaker/policy.rs
1use std::time::{Duration, Instant};
2
/// Determines when a [`CircuitBreaker`] should open, probe, and close.
///
/// Implement this trait to create custom circuit-breaking strategies —
/// for example latency-based triggers, error-rate thresholds, or a
/// manual operator-driven switch. The built-in [`ConsecutiveFailures`]
/// policy is a good starting point for most use cases.
///
/// Only [`on_success`](CircuitPolicy::on_success),
/// [`on_failure`](CircuitPolicy::on_failure) and
/// [`should_probe`](CircuitPolicy::should_probe) are required;
/// [`on_half_open`](CircuitPolicy::on_half_open) is an optional hook with a
/// no-op default.
///
/// # Thread safety
///
/// `CircuitPolicy` does **not** require [`Send`] or [`Sync`] as supertraits,
/// so single-threaded or `!Send` implementations are valid. However, because
/// the policy is stored inside `Arc<Mutex<…>>` within [`CircuitBreaker`],
/// the compiler will automatically require `P: Send` whenever
/// `CircuitBreaker<S, P>` is sent across threads (e.g. handed to
/// `tokio::spawn` or used with a multi-threaded runtime). `P: Sync` is
/// **not** needed — `Mutex` provides the necessary exclusion.
///
/// In practice, any policy that holds only owned data will be `Send`
/// automatically. If you store a raw pointer or `Rc` in your policy, it will
/// not be usable in a multi-threaded context — the compiler will tell you so
/// at the call site.
///
/// # Relationship to [retry budgets][budget]
///
/// [`Budget`][budget] governs *retry worthiness*: it caps the ratio of
/// retried requests to original requests, preventing retry amplification.
/// A circuit breaker governs *traffic admission*: it gates **all**
/// requests (including first attempts) when a backend is known to be
/// unhealthy.
///
/// The two compose naturally:
///
/// - A budget limits how aggressively clients retry individual requests.
/// - A circuit breaker stops all traffic once failure is systemic, giving
///   the backend time to recover without being drowned in retried load.
///
/// Using a circuit breaker *without* a budget still exposes you to retry
/// amplification from layers above; the combination of both provides full
/// protection against retry storms.
///
/// ```text
/// ┌──────────────────────────────────────┐
/// │ ServiceBuilder                       │
/// │   .layer(CircuitBreakerLayer::…)     ◄── gates all traffic when open
/// │   .layer(RetryLayer::new(policy))    ◄── budget inside policy caps retries
/// │   .service_fn(my_backend)            │
/// └──────────────────────────────────────┘
/// ```
///
/// [budget]: crate::retry::budget
/// [`CircuitBreaker`]: super::service::CircuitBreaker
pub trait CircuitPolicy {
    /// Called after a **successful** response from the inner service.
    ///
    /// Return `true` to signal that the circuit should close. This is
    /// acted upon only while the circuit is
    /// [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen];
    /// returning `true` from a [`Closed`][crate::circuit_breaker::CircuitStatus::Closed]
    /// state is a no-op.
    fn on_success(&mut self) -> bool;

    /// Called after a **failed** response from the inner service.
    ///
    /// Return `true` to signal that the circuit should open. This is
    /// acted upon when the circuit is
    /// [`Closed`][crate::circuit_breaker::CircuitStatus::Closed].
    ///
    /// Any failure while the circuit is
    /// [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen]
    /// always reopens it, regardless of the return value — the probe
    /// failed, so the backend is not yet ready. Implementations that time
    /// their own recovery window should therefore treat such a failure as
    /// the start of a new window.
    fn on_failure(&mut self) -> bool;

    /// Called while the circuit is [`Open`][crate::circuit_breaker::CircuitStatus::Open].
    ///
    /// Return `true` to allow a probe request through (transitions the
    /// circuit to [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen]).
    fn should_probe(&self) -> bool;

    /// Called immediately after the circuit transitions to
    /// [`HalfOpen`][crate::circuit_breaker::CircuitStatus::HalfOpen].
    ///
    /// Use this hook to reset per-window counters so that the recovery
    /// success rate is measured only from post-recovery probes, not from
    /// stale pre-outage history.
    ///
    /// The default implementation does nothing, which is appropriate for
    /// policies that keep no per-window state.
    fn on_half_open(&mut self) {}
}
90
91// ---------------------------------------------------------------------------
92// ConsecutiveFailures — the built-in policy
93// ---------------------------------------------------------------------------
94
95/// A [`CircuitPolicy`] that opens the circuit after *N* consecutive failures
96/// and closes it again once a sufficient fraction of probes succeed.
97///
98/// # Parameters
99///
100/// | Parameter | Description |
101/// |---|---|
102/// | `failure_threshold` | Number of consecutive failures needed to open the circuit. |
103/// | `success_threshold` | Fraction of HalfOpen probes (0.0–1.0) that must succeed to close. |
104/// | `timeout` | How long to stay Open before sending the first probe. |
105///
106/// # Example
107///
108/// ```rust,ignore
109/// use tower::circuit_breaker::{CircuitBreakerLayer, ConsecutiveFailures};
110/// use std::time::Duration;
111///
112/// let policy = ConsecutiveFailures::new(5, 0.8, Duration::from_secs(30));
113/// let layer = CircuitBreakerLayer::with_policy(policy);
114/// ```
115#[derive(Clone, Debug)]
116pub struct ConsecutiveFailures {
117 failure_threshold: usize,
118 success_threshold: f64,
119 timeout: Duration,
120 consecutive_failures: usize,
121 /// Set when the circuit opens; used by `should_probe`.
122 open_since: Option<Instant>,
123 /// Sliding window of outcomes during HalfOpen (max 100 entries).
124 window: Vec<bool>,
125}
126
127impl ConsecutiveFailures {
128 /// Create a new [`ConsecutiveFailures`] policy.
129 pub fn new(failure_threshold: usize, success_threshold: f64, timeout: Duration) -> Self {
130 Self {
131 failure_threshold,
132 success_threshold,
133 timeout,
134 consecutive_failures: 0,
135 open_since: None,
136 window: Vec::with_capacity(32),
137 }
138 }
139}
140
141impl CircuitPolicy for ConsecutiveFailures {
142 fn on_success(&mut self) -> bool {
143 self.consecutive_failures = 0;
144 self.window.push(true);
145 if self.window.len() > 100 {
146 self.window.remove(0);
147 }
148 let rate = self.window.iter().filter(|&&v| v).count() as f64
149 / self.window.len() as f64;
150 rate >= self.success_threshold
151 }
152
153 fn on_failure(&mut self) -> bool {
154 self.consecutive_failures += 1;
155 self.window.push(false);
156 if self.window.len() > 100 {
157 self.window.remove(0);
158 }
159 let should_open = self.consecutive_failures >= self.failure_threshold;
160 if should_open {
161 self.open_since = Some(Instant::now());
162 }
163 should_open
164 }
165
166 fn should_probe(&self) -> bool {
167 self.open_since
168 .map(|t| t.elapsed() >= self.timeout)
169 .unwrap_or(false)
170 }
171
172 fn on_half_open(&mut self) {
173 self.window.clear();
174 self.consecutive_failures = 0;
175 }
176}