diff --git a/README.md b/README.md index 94d98c1..5b106b6 100644 --- a/README.md +++ b/README.md @@ -88,7 +88,7 @@ A curated collection of idiomatic design & application patterns for Go language. | Pattern | Description | Status | |:-------:|:----------- |:------:| | [Bulkheads](/stability/bulkhead.md) | Enforces a principle of failure containment (i.e. prevents cascading failures) | ✘ | -| [Circuit-Breaker](/stability/circuit_breaker.md) | Stops the flow of the requests when requests are likely to fail | ✔ | +| [Circuit-Breaker](/stability/circuit-breaker.md) | Stops the flow of the requests when requests are likely to fail | ✔ | | [Deadline](/stability/deadline.md) | Allows clients to stop waiting for a response once the probability of response becomes low (e.g. after waiting 10 seconds for a page refresh) | ✘ | | [Fail-Fast](/stability/fail_fast.md) | Checks the availability of required resources at the start of a request and fails if the requirements are not satisfied | ✘ | | [Handshaking](/stability/handshaking.md) | Asks a component if it can take any more load, if it can't the request is declined | ✘ | diff --git a/SUMMARY.md b/SUMMARY.md index ccf2247..2436622 100644 --- a/SUMMARY.md +++ b/SUMMARY.md @@ -51,7 +51,7 @@ * [Push & Pull](/messaging/push_pull.md) * [Stability Patterns](/README.md#stability-patterns) * [Bulkheads](/stability/bulkhead.md) - * [Circuit-Breaker](/stability/circuit_breaker.md) + * [Circuit-Breaker](/stability/circuit-breaker.md) * [Deadline](/stability/deadline.md) * [Fail-Fast](/stability/fail_fast.md) * [Handshaking](/stability/handshaking.md) diff --git a/stability/circuit-breaker.md b/stability/circuit-breaker.md new file mode 100644 index 0000000..cb8f580 --- /dev/null +++ b/stability/circuit-breaker.md @@ -0,0 +1,102 @@ +# Circuit Breaker Pattern + +Similar to electrical fuses that prevent fires when a circuit that is connected +to the electrical grid starts drawing a high amount of power which causes the +wires to heat up and combust, the circuit breaker design pattern is a fail-first +mechanism that shuts down the circuit, request/response relationship or a +service in the case of software development, to prevent bigger failures. + +**Note:** The words "circuit" and "service" are used synonymously throught this +document. + +## Implementation + +Below is the implementation of a very simple circuit breaker to illustrate the purpose +of the circuit breaker design pattern. + +### Operation Counter + +`circuit.Counter` is a simple counter that records success and failure states of +a circuit along with a timestamp and calculates the consecutive number of +failures. + +```go +package circuit + +import ( + "time" +) + +type State int + +const ( + UnknownState State = iota + FailureState + SuccessState +) + +type Counter interface { + Count(State) + ConsecutiveFailures() uint32 + LastActivity() time.Time + Reset() +} +``` + +### Circuit Breaker + +Circuit is wrapped using the `circuit.Breaker` closure that keeps an internal operation counter. +It returns a fast error if the circuit has failed consecutively more than the specified threshold. +After a while it retries the request and records it. + +**Note:** Context type is used here to carry deadlines, cancelation signals, and +other request-scoped values across API boundaries and between processes. + +```go +package circuit + +import ( + "context" + "time" +) + +type Circuit func(context.Context) error + +func Breaker(c Circuit, failureThreshold uint32) Circuit { + cnt := NewCounter() + + return func(ctx context) error { + if cnt.ConsecutiveFailures() >= failureThreshold { + canRetry := func(cnt Counter) { + backoffLevel := Cnt.ConsecutiveFailures() - failureThreshold + + // Calculates when should the circuit breaker resume propagating requests + // to the service + shouldRetryAt := cnt.LastActivity().Add(time.Seconds * 2 << backoffLevel) + + return time.Now().After(shouldRetryAt) + } + + if !canRetry(cnt) { + // Fails fast instead of propagating requests to the circuit since + // not enough time has passed since the last failure to retry + return ErrServiceUnavailable + } + } + + // Unless the failure threshold is exceeded the wrapped service mimics the + // old behavior and the difference in behavior is seen after consecutive failures + if err := c(ctx); err != nil { + cnt.Count(FailureState) + return err + } + + cnt.Count(SuccessState) + return nil + } +} +``` + +## Related Works + +- [sony/gobreaker](https://github.com/sony/go-breaker) is a well-tested and intuitive circuit breaker implementation for real-world use cases. diff --git a/stability/circuit_breaker.md b/stability/circuit_breaker.md deleted file mode 100644 index 0fb5d7c..0000000 --- a/stability/circuit_breaker.md +++ /dev/null @@ -1,7 +0,0 @@ -# Circuit Breaker Pattern - -The [circuit breaker design pattern](https://en.wikipedia.org/wiki/Circuit_breaker_design_pattern) is used to detect failures and encapsulates logic of preventing a failure to reoccur constantly. - -# Implementation and Example - -An example of implementation and usage can be found in [circuitbreaker.go](circuitbreaker.go). \ No newline at end of file diff --git a/stability/circuitbreaker.go b/stability/circuitbreaker.go deleted file mode 100644 index 99c6732..0000000 --- a/stability/circuitbreaker.go +++ /dev/null @@ -1,284 +0,0 @@ -package circuitbreaker - -import ( - "fmt" - "sync" - "time" -) - -// Interface contains behavior that needs to be implemented -// for an object to be wrapped in a circuit-breaker -type Interface interface { - OnFailure() - OnCircuitBreak() -} - -// Counts holds the numbers of requests and their successes/failures. -// CircuitBreaker clears the internal Counts either -// on the change of the state or at the closed-state intervals. -// Counts ignores the results of the requests sent before clearing. -type Counter struct { - Requests uint32 - TotalSuccesses uint32 - TotalFailures uint32 - ConsecutiveSuccesses uint32 - ConsecutiveFailures uint32 -} - -func (c *Counter) Request() { - c.Requests++ -} - -func (c *Counter) Success() { - c.TotalSuccesses++ - - c.ConsecutiveFailures = 0 - c.ConsecutiveSuccesses++ -} - -func (c *Counter) Failure() { - c.TotalFailures++ - - c.ConsecutiveSuccesses = 0 - c.ConsecutiveFailures++ -} - -func (c *Counter) Clear() { - c.Requests = 0 - c.TotalSuccesses = 0 - c.TotalFailures = 0 - c.ConsecutiveSuccesses = 0 - c.ConsecutiveFailures = 0 -} - -// Settings configures CircuitBreaker: -// -// Name is the name of the CircuitBreaker. -// -// MaxRequests is the maximum number of requests allowed to pass through -// when the CircuitBreaker is half-open. -// If MaxRequests is 0, the CircuitBreaker allows only 1 request. -// -// Interval is the cyclic period of the closed state -// for the CircuitBreaker to clear the internal Counts. -// If Interval is 0, the CircuitBreaker doesn't clear internal Counts during the closed state. -// -// Timeout is the period of the open state, -// after which the state of the CircuitBreaker becomes half-open. -// If Timeout is 0, the timeout value of the CircuitBreaker is set to 60 seconds. -// -// ReadyToTrip is called with a copy of Counts whenever a request fails in the closed state. -// If ReadyToTrip returns true, the CircuitBreaker will be placed into the open state. -// If ReadyToTrip is nil, default ReadyToTrip is used. -// Default ReadyToTrip returns true when the number of consecutive failures is more than 5. -// -// OnStateChange is called whenever the state of the CircuitBreaker changes. -type Settings struct { - Name string - MaxRequests uint32 - Interval time.Duration - Timeout time.Duration - ReadyToTrip func(counts Counts) bool - OnStateChange func(name string, from State, to State) -} - -// CircuitBreaker is a state machine to prevent sending requests that are likely to fail. -type CircuitBreaker struct { - name string - maxRequests uint32 - interval time.Duration - timeout time.Duration - readyToTrip func(counts Counts) bool - onStateChange func(name string, from State, to State) - - mutex sync.Mutex - state State - generation uint64 - counts Counts - expiry time.Time -} - -// NewCircuitBreaker returns a new CircuitBreaker configured with the given Settings. -func NewCircuitBreaker(st Settings) *CircuitBreaker { - cb := new(CircuitBreaker) - - cb.name = st.Name - cb.interval = st.Interval - cb.onStateChange = st.OnStateChange - - if st.MaxRequests == 0 { - cb.maxRequests = 1 - } else { - cb.maxRequests = st.MaxRequests - } - - if st.Timeout == 0 { - cb.timeout = defaultTimeout - } else { - cb.timeout = st.Timeout - } - - if st.ReadyToTrip == nil { - cb.readyToTrip = defaultReadyToTrip - } else { - cb.readyToTrip = st.ReadyToTrip - } - - cb.toNewGeneration(time.Now()) - - return cb -} - -const defaultTimeout = time.Duration(60) * time.Second - -func defaultReadyToTrip(counts Counts) bool { - return counts.ConsecutiveFailures > 5 -} - -// State returns the current state of the CircuitBreaker. -func (cb *CircuitBreaker) State() State { - cb.mutex.Lock() - defer cb.mutex.Unlock() - - now := time.Now() - state, _ := cb.currentState(now) - return state -} - -// Execute runs the given request if the CircuitBreaker accepts it. -// Execute returns an error instantly if the CircuitBreaker rejects the request. -// Otherwise, Execute returns the result of the request. -// If a panic occurs in the request, the CircuitBreaker handles it as an error -// and causes the same panic again. -func (cb *CircuitBreaker) Execute(req func() (interface{}, error)) (interface{}, error) { - generation, err := cb.beforeRequest() - if err != nil { - return nil, err - } - - defer func() { - e := recover() - if e != nil { - cb.afterRequest(generation, fmt.Errorf("panic in request")) - panic(e) - } - }() - - result, err := req() - cb.afterRequest(generation, err) - return result, err -} - -func (cb *CircuitBreaker) beforeRequest() (uint64, error) { - cb.mutex.Lock() - defer cb.mutex.Unlock() - - now := time.Now() - state, generation := cb.currentState(now) - - if state == StateOpen { - return generation, cb.errorStateOpen() - } else if state == StateHalfOpen && cb.counts.Requests >= cb.maxRequests { - return generation, fmt.Errorf("too many requests") - } - - cb.counts.onRequest() - return generation, nil -} - -func (cb *CircuitBreaker) afterRequest(before uint64, err error) { - cb.mutex.Lock() - defer cb.mutex.Unlock() - - now := time.Now() - state, generation := cb.currentState(now) - if generation != before { - return - } - - if err == nil { - cb.onSuccess(state, now) - } else { - cb.onFailure(state, now) - } -} - -func (cb *CircuitBreaker) onSuccess(state State, now time.Time) { - switch state { - case StateClosed: - cb.counts.onSuccess() - case StateHalfOpen: - cb.counts.onSuccess() - if cb.counts.ConsecutiveSuccesses >= cb.maxRequests { - cb.setState(StateClosed, now) - } - } -} - -func (cb *CircuitBreaker) onFailure(state State, now time.Time) { - switch state { - case StateClosed: - cb.counts.onFailure() - if cb.readyToTrip(cb.counts) { - cb.setState(StateOpen, now) - } - case StateHalfOpen: - cb.setState(StateOpen, now) - } -} - -func (cb *CircuitBreaker) currentState(now time.Time) (State, uint64) { - switch cb.state { - case StateClosed: - if !cb.expiry.IsZero() && cb.expiry.Before(now) { - cb.toNewGeneration(now) - } - case StateOpen: - if cb.expiry.Before(now) { - cb.setState(StateHalfOpen, now) - } - } - return cb.state, cb.generation -} - -func (cb *CircuitBreaker) setState(state State, now time.Time) { - if cb.state == state { - return - } - - prev := cb.state - cb.state = state - - cb.toNewGeneration(now) - - if cb.onStateChange != nil { - cb.onStateChange(cb.name, prev, state) - } -} - -func (cb *CircuitBreaker) toNewGeneration(now time.Time) { - cb.generation++ - cb.counts.clear() - - var zero time.Time - switch cb.state { - case StateClosed: - if cb.interval == 0 { - cb.expiry = zero - } else { - cb.expiry = now.Add(cb.interval) - } - case StateOpen: - cb.expiry = now.Add(cb.timeout) - default: // StateHalfOpen - cb.expiry = zero - } -} - -func (cb *CircuitBreaker) errorStateOpen() error { - if cb.name == "" { - return fmt.Errorf("circuit breaker is open") - } - - return fmt.Errorf("circuit breaker '%s' is open", cb.name) -}