joinMembersWithRetries joins the given members 100 at a time. After each batch of 100 it rediscovers the members. This helps when the list of members is big and by the time we reach the end the originally resolved addresses may be obsolete. joinMembersWithRetries returns an error iff it couldn't successfully join any node OR the context was cancelled.
(ctx context.Context, members []string, numAttempts int, logger log.Logger)
| 816 | // This helps when the list of members is big and by the time we reach the end the originally resolved addresses may be obsolete. |
| 817 | // joinMembersWithRetries returns an error iff it couldn't successfully join any node OR the context was cancelled. |
| 818 | func (m *KV) joinMembersWithRetries(ctx context.Context, members []string, numAttempts int, logger log.Logger) (int, error) { |
| 819 | var ( |
| 820 | cfg = backoff.Config{ |
| 821 | MinBackoff: m.cfg.MinJoinBackoff, |
| 822 | MaxBackoff: m.cfg.MaxJoinBackoff, |
| 823 | MaxRetries: numAttempts, |
| 824 | } |
| 825 | boff = backoff.New(ctx, cfg) |
| 826 | err error |
| 827 | successfullyJoined = 0 |
| 828 | ) |
| 829 | |
| 830 | for ; boff.Ongoing(); boff.Wait() { |
| 831 | successfullyJoined, err = m.joinMembersInBatches(ctx, members) |
| 832 | if successfullyJoined > 0 { |
| 833 | // If there are _some_ successful joins, then we can consider the join done. |
| 834 | // Mimicking the Join semantics we return an error only when we couldn't join any node at all |
| 835 | err = nil |
| 836 | break |
| 837 | } |
| 838 | level.Warn(logger).Log("msg", "joining memberlist cluster", "attempts", boff.NumRetries()+1, "max_attempts", numAttempts, "err", err) |
| 839 | } |
| 840 | if err == nil && boff.Err() != nil { |
| 841 | err = fmt.Errorf("joining memberlist: %w", boff.Err()) |
| 842 | } |
| 843 | |
| 844 | return successfullyJoined, err |
| 845 | } |
| 846 | |
| 847 | // joinMembersInBatches joins the given members and re-resolves their addresses after joining 100 nodes. |
| 848 | // joinMembersInBatches returns the number of nodes joined. joinMembersInBatches returns an error only when the |
no test coverage detected