From 53b2038d2e564db268c493e6d2205bcfe27d949c Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 18 Aug 2013 17:31:38 -0700 Subject: [PATCH 1/7] feat(command): add version to join command Add a version to the join command. Add a versioning document to discuss some of the design decisions. --- Documentation/internal-protocol-versioning.md | 61 +++++++++++++++++++ command.go | 8 ++- name_url_map.go | 4 +- raft_handlers.go | 7 +++ raft_server.go | 3 + 5 files changed, 80 insertions(+), 3 deletions(-) create mode 100644 Documentation/internal-protocol-versioning.md diff --git a/Documentation/internal-protocol-versioning.md b/Documentation/internal-protocol-versioning.md new file mode 100644 index 00000000000..503a3029b94 --- /dev/null +++ b/Documentation/internal-protocol-versioning.md @@ -0,0 +1,61 @@ +# Versioning + +Goal: We want to be able to upgrade an individual machine in an etcd cluster to a newer version of etcd. +The process will take the form of individual followers upgrading to the latest version until the entire cluster is on the new version. + +Immediate need: etcd is moving too fast to version the internal API right now. +But, we need to keep mixed version clusters from being started by a rollowing upgrade process (e.g. the CoreOS developer alpha). + +Longer term need: Having a mixed version cluster where all machines are not be running the exact same version of etcd itself but are able to speak one version of the internal protocol. + +Solution: The internal protocol needs to be versioned just as the client protocol is. +Initially during the 0.\*.\* series of etcd releases we won't allow mixed versions at all. + +## Join Control + +We will add a version field to the join command. +But, who decides whether a newly upgraded follower should be able to join a cluster? + +### Leader Controlled + +If the leader controls the version of followers joining the cluster then it compares its version to the version number presented by the follower in the JoinCommand and rejects the join if the number is less than the leader's version number. + +Advantages + +- Leader controls all cluster decisions still + +Disadvantages + +- Follower knows better what versions of the interal protocol it can talk than the leader + + +### Follower Controlled + +A newly upgraded follower should be able to figure out the leaders internal version from a defined internal backwards compatible API endpoint and figure out if it can join the cluster. +If it cannot join the cluster then it simply exits. + +Advantages + +- The follower is running newer code and knows better if it can talk older protocols + +Disadvantages + +- This cluster decision isn't made by the leader + +## Recommendation + +To solve the immediate need and to plan for the future lets do the following: + +- Add Version field to JoinCommand +- Have a joining follower read the Version field of the leader and if its own version doesn't match the leader then sleep for some random interval and retry later to see if the leader has upgraded. + +# Research + +## Zookeeper versioning + +Zookeeper very recently added versioning into the protocol and it doesn't seem to have seen any use yet. +https://issues.apache.org/jira/browse/ZOOKEEPER-1633 + +## doozerd + +doozerd stores the version number of the machine in the datastore for other clients to check, no decisions are made off of this number currently. diff --git a/command.go b/command.go index 5a5149a6e89..b4ee1646c28 100644 --- a/command.go +++ b/command.go @@ -117,6 +117,7 @@ func (c *WatchCommand) Apply(server *raft.Server) (interface{}, error) { // JoinCommand type JoinCommand struct { + Version string `json:"version"` Name string `json:"name"` RaftURL string `json:"raftURL"` EtcdURL string `json:"etcdURL"` @@ -124,6 +125,9 @@ type JoinCommand struct { func newJoinCommand() *JoinCommand { return &JoinCommand{ + // TODO: This will be the internal protocol version but tie it + // to the release tag for now. + Version: r.version, Name: r.name, RaftURL: r.url, EtcdURL: e.url, @@ -152,14 +156,14 @@ func (c *JoinCommand) Apply(raftServer *raft.Server) (interface{}, error) { return []byte("join fail"), etcdErr.NewError(103, "") } - addNameToURL(c.Name, c.RaftURL, c.EtcdURL) + addNameToURL(c.Name, c.Version, c.RaftURL, c.EtcdURL) // add peer in raft err := raftServer.AddPeer(c.Name, "") // add machine in etcd storage key := path.Join("_etcd/machines", c.Name) - value := fmt.Sprintf("raft=%s&etcd=%s", c.RaftURL, c.EtcdURL) + value := fmt.Sprintf("raft=%s&etcd=%s&version=%s", c.RaftURL, c.EtcdURL, c.Version) etcdStore.Set(key, value, time.Unix(0, 0), raftServer.CommitIndex()) return []byte("join success"), err diff --git a/name_url_map.go b/name_url_map.go index 7acd1848be6..13e95e606df 100644 --- a/name_url_map.go +++ b/name_url_map.go @@ -7,6 +7,7 @@ import ( // we map node name to url type nodeInfo struct { + version string raftURL string etcdURL string } @@ -39,8 +40,9 @@ func nameToRaftURL(name string) (string, bool) { } // addNameToURL add a name that maps to raftURL and etcdURL -func addNameToURL(name string, raftURL string, etcdURL string) { +func addNameToURL(name string, version string, raftURL string, etcdURL string) { namesMap[name] = &nodeInfo{ + version: version, raftURL: raftURL, etcdURL: etcdURL, } diff --git a/raft_handlers.go b/raft_handlers.go index 30272d42062..d061412008d 100644 --- a/raft_handlers.go +++ b/raft_handlers.go @@ -113,3 +113,10 @@ func NameHttpHandler(w http.ResponseWriter, req *http.Request) { w.WriteHeader(http.StatusOK) w.Write([]byte(r.name)) } + +// Response to the name request +func RaftVersionHttpHandler(w http.ResponseWriter, req *http.Request) { + debugf("[recv] Get %s/version/ ", r.url) + w.WriteHeader(http.StatusOK) + w.Write([]byte(r.version)) +} diff --git a/raft_server.go b/raft_server.go index 1bd190f5618..ec4a3feffc3 100644 --- a/raft_server.go +++ b/raft_server.go @@ -14,6 +14,7 @@ import ( type raftServer struct { *raft.Server + version string name string url string tlsConf *TLSConfig @@ -34,6 +35,7 @@ func newRaftServer(name string, url string, tlsConf *TLSConfig, tlsInfo *TLSInfo return &raftServer{ Server: server, + version: releaseVersion, name: name, url: url, tlsConf: tlsConf, @@ -144,6 +146,7 @@ func (r *raftServer) startTransport(scheme string, tlsConf tls.Config) { // internal commands raftMux.HandleFunc("/name", NameHttpHandler) + raftMux.HandleFunc("/version", RaftVersionHttpHandler) raftMux.Handle("/join", errorHandler(JoinHttpHandler)) raftMux.HandleFunc("/vote", VoteHttpHandler) raftMux.HandleFunc("/log", GetLogHttpHandler) From 2c9e90d6ad82b1280cdeebb41007d679d69cd186 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 18 Aug 2013 20:47:09 -0700 Subject: [PATCH 2/7] feat(raft_server): do not allow mixed versions fail to join if there is an internal version mismatch. --- etcd_test.go | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ raft_server.go | 31 +++++++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 2 deletions(-) diff --git a/etcd_test.go b/etcd_test.go index cb43a2a4f0e..786c6a74519 100644 --- a/etcd_test.go +++ b/etcd_test.go @@ -6,6 +6,8 @@ import ( "github.com/coreos/go-etcd/etcd" "math/rand" "net/http" + "net/http/httptest" + "net/url" "os" "strconv" "strings" @@ -54,6 +56,53 @@ func TestSingleNode(t *testing.T) { } } +// TestInternalVersionFail will ensure that etcd does not come up if the internal raft +// versions do not match. +func TestInternalVersionFail(t *testing.T) { + checkedVersion := false + testMux := http.NewServeMux() + + testMux.HandleFunc("/version", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintln(w, "This is not a version number") + checkedVersion = true + }) + + testMux.HandleFunc("/join", func(w http.ResponseWriter, r *http.Request) { + t.Fatal("should not attempt to join!") + }) + + ts := httptest.NewServer(testMux) + defer ts.Close() + + fakeURL, _ := url.Parse(ts.URL) + + procAttr := new(os.ProcAttr) + procAttr.Files = []*os.File{nil, os.Stdout, os.Stderr} + args := []string{"etcd", "-n=node1", "-f", "-d=/tmp/node1", "-vv", "-C="+fakeURL.Host} + + process, err := os.StartProcess("etcd", args, procAttr) + if err != nil { + t.Fatal("start process failed:" + err.Error()) + return + } + defer process.Kill() + + time.Sleep(time.Second) + + _, err = http.Get("http://127.0.0.1:4001") + + if err == nil { + t.Fatal("etcd node should not be up") + return + } + + if checkedVersion == false { + t.Fatal("etcd did not check the version") + return + } +} + + // This test creates a single node and then set a value to it. // Then this test kills the node and restart it and tries to get the value again. func TestSingleNodeRecovery(t *testing.T) { diff --git a/raft_server.go b/raft_server.go index ec4a3feffc3..800da015247 100644 --- a/raft_server.go +++ b/raft_server.go @@ -4,6 +4,7 @@ import ( "bytes" "crypto/tls" "encoding/json" + "io/ioutil" "fmt" etcdErr "github.com/coreos/etcd/error" "github.com/coreos/go-raft" @@ -163,15 +164,41 @@ func (r *raftServer) startTransport(scheme string, tlsConf tls.Config) { } +func getLeaderVersion(t transporter, versionURL url.URL) (string, error) { + resp, err := t.Get(versionURL.String()) + + if err != nil { + return "", err + } + + defer resp.Body.Close() + body, err := ioutil.ReadAll(resp.Body) + + return string(body), nil +} + // Send join requests to the leader. func joinCluster(s *raft.Server, raftURL string, scheme string) error { var b bytes.Buffer - json.NewEncoder(&b).Encode(newJoinCommand()) - // t must be ok t, _ := r.Transporter().(transporter) + // Our version must match the leaders version + versionURL := url.URL{Host: raftURL, Scheme: scheme, Path: "/version"} + version, err := getLeaderVersion(t, versionURL) + if err != nil { + return fmt.Errorf("Unable to join: %v", err) + } + + // TODO: versioning of the internal protocol. See: + // Documentation/internatl-protocol-versioning.md + if version != r.version { + return fmt.Errorf("Unable to join: internal version mismatch, entire cluster must be running identical versions of etcd") + } + + json.NewEncoder(&b).Encode(newJoinCommand()) + joinURL := url.URL{Host: raftURL, Scheme: scheme, Path: "/join"} debugf("Send Join Request to %s", raftURL) From e79f6842bbb1378de67e8fe165a88fa427f8099f Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 18 Aug 2013 21:46:17 -0700 Subject: [PATCH 3/7] fix(command): change Version to RaftVersion clear up confusion on what this field is used for: it is for the internal raft protocol version only. --- command.go | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/command.go b/command.go index b4ee1646c28..e2fa002ebd0 100644 --- a/command.go +++ b/command.go @@ -117,7 +117,7 @@ func (c *WatchCommand) Apply(server *raft.Server) (interface{}, error) { // JoinCommand type JoinCommand struct { - Version string `json:"version"` + RaftVersion string `json:"raftVersion"` Name string `json:"name"` RaftURL string `json:"raftURL"` EtcdURL string `json:"etcdURL"` @@ -125,9 +125,7 @@ type JoinCommand struct { func newJoinCommand() *JoinCommand { return &JoinCommand{ - // TODO: This will be the internal protocol version but tie it - // to the release tag for now. - Version: r.version, + RaftVersion: r.version, Name: r.name, RaftURL: r.url, EtcdURL: e.url, @@ -156,14 +154,14 @@ func (c *JoinCommand) Apply(raftServer *raft.Server) (interface{}, error) { return []byte("join fail"), etcdErr.NewError(103, "") } - addNameToURL(c.Name, c.Version, c.RaftURL, c.EtcdURL) + addNameToURL(c.Name, c.RaftVersion, c.RaftURL, c.EtcdURL) // add peer in raft err := raftServer.AddPeer(c.Name, "") // add machine in etcd storage key := path.Join("_etcd/machines", c.Name) - value := fmt.Sprintf("raft=%s&etcd=%s&version=%s", c.RaftURL, c.EtcdURL, c.Version) + value := fmt.Sprintf("raft=%s&etcd=%s&raftVersion=%s", c.RaftURL, c.EtcdURL, c.RaftVersion) etcdStore.Set(key, value, time.Unix(0, 0), raftServer.CommitIndex()) return []byte("join success"), err From fc776f2ad6b45f87ec22d9e8a80f98421d968531 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Sun, 18 Aug 2013 21:46:59 -0700 Subject: [PATCH 4/7] fix(raft_server): add comment on version field explain what the version field is for and why it is set to releaseVersion --- raft_server.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/raft_server.go b/raft_server.go index 800da015247..ef36d61871a 100644 --- a/raft_server.go +++ b/raft_server.go @@ -36,6 +36,8 @@ func newRaftServer(name string, url string, tlsConf *TLSConfig, tlsInfo *TLSInfo return &raftServer{ Server: server, + // TODO: This will be the internal protocol version but tie it + // to the release tag for now. version: releaseVersion, name: name, url: url, From 3fff0a3c2bd0f94a2a24bb5acfb1061c8e6ac259 Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Mon, 19 Aug 2013 08:45:58 -0700 Subject: [PATCH 5/7] fix(version): add raftVersion to the version file --- raft_server.go | 4 +--- version.go | 5 +++++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/raft_server.go b/raft_server.go index ef36d61871a..1918890efe0 100644 --- a/raft_server.go +++ b/raft_server.go @@ -36,9 +36,7 @@ func newRaftServer(name string, url string, tlsConf *TLSConfig, tlsInfo *TLSInfo return &raftServer{ Server: server, - // TODO: This will be the internal protocol version but tie it - // to the release tag for now. - version: releaseVersion, + version: raftVersion, name: name, url: url, tlsConf: tlsConf, diff --git a/version.go b/version.go index eae7569c7c7..2302ea205ef 100644 --- a/version.go +++ b/version.go @@ -1,3 +1,8 @@ package main const version = "v1" + +// TODO: The release version (generated from the git tag) will be the raft +// protocol version for now. When things settle down we will fix it like the +// client API above. +const raftVersion = releaseVersion From bfc68e8e37fe8bedb2c9b73a5d521ec18a76efec Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Mon, 19 Aug 2013 08:53:15 -0700 Subject: [PATCH 6/7] fix(raft_server): rename getLeaderVersion to getVersion --- raft_server.go | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/raft_server.go b/raft_server.go index 1918890efe0..6a778bb48a4 100644 --- a/raft_server.go +++ b/raft_server.go @@ -164,7 +164,10 @@ func (r *raftServer) startTransport(scheme string, tlsConf tls.Config) { } -func getLeaderVersion(t transporter, versionURL url.URL) (string, error) { +// getVersion fetches the raft version of a peer. This works for now but we +// will need to do something more sophisticated later when we allow mixed +// version clusters. +func getVersion(t transporter, versionURL url.URL) (string, error) { resp, err := t.Get(versionURL.String()) if err != nil { @@ -186,7 +189,7 @@ func joinCluster(s *raft.Server, raftURL string, scheme string) error { // Our version must match the leaders version versionURL := url.URL{Host: raftURL, Scheme: scheme, Path: "/version"} - version, err := getLeaderVersion(t, versionURL) + version, err := getVersion(t, versionURL) if err != nil { return fmt.Errorf("Unable to join: %v", err) } From b430a07e1bb5931eecf7ceb2235c4f48d8b18c2d Mon Sep 17 00:00:00 2001 From: Brandon Philips Date: Mon, 19 Aug 2013 09:35:34 -0700 Subject: [PATCH 7/7] chore(name_url_map): rename version to raftVersion make it more clear that we are referring to the raftVersion. --- name_url_map.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/name_url_map.go b/name_url_map.go index 13e95e606df..dd339d86fc0 100644 --- a/name_url_map.go +++ b/name_url_map.go @@ -7,7 +7,7 @@ import ( // we map node name to url type nodeInfo struct { - version string + raftVersion string raftURL string etcdURL string } @@ -42,7 +42,7 @@ func nameToRaftURL(name string) (string, bool) { // addNameToURL add a name that maps to raftURL and etcdURL func addNameToURL(name string, version string, raftURL string, etcdURL string) { namesMap[name] = &nodeInfo{ - version: version, + raftVersion: raftVersion, raftURL: raftURL, etcdURL: etcdURL, }