Skip to content

Commit df0bae0

Browse files
craig[bot]knz
craig[bot]
andcommitted
Merge #54758
54758: server: split server pre-start from accepting clients r=irfansharif,tbg a=knz This will enable the introduction of further cluster initialization using SQL in `start.go`, before clients become able to connect. I found this to be a prerequisite to #48647. Co-authored-by: Raphael 'kena' Poss <[email protected]>
2 parents 361acae + 50ffec8 commit df0bae0

File tree

4 files changed

+69
-19
lines changed

4 files changed

+69
-19
lines changed

pkg/cli/start.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -564,7 +564,7 @@ If problems persist, please see %s.`
564564
}
565565

566566
// Attempt to start the server.
567-
if err := s.Start(ctx); err != nil {
567+
if err := s.PreStart(ctx); err != nil {
568568
if le := (*server.ListenError)(nil); errors.As(err, &le) {
569569
const errorPrefix = "consider changing the port via --%s"
570570
if le.Addr == serverCfg.Addr {
@@ -596,6 +596,11 @@ If problems persist, please see %s.`
596596
return err
597597
}
598598

599+
// Now let SQL clients in.
600+
if err := s.AcceptClients(ctx); err != nil {
601+
return err
602+
}
603+
599604
// Now inform the user that the server is running and tell the
600605
// user about its run-time derived parameters.
601606
var buf redact.StringBuilder

pkg/server/server.go

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -972,15 +972,32 @@ func getServerEndpointCounter(method string) telemetry.Counter {
972972
return telemetry.GetCounter(fmt.Sprintf("%s.%s", counterPrefix, method))
973973
}
974974

975-
// Start starts the server on the specified port, starts gossip and initializes
976-
// the node using the engines from the server's context. This is complex since
977-
// it sets up the listeners and the associated port muxing, but especially since
978-
// it has to solve the "bootstrapping problem": nodes need to connect to Gossip
979-
// fairly early, but what drives Gossip connectivity are the first range
980-
// replicas in the kv store. This in turn suggests opening the Gossip server
981-
// early. However, naively doing so also serves most other services prematurely,
982-
// which exposes a large surface of potentially underinitialized services. This
983-
// is avoided with some additional complexity that can be summarized as follows:
975+
// Start calls PreStart() and AcceptClient() in sequence.
976+
// This is suitable for use e.g. in tests.
977+
func (s *Server) Start(ctx context.Context) error {
978+
if err := s.PreStart(ctx); err != nil {
979+
return err
980+
}
981+
return s.AcceptClients(ctx)
982+
}
983+
984+
// PreStart starts the server on the specified port, starts gossip and
985+
// initializes the node using the engines from the server's context.
986+
//
987+
// It does not activate the pgwire listener over the network / unix
988+
// socket, which is done by the AcceptClients() method. The separation
989+
// between the two exists so that SQL initialization can take place
990+
// before the first client is accepted.
991+
//
992+
// PreStart is complex since it sets up the listeners and the associated
993+
// port muxing, but especially since it has to solve the
994+
// "bootstrapping problem": nodes need to connect to Gossip fairly
995+
// early, but what drives Gossip connectivity are the first range
996+
// replicas in the kv store. This in turn suggests opening the Gossip
997+
// server early. However, naively doing so also serves most other
998+
// services prematurely, which exposes a large surface of potentially
999+
// underinitialized services. This is avoided with some additional
1000+
// complexity that can be summarized as follows:
9841001
//
9851002
// - before blocking trying to connect to the Gossip network, we already open
9861003
// the admin UI (so that its diagnostics are available)
@@ -990,7 +1007,7 @@ func getServerEndpointCounter(method string) telemetry.Counter {
9901007
//
9911008
// The passed context can be used to trace the server startup. The context
9921009
// should represent the general startup operation.
993-
func (s *Server) Start(ctx context.Context) error {
1010+
func (s *Server) PreStart(ctx context.Context) error {
9941011
ctx = s.AnnotateCtx(ctx)
9951012

9961013
// Start the time sanity checker.
@@ -1691,7 +1708,7 @@ func (s *Server) Start(ctx context.Context) error {
16911708
// executes a SQL query, this must be done after the SQL layer is ready.
16921709
s.node.recordJoinEvent()
16931710

1694-
if err := s.sqlServer.start(
1711+
if err := s.sqlServer.preStart(
16951712
workersCtx,
16961713
s.stopper,
16971714
s.cfg.TestingKnobs,
@@ -1712,6 +1729,24 @@ func (s *Server) Start(ctx context.Context) error {
17121729
// been run so it is safe to upgrade to the binary's current version.
17131730
s.startAttemptUpgrade(ctx)
17141731

1732+
log.Event(ctx, "server initialized")
1733+
return nil
1734+
}
1735+
1736+
// AcceptClients starts listening for incoming SQL clients over the network.
1737+
func (s *Server) AcceptClients(ctx context.Context) error {
1738+
workersCtx := s.AnnotateCtx(context.Background())
1739+
1740+
if err := s.sqlServer.startServeSQL(
1741+
workersCtx,
1742+
s.stopper,
1743+
s.sqlServer.connManager,
1744+
s.sqlServer.pgL,
1745+
s.cfg.SocketFile,
1746+
); err != nil {
1747+
return err
1748+
}
1749+
17151750
log.Event(ctx, "server ready")
17161751
return nil
17171752
}

pkg/server/server_sql.go

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,12 @@ type sqlServer struct {
9292
stmtDiagnosticsRegistry *stmtdiagnostics.Registry
9393
sqlLivenessProvider sqlliveness.Provider
9494
metricsRegistry *metric.Registry
95+
96+
// pgL is the shared RPC/SQL listener, opened when RPC was initialized.
97+
pgL net.Listener
98+
// connManager is the connection manager to use to set up additional
99+
// SQL listeners in AcceptClients().
100+
connManager netutil.Server
95101
}
96102

97103
// sqlServerOptionalKVArgs are the arguments supplied to newSQLServer which are
@@ -649,7 +655,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*sqlServer, error) {
649655
}, nil
650656
}
651657

652-
func (s *sqlServer) start(
658+
func (s *sqlServer) preStart(
653659
ctx context.Context,
654660
stopper *stop.Stopper,
655661
knobs base.TestingKnobs,
@@ -665,6 +671,8 @@ func (s *sqlServer) start(
665671
return err
666672
}
667673
}
674+
s.connManager = connManager
675+
s.pgL = pgL
668676
s.sqlLivenessProvider.Start(ctx)
669677
s.execCfg.GCJobNotifier.Start(ctx)
670678
s.temporaryObjectCleaner.Start(ctx, stopper)
@@ -752,11 +760,6 @@ func (s *sqlServer) start(
752760

753761
log.Infof(ctx, "done ensuring all necessary migrations have run")
754762

755-
// Start serving SQL clients.
756-
if err := s.startServeSQL(ctx, stopper, connManager, pgL, socketFile); err != nil {
757-
return err
758-
}
759-
760763
// Start the async migration to upgrade namespace entries from the old
761764
// namespace table (id 2) to the new one (id 30).
762765
if err := migMgr.StartSystemNamespaceMigration(ctx, bootstrapVersion); err != nil {

pkg/server/testserver.go

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -711,7 +711,7 @@ func StartTenant(
711711
return "", "", err
712712
}
713713

714-
if err := s.start(ctx,
714+
if err := s.preStart(ctx,
715715
args.stopper,
716716
args.TestingKnobs,
717717
connManager,
@@ -721,6 +721,13 @@ func StartTenant(
721721
); err != nil {
722722
return "", "", err
723723
}
724+
if err := s.startServeSQL(ctx,
725+
args.stopper,
726+
s.connManager,
727+
s.pgL,
728+
socketFile); err != nil {
729+
return "", "", err
730+
}
724731

725732
return pgLAddr, httpLAddr, nil
726733
}

0 commit comments

Comments
 (0)