Improve queue and logger context (#24924)

Previously there was a "graceful function", `RunWithShutdownFns`, mainly
intended for modules that don't support context.

The old queue system didn't work well with context, so the old queues
needed it.

After the queue refactoring, the new queue works well with context, so we
can use Go's context as much as possible: `RunWithShutdownFns` can be
removed (replaced by `RunWithCancel`, which uses the context cancel
mechanism), and the related code can be simplified.

This PR also fixes some legacy queue-initialization problems, e.g.:

* a typo in the archiver: "unable to create codes indexer queue" => "unable
to create repo-archive queue"
* a missing nil check for queues that failed to be created, which caused an
unfriendly panic (see the sketch after this list)
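
A minimal sketch of that nil check (the `createQueue` constructor here is hypothetical; only the check-before-use pattern reflects the fix):

```go
package main

import "log"

// Queue and createQueue are hypothetical stand-ins for a queue type and a
// constructor that may fail and return nil.
type Queue struct{ name string }

func createQueue(name string) *Queue {
	if name == "" {
		return nil // creation failed
	}
	return &Queue{name: name}
}

func main() {
	q := createQueue("repo-archive")
	if q == nil {
		// Without this check, using q later would panic with an unhelpful
		// nil-pointer dereference instead of reporting a clear error.
		log.Fatal("unable to create repo-archive queue")
	}
	log.Printf("queue %q created", q.name)
}
```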

After this PR, many goroutines get better display names (via pprof goroutine labels; see the sketch after the screenshots):

![image](701b2a9b-8065-4137-aeaa-0bda2b34604a)

![image](f1d5f50f-0534-40f0-b0be-f2c9daa5fe92)
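
The better names come from pprof goroutine labels, roughly as in this sketch (the label key and value here are only examples, not the exact ones Gitea sets):

```go
package main

import (
	"context"
	"runtime/pprof"
	"time"
)

func main() {
	// Attach labels to a context, then apply them to the goroutine so it
	// shows up with a readable name in goroutine dumps / pprof output.
	ctx := pprof.WithLabels(context.Background(), pprof.Labels("queue", "repo-archive"))
	go func() {
		pprof.SetGoroutineLabels(ctx)
		time.Sleep(time.Second) // stand-in for the queue's run loop
	}()
	time.Sleep(100 * time.Millisecond)
}
```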

wxiaoguang committed 2023-05-26 15:31:55 +08:00 (via GitHub)
commit 18f26cfbf7 (parent e4922d484b)
31 changed files with 204 additions and 263 deletions


@@ -5,6 +5,7 @@ package queue
 import (
 	"context"
+	"runtime/pprof"
 	"sync"
 	"sync/atomic"
 	"time"
@@ -13,9 +14,10 @@ import (
 )
 var (
-	infiniteTimerC        = make(chan time.Time)
-	batchDebounceDuration = 100 * time.Millisecond
-	workerIdleDuration    = 1 * time.Second
+	infiniteTimerC         = make(chan time.Time)
+	batchDebounceDuration  = 100 * time.Millisecond
+	workerIdleDuration     = 1 * time.Second
+	shutdownDefaultTimeout = 2 * time.Second
 	unhandledItemRequeueDuration atomic.Int64 // to avoid data race during test
 )
@@ -116,13 +118,15 @@ func (q *WorkerPoolQueue[T]) doWorkerHandle(batch []T) {
 // If the queue is shutting down, it returns true and try to push the items
 // Otherwise it does nothing and returns false
 func (q *WorkerPoolQueue[T]) basePushForShutdown(items ...T) bool {
-	ctxShutdown := q.ctxShutdown.Load()
-	if ctxShutdown == nil {
+	shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
+	if shutdownTimeout == 0 {
 		return false
 	}
+	ctxShutdown, ctxShutdownCancel := context.WithTimeout(context.Background(), shutdownTimeout)
+	defer ctxShutdownCancel()
 	for _, item := range items {
 		// if there is still any error, the queue can do nothing instead of losing the items
-		if err := q.baseQueue.PushItem(*ctxShutdown, q.marshal(item)); err != nil {
+		if err := q.baseQueue.PushItem(ctxShutdown, q.marshal(item)); err != nil {
 			log.Error("Failed to requeue item for queue %q when shutting down: %v", q.GetName(), err)
 		}
 	}
@@ -246,6 +250,8 @@ var skipFlushChan = make(chan flushType) // an empty flush chan, used to skip re
 // doRun is the main loop of the queue. All related "doXxx" functions are executed in its context.
 func (q *WorkerPoolQueue[T]) doRun() {
+	pprof.SetGoroutineLabels(q.ctxRun)
 	log.Debug("Queue %q starts running", q.GetName())
 	defer log.Debug("Queue %q stops running", q.GetName())
@@ -271,8 +277,8 @@ func (q *WorkerPoolQueue[T]) doRun() {
 				}
 			}
-			ctxShutdownPtr := q.ctxShutdown.Load()
-			if ctxShutdownPtr != nil {
+			shutdownTimeout := time.Duration(q.shutdownTimeout.Load())
+			if shutdownTimeout != 0 {
 				// if there is a shutdown context, try to push the items back to the base queue
 				q.basePushForShutdown(unhandled...)
 				workerDone := make(chan struct{})
@@ -280,7 +286,7 @@ func (q *WorkerPoolQueue[T]) doRun() {
 				go func() { wg.wg.Wait(); close(workerDone) }()
 				select {
 				case <-workerDone:
-				case <-(*ctxShutdownPtr).Done():
+				case <-time.After(shutdownTimeout):
 					log.Error("Queue %q is shutting down, but workers are still running after timeout", q.GetName())
 				}
 			} else {