Restart zero worker if there is still work to do (#18658)

* Restart zero worker if there is still work to do. It is possible for the zero worker to time out before all the work is finished. This may mean that the remaining work takes a long time to complete, because a new worker will only be started when something is pushed to the queue again. Also ensure that the requested count is reset after pull and push mirror sync requests, and add some more trace logging to the queue push. Fix #18607. Signed-off-by: Andrew Thornton <art27@cantab.net>
2022-02-08 14:02:32 +00:00 · 2022-02-08 14:02:32 +00:00 · df44017328
commit df44017328
parent 4d939845d2
2 changed files with 32 additions and 10 deletions
--- a/modules/queue/workerpool.go
+++ b/modules/queue/workerpool.go
@@ -115,6 +115,9 @@ func (p *WorkerPool) hasNoWorkerScaling() bool {
 	return p.numberOfWorkers == 0 && (p.boostTimeout == 0 || p.boostWorkers == 0 || p.maxNumberOfWorkers == 0)
 }

+// zeroBoost will add a temporary boost worker for a no worker queue
+// p.lock must be locked at the start of this function BUT it will be unlocked by the end of this function
+// (This is because addWorkers has to be called whilst unlocked)
 func (p *WorkerPool) zeroBoost() {
 	ctx, cancel := context.WithTimeout(p.baseCtx, p.boostTimeout)
 	mq := GetManager().GetManagedQueue(p.qid)
@@ -316,6 +319,17 @@ func (p *WorkerPool) addWorkers(ctx context.Context, cancel context.CancelFunc,
 				}
 				p.pause()
 			}
+			select {
+			case <-p.baseCtx.Done():
+				// this worker queue is shut-down don't reboost
+			default:
+				if p.numberOfWorkers == 0 && atomic.LoadInt64(&p.numInQueue) > 0 {
+					// OK there are no workers but... there's still work to be done -> Reboost
+					p.zeroBoost()
+					// p.lock will be unlocked by zeroBoost
+					return
+				}
+			}
 			p.lock.Unlock()
 		}()
 	}