Improve queue & process & stacktrace (#24636)

Although some features are mixed together in this PR, this PR is not
that large, and these features are all related.

Actually there are more than 70 lines are for a toy "test queue", so
this PR is quite simple.

Major features:

1. Allow site admin to clear a queue (remove all items in a queue)
* Because there is no transaction, the "unique queue" could be corrupted
in rare cases, that's unfixable.
* eg: the item is in the "set" but not in the "list", so the item would
never be able to be pushed into the queue.
* Now site admin could simply clear the queue, then everything becomes
correct, the lost items could be re-pushed into queue by future
operations.
3. Split the "admin/monitor" to separate pages
4. Allow to download diagnosis report
* In history, there were many users reporting that Gitea queue gets
stuck, or Gitea's CPU is 100%
    * With diagnosis report, maintainers could know what happens clearly

The diagnosis report sample:
[gitea-diagnosis-20230510-192913.zip](11441346/gitea-diagnosis-20230510-192913.zip)
, use "go tool pprof profile.dat" to view the report.


Screenshots:


![image](320659b4-2eda-4def-8dc0-5ea08d578063)


![image](c5c46fae-9dc0-44ca-8cd3-57beedc5035e)


![image](6168a811-42a1-4e64-a263-0617a6c8c4fe)

---------

Co-authored-by: Jason Song <i@wolfogre.com>
Co-authored-by: Giteabot <teabot@gitea.io>
This commit is contained in:
wxiaoguang 2023-05-11 15:45:47 +08:00 committed by GitHub
parent b3af7484bc
commit 58dfaf3a75
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 388 additions and 229 deletions

View file

@ -13,8 +13,6 @@ import (
activities_model "code.gitea.io/gitea/models/activities"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/updatechecker"
"code.gitea.io/gitea/modules/web"
@ -24,7 +22,8 @@ import (
const (
tplDashboard base.TplName = "admin/dashboard"
tplMonitor base.TplName = "admin/monitor"
tplCron base.TplName = "admin/cron"
tplQueue base.TplName = "admin/queue"
tplStacktrace base.TplName = "admin/stacktrace"
tplQueueManage base.TplName = "admin/queue_manage"
)
@ -142,47 +141,15 @@ func DashboardPost(ctx *context.Context) {
}
}
if form.From == "monitor" {
ctx.Redirect(setting.AppSubURL + "/admin/monitor")
ctx.Redirect(setting.AppSubURL + "/admin/monitor/cron")
} else {
ctx.Redirect(setting.AppSubURL + "/admin")
}
}
// Monitor show admin monitor page
func Monitor(ctx *context.Context) {
ctx.Data["Title"] = ctx.Tr("admin.monitor")
ctx.Data["PageIsAdminMonitor"] = true
ctx.Data["Processes"], ctx.Data["ProcessCount"] = process.GetManager().Processes(false, true)
func CronTasks(ctx *context.Context) {
ctx.Data["Title"] = ctx.Tr("admin.monitor.cron")
ctx.Data["PageIsAdminMonitorCron"] = true
ctx.Data["Entries"] = cron.ListTasks()
ctx.Data["Queues"] = queue.GetManager().ManagedQueues()
ctx.HTML(http.StatusOK, tplMonitor)
}
// GoroutineStacktrace show admin monitor goroutines page
func GoroutineStacktrace(ctx *context.Context) {
ctx.Data["Title"] = ctx.Tr("admin.monitor")
ctx.Data["PageIsAdminMonitor"] = true
processStacks, processCount, goroutineCount, err := process.GetManager().ProcessStacktraces(false, false)
if err != nil {
ctx.ServerError("GoroutineStacktrace", err)
return
}
ctx.Data["ProcessStacks"] = processStacks
ctx.Data["GoroutineCount"] = goroutineCount
ctx.Data["ProcessCount"] = processCount
ctx.HTML(http.StatusOK, tplStacktrace)
}
// MonitorCancel cancels a process
func MonitorCancel(ctx *context.Context) {
pid := ctx.Params("pid")
process.GetManager().Cancel(process.IDType(pid))
ctx.JSON(http.StatusOK, map[string]interface{}{
"redirect": setting.AppSubURL + "/admin/monitor",
})
ctx.HTML(http.StatusOK, tplCron)
}

View file

@ -0,0 +1,61 @@
// Copyright 2023 The Gitea Authors.
// SPDX-License-Identifier: MIT
package admin
import (
"archive/zip"
"fmt"
"runtime/pprof"
"time"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/httplib"
)
func MonitorDiagnosis(ctx *context.Context) {
seconds := ctx.FormInt64("seconds")
if seconds <= 5 {
seconds = 5
}
if seconds > 300 {
seconds = 300
}
httplib.ServeSetHeaders(ctx.Resp, &httplib.ServeHeaderOptions{
ContentType: "application/zip",
Disposition: "attachment",
Filename: fmt.Sprintf("gitea-diagnosis-%s.zip", time.Now().Format("20060102-150405")),
})
zipWriter := zip.NewWriter(ctx.Resp)
defer zipWriter.Close()
f, err := zipWriter.CreateHeader(&zip.FileHeader{Name: "goroutine-before.txt", Method: zip.Deflate, Modified: time.Now()})
if err != nil {
ctx.ServerError("Failed to create zip file", err)
return
}
_ = pprof.Lookup("goroutine").WriteTo(f, 1)
f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "cpu-profile.dat", Method: zip.Deflate, Modified: time.Now()})
if err != nil {
ctx.ServerError("Failed to create zip file", err)
return
}
err = pprof.StartCPUProfile(f)
if err == nil {
time.Sleep(time.Duration(seconds) * time.Second)
pprof.StopCPUProfile()
} else {
_, _ = f.Write([]byte(err.Error()))
}
f, err = zipWriter.CreateHeader(&zip.FileHeader{Name: "goroutine-after.txt", Method: zip.Deflate, Modified: time.Now()})
if err != nil {
ctx.ServerError("Failed to create zip file", err)
return
}
_ = pprof.Lookup("goroutine").WriteTo(f, 1)
}

View file

@ -12,8 +12,18 @@ import (
"code.gitea.io/gitea/modules/setting"
)
// Queue shows details for a specific queue
func Queue(ctx *context.Context) {
func Queues(ctx *context.Context) {
if !setting.IsProd {
initTestQueueOnce()
}
ctx.Data["Title"] = ctx.Tr("admin.monitor.queue")
ctx.Data["PageIsAdminMonitorQueue"] = true
ctx.Data["Queues"] = queue.GetManager().ManagedQueues()
ctx.HTML(http.StatusOK, tplQueue)
}
// QueueManage shows details for a specific queue
func QueueManage(ctx *context.Context) {
qid := ctx.ParamsInt64("qid")
mq := queue.GetManager().GetManagedQueue(qid)
if mq == nil {
@ -57,3 +67,23 @@ func QueueSet(ctx *context.Context) {
ctx.Flash.Success(ctx.Tr("admin.monitor.queue.settings.changed"))
ctx.Redirect(setting.AppSubURL + "/admin/monitor/queue/" + strconv.FormatInt(qid, 10))
}
func QueueRemoveAllItems(ctx *context.Context) {
// Gitea's queue doesn't have transaction support
// So in rare cases, the queue could be corrupted/out-of-sync
// Site admin could remove all items from the queue to make it work again
qid := ctx.ParamsInt64("qid")
mq := queue.GetManager().GetManagedQueue(qid)
if mq == nil {
ctx.Status(http.StatusNotFound)
return
}
if err := mq.RemoveAllItems(ctx); err != nil {
ctx.ServerError("RemoveAllItems", err)
return
}
ctx.Flash.Success(ctx.Tr("admin.monitor.queue.settings.remove_all_items_done"))
ctx.Redirect(setting.AppSubURL + "/admin/monitor/queue/" + strconv.FormatInt(qid, 10))
}

View file

@ -0,0 +1,72 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package admin
import (
gocontext "context"
"sync"
"time"
"code.gitea.io/gitea/modules/graceful"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/queue"
"code.gitea.io/gitea/modules/setting"
)
var testQueueOnce sync.Once
// initTestQueueOnce initializes the test queue for dev mode
// the test queue will also be shown in the queue list
// developers could see the queue length / worker number / items number on the admin page and try to remove the items
func initTestQueueOnce() {
testQueueOnce.Do(func() {
qs := setting.QueueSettings{
Name: "test-queue",
Type: "channel",
Length: 20,
BatchLength: 2,
MaxWorkers: 3,
}
testQueue, err := queue.NewWorkerPoolQueueBySetting("test-queue", qs, func(t ...int64) (unhandled []int64) {
for range t {
select {
case <-graceful.GetManager().ShutdownContext().Done():
case <-time.After(5 * time.Second):
}
}
return nil
}, true)
if err != nil {
log.Error("unable to create test queue: %v", err)
return
}
queue.GetManager().AddManagedQueue(testQueue)
testQueue.SetWorkerMaxNumber(5)
go graceful.GetManager().RunWithShutdownFns(testQueue.Run)
go graceful.GetManager().RunWithShutdownContext(func(ctx gocontext.Context) {
cnt := int64(0)
adding := true
for {
select {
case <-ctx.Done():
case <-time.After(500 * time.Millisecond):
if adding {
if testQueue.GetQueueItemNumber() == qs.Length {
adding = false
}
} else {
if testQueue.GetQueueItemNumber() == 0 {
adding = true
}
}
if adding {
_ = testQueue.Push(cnt)
cnt++
}
}
}
})
})
}

View file

@ -0,0 +1,48 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package admin
import (
"net/http"
"runtime"
"code.gitea.io/gitea/modules/context"
"code.gitea.io/gitea/modules/process"
"code.gitea.io/gitea/modules/setting"
)
// Stacktrace show admin monitor goroutines page
func Stacktrace(ctx *context.Context) {
ctx.Data["Title"] = ctx.Tr("admin.monitor")
ctx.Data["PageIsAdminMonitorStacktrace"] = true
ctx.Data["GoroutineCount"] = runtime.NumGoroutine()
show := ctx.FormString("show")
ctx.Data["ShowGoroutineList"] = show
// by default, do not do anything which might cause server errors, to avoid unnecessary 500 pages.
// this page is the entrance of the chance to collect diagnosis report.
if show != "" {
showNoSystem := show == "process"
processStacks, processCount, _, err := process.GetManager().ProcessStacktraces(false, showNoSystem)
if err != nil {
ctx.ServerError("GoroutineStacktrace", err)
return
}
ctx.Data["ProcessStacks"] = processStacks
ctx.Data["ProcessCount"] = processCount
}
ctx.HTML(http.StatusOK, tplStacktrace)
}
// StacktraceCancel cancels a process
func StacktraceCancel(ctx *context.Context) {
pid := ctx.Params("pid")
process.GetManager().Cancel(process.IDType(pid))
ctx.JSON(http.StatusOK, map[string]interface{}{
"redirect": setting.AppSubURL + "/admin/monitor/stacktrace",
})
}

View file

@ -546,13 +546,16 @@ func registerRoutes(m *web.Route) {
})
m.Group("/monitor", func() {
m.Get("", admin.Monitor)
m.Get("/stacktrace", admin.GoroutineStacktrace)
m.Post("/cancel/{pid}", admin.MonitorCancel)
m.Get("/cron", admin.CronTasks)
m.Get("/stacktrace", admin.Stacktrace)
m.Post("/stacktrace/cancel/{pid}", admin.StacktraceCancel)
m.Get("/queue", admin.Queues)
m.Group("/queue/{qid}", func() {
m.Get("", admin.Queue)
m.Get("", admin.QueueManage)
m.Post("/set", admin.QueueSet)
m.Post("/remove-all-items", admin.QueueRemoveAllItems)
})
m.Get("/diagnosis", admin.MonitorDiagnosis)
})
m.Group("/users", func() {