Cron job to cleanup hook_task table (#13080)

Close **Prune hook_task Table (#10741)**

Added a cron job to delete webhook deliveries in the hook_task table. It can be turned on/off and the schedule controlled globally via app.ini. The data can be deleted either by the age of the delivery, which is the default, or by deleting all but the most recent deliveries _per webhook_.

Note: I had previously submitted PR #11416, but I closed it when I realized that I had deleted per repository instead of per webhook. Also, I decided that allowing the settings to be overridden via the UI was overkill. This version also allows deletion by age, which is probably what most people would want.
This commit is contained in:
Brad Albright 2021-01-26 15:02:42 -06:00 committed by GitHub
parent 0f726caf97
commit a598877fdf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 251 additions and 0 deletions

View file

@ -6,6 +6,7 @@
package models
import (
"context"
"encoding/json"
"fmt"
"strings"
@ -39,6 +40,26 @@ func ToHookContentType(name string) HookContentType {
return hookContentTypes[name]
}
// HookTaskCleanupType selects the strategy used when pruning the hook_task table.
type HookTaskCleanupType int

const (
	// OlderThan prunes hook_task rows according to the age of the row.
	OlderThan HookTaskCleanupType = iota
	// PerWebhook prunes hook_task rows so that only the most recent deliveries of each webhook remain.
	PerWebhook
)

// hookTaskCleanupTypes maps the textual name of a cleanup strategy to its HookTaskCleanupType value.
var hookTaskCleanupTypes = map[string]HookTaskCleanupType{
	"OlderThan":  OlderThan,
	"PerWebhook": PerWebhook,
}

// ToHookTaskCleanupType returns the HookTaskCleanupType registered under name.
// A name that is not registered yields OlderThan, the zero value of the type.
func ToHookTaskCleanupType(name string) HookTaskCleanupType {
	if cleanupType, known := hookTaskCleanupTypes[name]; known {
		return cleanupType
	}
	return OlderThan
}
// Name returns the name of a given web hook's content type
func (t HookContentType) Name() string {
switch t {
@ -738,3 +759,69 @@ func FindRepoUndeliveredHookTasks(repoID int64) ([]*HookTask, error) {
}
return tasks, nil
}
// CleanupHookTaskTable prunes delivered rows from the hook_task table.
//
// With cleanupType OlderThan, every delivered row whose delivered timestamp
// (nanoseconds since the Unix epoch) lies more than olderThan in the past is
// removed in a single statement. With cleanupType PerWebhook, each webhook is
// visited in turn and only its numberToKeep most recent deliveries are kept;
// ctx is checked before each webhook and a cancellation error is returned if
// it is done. Any other cleanupType deletes nothing.
func CleanupHookTaskTable(ctx context.Context, cleanupType HookTaskCleanupType, olderThan time.Duration, numberToKeep int) error {
	log.Trace("Doing: CleanupHookTaskTable")

	switch cleanupType {
	case OlderThan:
		cutoff := time.Now().Add(-olderThan).UnixNano()
		removed, err := x.
			Where("is_delivered = ? and delivered < ?", true, cutoff).
			Delete(new(HookTask))
		if err != nil {
			return err
		}
		log.Trace("Deleted %d rows from hook_task", removed)

	case PerWebhook:
		webhookIDs := make([]int64, 0, 10)
		if err := x.Table("webhook").
			Where("id > 0").
			Cols("id").
			Find(&webhookIDs); err != nil {
			return err
		}
		for _, webhookID := range webhookIDs {
			// Stop between webhooks as soon as the caller cancels.
			select {
			case <-ctx.Done():
				return ErrCancelledf("Before deleting hook_task records for hook id %d", webhookID)
			default:
			}
			if err := deleteDeliveredHookTasksByWebhook(webhookID, numberToKeep); err != nil {
				return err
			}
		}
	}

	log.Trace("Finished: CleanupHookTaskTable")
	return nil
}
// deleteDeliveredHookTasksByWebhook removes delivered hook_task rows of the
// webhook identified by hookID, keeping the numberDeliveriesToKeep most recent
// deliveries.
//
// It first reads the delivered timestamp of the row that follows the ones to
// keep (ordered newest first), then deletes every delivered row of the webhook
// whose timestamp is less than or equal to it. If no such row exists there is
// nothing to delete.
func deleteDeliveredHookTasksByWebhook(hookID int64, numberDeliveriesToKeep int) error {
	log.Trace("Deleting hook_task rows for webhook %d, keeping the most recent %d deliveries", hookID, numberDeliveriesToKeep)

	// Fetch a single timestamp, offset past the deliveries that must survive.
	cutoffs := make([]int64, 0, 10)
	err := x.Table("hook_task").
		Where("hook_task.hook_id = ? AND hook_task.is_delivered = ? AND hook_task.delivered is not null", hookID, true).
		Cols("hook_task.delivered").
		Join("INNER", "webhook", "hook_task.hook_id = webhook.id").
		OrderBy("hook_task.delivered desc").
		Limit(1, numberDeliveriesToKeep).
		Find(&cutoffs)
	if err != nil {
		return err
	}

	if len(cutoffs) == 0 {
		log.Trace("No hook_task rows to delete for webhook %d", hookID)
		return nil
	}

	removed, err := x.
		Where("hook_id = ? and is_delivered = ? and delivered <= ?", hookID, true, cutoffs[0]).
		Delete(new(HookTask))
	if err != nil {
		return err
	}
	log.Trace("Deleted %d hook_task rows for webhook %d", removed, hookID)
	return nil
}

View file

@ -5,8 +5,10 @@
package models
import (
"context"
"encoding/json"
"testing"
"time"
api "code.gitea.io/gitea/modules/structs"
@ -223,3 +225,115 @@ func TestUpdateHookTask(t *testing.T) {
assert.NoError(t, UpdateHookTask(hook))
AssertExistsAndLoadBean(t, hook)
}
// TestCleanupHookTaskTable_PerWebhook_DeletesDelivered verifies that a
// delivered task is removed by the PerWebhook cleanup when zero deliveries
// are to be kept.
func TestCleanupHookTaskTable_PerWebhook_DeletesDelivered(t *testing.T) {
	assert.NoError(t, PrepareTestDatabase())

	task := &HookTask{
		HookID:      3,
		RepoID:      3,
		Typ:         GITEA,
		URL:         "http://www.example.com/unit_test",
		Payloader:   &api.PushPayload{},
		Delivered:   time.Now().UnixNano(),
		IsDelivered: true,
	}
	AssertNotExistsBean(t, task)
	assert.NoError(t, CreateHookTask(task))
	AssertExistsAndLoadBean(t, task)

	err := CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 0)
	assert.NoError(t, err)
	AssertNotExistsBean(t, task)
}
// TestCleanupHookTaskTable_PerWebhook_LeavesUndelivered verifies that the
// PerWebhook cleanup does not remove a task that has not been delivered, even
// when zero deliveries are to be kept.
func TestCleanupHookTaskTable_PerWebhook_LeavesUndelivered(t *testing.T) {
	assert.NoError(t, PrepareTestDatabase())

	task := &HookTask{
		HookID:      4,
		RepoID:      2,
		Typ:         GITEA,
		URL:         "http://www.example.com/unit_test",
		Payloader:   &api.PushPayload{},
		IsDelivered: false,
	}
	AssertNotExistsBean(t, task)
	assert.NoError(t, CreateHookTask(task))
	AssertExistsAndLoadBean(t, task)

	err := CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 0)
	assert.NoError(t, err)
	AssertExistsAndLoadBean(t, task)
}
// TestCleanupHookTaskTable_PerWebhook_LeavesMostRecentTask verifies that a
// task delivered just now survives the PerWebhook cleanup when one delivery
// is to be kept.
func TestCleanupHookTaskTable_PerWebhook_LeavesMostRecentTask(t *testing.T) {
	assert.NoError(t, PrepareTestDatabase())

	task := &HookTask{
		HookID:      4,
		RepoID:      2,
		Typ:         GITEA,
		URL:         "http://www.example.com/unit_test",
		Payloader:   &api.PushPayload{},
		Delivered:   time.Now().UnixNano(),
		IsDelivered: true,
	}
	AssertNotExistsBean(t, task)
	assert.NoError(t, CreateHookTask(task))
	AssertExistsAndLoadBean(t, task)

	err := CleanupHookTaskTable(context.Background(), PerWebhook, 168*time.Hour, 1)
	assert.NoError(t, err)
	AssertExistsAndLoadBean(t, task)
}
// TestCleanupHookTaskTable_OlderThan_DeletesDelivered verifies that a task
// delivered eight days ago is removed by the OlderThan cleanup when the age
// limit is 168 hours.
func TestCleanupHookTaskTable_OlderThan_DeletesDelivered(t *testing.T) {
	assert.NoError(t, PrepareTestDatabase())

	task := &HookTask{
		HookID:      3,
		RepoID:      3,
		Typ:         GITEA,
		URL:         "http://www.example.com/unit_test",
		Payloader:   &api.PushPayload{},
		Delivered:   time.Now().AddDate(0, 0, -8).UnixNano(),
		IsDelivered: true,
	}
	AssertNotExistsBean(t, task)
	assert.NoError(t, CreateHookTask(task))
	AssertExistsAndLoadBean(t, task)

	err := CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0)
	assert.NoError(t, err)
	AssertNotExistsBean(t, task)
}
// TestCleanupHookTaskTable_OlderThan_LeavesUndelivered verifies that the
// OlderThan cleanup does not remove a task that has not been delivered.
func TestCleanupHookTaskTable_OlderThan_LeavesUndelivered(t *testing.T) {
	assert.NoError(t, PrepareTestDatabase())

	task := &HookTask{
		HookID:      4,
		RepoID:      2,
		Typ:         GITEA,
		URL:         "http://www.example.com/unit_test",
		Payloader:   &api.PushPayload{},
		IsDelivered: false,
	}
	AssertNotExistsBean(t, task)
	assert.NoError(t, CreateHookTask(task))
	AssertExistsAndLoadBean(t, task)

	err := CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0)
	assert.NoError(t, err)
	AssertExistsAndLoadBean(t, task)
}
// TestCleanupHookTaskTable_OlderThan_LeavesTaskEarlierThanAgeToDelete verifies
// that a task delivered six days ago, which is younger than the 168 hour age
// limit, survives the OlderThan cleanup.
func TestCleanupHookTaskTable_OlderThan_LeavesTaskEarlierThanAgeToDelete(t *testing.T) {
	assert.NoError(t, PrepareTestDatabase())

	task := &HookTask{
		HookID:      4,
		RepoID:      2,
		Typ:         GITEA,
		URL:         "http://www.example.com/unit_test",
		Payloader:   &api.PushPayload{},
		Delivered:   time.Now().AddDate(0, 0, -6).UnixNano(),
		IsDelivered: true,
	}
	AssertNotExistsBean(t, task)
	assert.NoError(t, CreateHookTask(task))
	AssertExistsAndLoadBean(t, task)

	err := CleanupHookTaskTable(context.Background(), OlderThan, 168*time.Hour, 0)
	assert.NoError(t, err)
	AssertExistsAndLoadBean(t, task)
}