Change language statistics to save size instead of percentage (#11681)
* Change language statistics to save size instead of percentage in database Co-Authored-By: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com> * Do not exclude if only language * Fix edge cases with special languages Co-authored-by: Cirno the Strongest <1447794+CirnoT@users.noreply.github.com>
This commit is contained in:
parent
4395c607ed
commit
ea4c139cd2
5 changed files with 150 additions and 38 deletions
|
@ -212,6 +212,8 @@ var migrations = []Migration{
|
||||||
NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn),
|
NewMigration("Add ResolveDoerID to Comment table", addResolveDoerIDCommentColumn),
|
||||||
// v139 -> v140
|
// v139 -> v140
|
||||||
NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs),
|
NewMigration("prepend refs/heads/ to issue refs", prependRefsHeadsToIssueRefs),
|
||||||
|
// v140 -> v141
|
||||||
|
NewMigration("Save detected language file size to database instead of percent", fixLanguageStatsToSaveSize),
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetCurrentDBVersion returns the current db version
|
// GetCurrentDBVersion returns the current db version
|
||||||
|
|
56
models/migrations/v140.go
Normal file
56
models/migrations/v140.go
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
// Copyright 2020 The Gitea Authors. All rights reserved.
|
||||||
|
// Use of this source code is governed by a MIT-style
|
||||||
|
// license that can be found in the LICENSE file.
|
||||||
|
|
||||||
|
package migrations
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"code.gitea.io/gitea/modules/setting"
|
||||||
|
|
||||||
|
"xorm.io/xorm"
|
||||||
|
)
|
||||||
|
|
||||||
|
func fixLanguageStatsToSaveSize(x *xorm.Engine) error {
|
||||||
|
// LanguageStat see models/repo_language_stats.go
|
||||||
|
type LanguageStat struct {
|
||||||
|
Size int64 `xorm:"NOT NULL DEFAULT 0"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// RepoIndexerType specifies the repository indexer type
|
||||||
|
type RepoIndexerType int
|
||||||
|
|
||||||
|
const (
|
||||||
|
// RepoIndexerTypeCode code indexer
|
||||||
|
RepoIndexerTypeCode RepoIndexerType = iota // 0
|
||||||
|
// RepoIndexerTypeStats repository stats indexer
|
||||||
|
RepoIndexerTypeStats // 1
|
||||||
|
)
|
||||||
|
|
||||||
|
// RepoIndexerStatus see models/repo_indexer.go
|
||||||
|
type RepoIndexerStatus struct {
|
||||||
|
IndexerType RepoIndexerType `xorm:"INDEX(s) NOT NULL DEFAULT 0"`
|
||||||
|
}
|
||||||
|
|
||||||
|
if err := x.Sync2(new(LanguageStat)); err != nil {
|
||||||
|
return fmt.Errorf("Sync2: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
x.Delete(&RepoIndexerStatus{IndexerType: RepoIndexerTypeStats})
|
||||||
|
|
||||||
|
// Delete language stat statuses
|
||||||
|
truncExpr := "TRUNCATE TABLE"
|
||||||
|
if setting.Database.UseSQLite3 {
|
||||||
|
truncExpr = "DELETE FROM"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete language stats
|
||||||
|
if _, err := x.Exec(fmt.Sprintf("%s language_stat", truncExpr)); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
sess := x.NewSession()
|
||||||
|
defer sess.Close()
|
||||||
|
return dropTableColumns(sess, "language_stat", "percentage")
|
||||||
|
}
|
|
@ -20,11 +20,28 @@ type LanguageStat struct {
|
||||||
CommitID string
|
CommitID string
|
||||||
IsPrimary bool
|
IsPrimary bool
|
||||||
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
|
Language string `xorm:"VARCHAR(30) UNIQUE(s) INDEX NOT NULL"`
|
||||||
Percentage float32 `xorm:"NUMERIC(5,2) NOT NULL DEFAULT 0"`
|
Percentage float32 `xorm:"-"`
|
||||||
|
Size int64 `xorm:"NOT NULL DEFAULT 0"`
|
||||||
Color string `xorm:"-"`
|
Color string `xorm:"-"`
|
||||||
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
|
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// specialLanguages defines list of languages that are excluded from the calculation
|
||||||
|
// unless they are the only language present in repository. Only languages which under
|
||||||
|
// normal circumstances are not considered to be code should be listed here.
|
||||||
|
var specialLanguages = map[string]struct{}{
|
||||||
|
"XML": {},
|
||||||
|
"JSON": {},
|
||||||
|
"TOML": {},
|
||||||
|
"YAML": {},
|
||||||
|
"INI": {},
|
||||||
|
"SQL": {},
|
||||||
|
"SVG": {},
|
||||||
|
"Text": {},
|
||||||
|
"Markdown": {},
|
||||||
|
"other": {},
|
||||||
|
}
|
||||||
|
|
||||||
// LanguageStatList defines a list of language statistics
|
// LanguageStatList defines a list of language statistics
|
||||||
type LanguageStatList []*LanguageStat
|
type LanguageStatList []*LanguageStat
|
||||||
|
|
||||||
|
@ -34,12 +51,53 @@ func (stats LanguageStatList) loadAttributes() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
|
||||||
|
langPerc := make(map[string]float32)
|
||||||
|
var otherPerc float32 = 100
|
||||||
|
var total int64
|
||||||
|
// Check that repository has at least one non-special language
|
||||||
|
var skipSpecial bool
|
||||||
|
for _, stat := range stats {
|
||||||
|
if _, ok := specialLanguages[stat.Language]; !ok {
|
||||||
|
skipSpecial = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, stat := range stats {
|
||||||
|
// Exclude specific languages from percentage calculation
|
||||||
|
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
total += stat.Size
|
||||||
|
}
|
||||||
|
if total > 0 {
|
||||||
|
for _, stat := range stats {
|
||||||
|
// Exclude specific languages from percentage calculation
|
||||||
|
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
|
||||||
|
if perc <= 0.1 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
otherPerc -= perc
|
||||||
|
langPerc[stat.Language] = perc
|
||||||
|
}
|
||||||
|
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
|
||||||
|
} else {
|
||||||
|
otherPerc = 100
|
||||||
|
}
|
||||||
|
if otherPerc > 0 {
|
||||||
|
langPerc["other"] = otherPerc
|
||||||
|
}
|
||||||
|
return langPerc
|
||||||
|
}
|
||||||
|
|
||||||
func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
|
func (repo *Repository) getLanguageStats(e Engine) (LanguageStatList, error) {
|
||||||
stats := make(LanguageStatList, 0, 6)
|
stats := make(LanguageStatList, 0, 6)
|
||||||
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`percentage`").Find(&stats); err != nil {
|
if err := e.Where("`repo_id` = ?", repo.ID).Desc("`size`").Find(&stats); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
stats.loadAttributes()
|
|
||||||
return stats, nil
|
return stats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -54,13 +112,18 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
perc := stats.getLanguagePercentages()
|
||||||
topstats := make(LanguageStatList, 0, limit)
|
topstats := make(LanguageStatList, 0, limit)
|
||||||
var other float32
|
var other float32
|
||||||
for i := range stats {
|
for i := range stats {
|
||||||
if stats[i].Language == "other" || len(topstats) >= limit {
|
if _, ok := perc[stats[i].Language]; !ok {
|
||||||
other += stats[i].Percentage
|
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if stats[i].Language == "other" || len(topstats) >= limit {
|
||||||
|
other += perc[stats[i].Language]
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
stats[i].Percentage = perc[stats[i].Language]
|
||||||
topstats = append(topstats, stats[i])
|
topstats = append(topstats, stats[i])
|
||||||
}
|
}
|
||||||
if other > 0 {
|
if other > 0 {
|
||||||
|
@ -71,11 +134,12 @@ func (repo *Repository) GetTopLanguageStats(limit int) (LanguageStatList, error)
|
||||||
Percentage: float32(math.Round(float64(other)*10) / 10),
|
Percentage: float32(math.Round(float64(other)*10) / 10),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
topstats.loadAttributes()
|
||||||
return topstats, nil
|
return topstats, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// UpdateLanguageStats updates the language statistics for repository
|
// UpdateLanguageStats updates the language statistics for repository
|
||||||
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]float32) error {
|
func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]int64) error {
|
||||||
sess := x.NewSession()
|
sess := x.NewSession()
|
||||||
if err := sess.Begin(); err != nil {
|
if err := sess.Begin(); err != nil {
|
||||||
return err
|
return err
|
||||||
|
@ -87,15 +151,15 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
var topLang string
|
var topLang string
|
||||||
var p float32
|
var s int64
|
||||||
for lang, perc := range stats {
|
for lang, size := range stats {
|
||||||
if perc > p {
|
if size > s {
|
||||||
p = perc
|
s = size
|
||||||
topLang = strings.ToLower(lang)
|
topLang = strings.ToLower(lang)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for lang, perc := range stats {
|
for lang, size := range stats {
|
||||||
upd := false
|
upd := false
|
||||||
llang := strings.ToLower(lang)
|
llang := strings.ToLower(lang)
|
||||||
for _, s := range oldstats {
|
for _, s := range oldstats {
|
||||||
|
@ -103,8 +167,8 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
|
||||||
if strings.ToLower(s.Language) == llang {
|
if strings.ToLower(s.Language) == llang {
|
||||||
s.CommitID = commitID
|
s.CommitID = commitID
|
||||||
s.IsPrimary = llang == topLang
|
s.IsPrimary = llang == topLang
|
||||||
s.Percentage = perc
|
s.Size = size
|
||||||
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`percentage`", "`is_primary`").Update(s); err != nil {
|
if _, err := sess.ID(s.ID).Cols("`commit_id`", "`size`", "`is_primary`").Update(s); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
upd = true
|
upd = true
|
||||||
|
@ -118,7 +182,7 @@ func (repo *Repository) UpdateLanguageStats(commitID string, stats map[string]fl
|
||||||
CommitID: commitID,
|
CommitID: commitID,
|
||||||
IsPrimary: llang == topLang,
|
IsPrimary: llang == topLang,
|
||||||
Language: lang,
|
Language: lang,
|
||||||
Percentage: perc,
|
Size: size,
|
||||||
}); err != nil {
|
}); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
@ -153,7 +217,7 @@ func CopyLanguageStat(originalRepo, destRepo *Repository) error {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
RepoLang := make(LanguageStatList, 0, 6)
|
RepoLang := make(LanguageStatList, 0, 6)
|
||||||
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`percentage`").Find(&RepoLang); err != nil {
|
if err := sess.Where("`repo_id` = ?", originalRepo.ID).Desc("`size`").Find(&RepoLang); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if len(RepoLang) > 0 {
|
if len(RepoLang) > 0 {
|
||||||
|
|
|
@ -8,7 +8,6 @@ import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"math"
|
|
||||||
|
|
||||||
"code.gitea.io/gitea/modules/analyze"
|
"code.gitea.io/gitea/modules/analyze"
|
||||||
|
|
||||||
|
@ -21,7 +20,7 @@ import (
|
||||||
const fileSizeLimit int64 = 16 * 1024 * 1024
|
const fileSizeLimit int64 = 16 * 1024 * 1024
|
||||||
|
|
||||||
// GetLanguageStats calculates language stats for git repository at specified commit
|
// GetLanguageStats calculates language stats for git repository at specified commit
|
||||||
func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, error) {
|
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
|
||||||
r, err := git.PlainOpen(repo.Path)
|
r, err := git.PlainOpen(repo.Path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -43,7 +42,6 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
|
||||||
}
|
}
|
||||||
|
|
||||||
sizes := make(map[string]int64)
|
sizes := make(map[string]int64)
|
||||||
var total int64
|
|
||||||
err = tree.Files().ForEach(func(f *object.File) error {
|
err = tree.Files().ForEach(func(f *object.File) error {
|
||||||
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
|
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
|
||||||
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
|
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
|
||||||
|
@ -60,11 +58,10 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
|
||||||
|
|
||||||
language := analyze.GetCodeLanguage(f.Name, content)
|
language := analyze.GetCodeLanguage(f.Name, content)
|
||||||
if language == enry.OtherLanguage || language == "" {
|
if language == enry.OtherLanguage || language == "" {
|
||||||
return nil
|
language = "other"
|
||||||
}
|
}
|
||||||
|
|
||||||
sizes[language] += f.Size
|
sizes[language] += f.Size
|
||||||
total += f.Size
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
})
|
})
|
||||||
|
@ -72,21 +69,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
stats := make(map[string]float32)
|
if len(sizes) == 0 {
|
||||||
var otherPerc float32 = 100
|
sizes["other"] = 0
|
||||||
for language, size := range sizes {
|
|
||||||
perc := float32(math.Round(float64(size)/float64(total)*1000) / 10)
|
|
||||||
if perc <= 0.1 {
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
otherPerc -= perc
|
|
||||||
stats[language] = perc
|
return sizes, nil
|
||||||
}
|
|
||||||
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
|
|
||||||
if otherPerc > 0 {
|
|
||||||
stats["other"] = otherPerc
|
|
||||||
}
|
|
||||||
return stats, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func readFile(f *object.File, limit int64) ([]byte, error) {
|
func readFile(f *object.File, limit int64) ([]byte, error) {
|
||||||
|
|
|
@ -34,6 +34,9 @@ func TestRepoStatsIndex(t *testing.T) {
|
||||||
|
|
||||||
repo, err := models.GetRepositoryByID(1)
|
repo, err := models.GetRepositoryByID(1)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
status, err := repo.GetIndexerStatus(models.RepoIndexerTypeStats)
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
|
||||||
langs, err := repo.GetTopLanguageStats(5)
|
langs, err := repo.GetTopLanguageStats(5)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
assert.Len(t, langs, 1)
|
assert.Len(t, langs, 1)
|
||||||
|
|
Loading…
Reference in a new issue