From 1cdc840f8f4db57341c32200bb959e77edef482f Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 10 Oct 2022 19:47:00 +0000 Subject: [PATCH 1/4] Keep languages from attributes. --- modules/git/repo_language_stats_gogit.go | 8 ++++++-- modules/git/repo_language_stats_nogogit.go | 7 ++++++- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index 34b0dc45d3749..50c4e0f3929a3 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -45,6 +45,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err defer deferable() sizes := make(map[string]int64) + keepLanguage := make(map[string]struct{}) err = tree.Files().ForEach(func(f *object.File) error { if f.Size == 0 { return nil @@ -76,7 +77,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size - + keepLanguage[language] = struct{}{} return nil } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -91,6 +92,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size + keepLanguage[language] = struct{}{} return nil } } @@ -137,7 +139,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err for language := range sizes { langtype := enry.GetLanguageType(language) if langtype != enry.Programming && langtype != enry.Markup { - delete(sizes, language) + if _, keep := keepLanguage[language]; !keep { + delete(sizes, language) + } } } } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 7388ef403b923..22cf4571957cc 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -68,6 +68,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err contentBuf := bytes.Buffer{} var content []byte sizes := make(map[string]int64) + keepLanguage := make(map[string]struct{}) for _, f := range entries { select { case <-repo.Ctx.Done(): @@ -108,6 +109,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size() + keepLanguage[language] = struct{}{} continue } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -122,6 +124,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size() + keepLanguage[language] = struct{}{} continue } } @@ -189,7 +192,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err for language := range sizes { langtype := enry.GetLanguageType(language) if langtype != enry.Programming && langtype != enry.Markup { - delete(sizes, language) + if _, keep := keepLanguage[language]; !keep { + delete(sizes, language) + } } } } From aba04be879371843757bba1b414e891ecb831515 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Tue, 11 Oct 2022 10:36:29 +0000 Subject: [PATCH 2/4] Add suggestion. --- modules/git/repo_language_stats_gogit.go | 8 ++++---- modules/git/repo_language_stats_nogogit.go | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index 50c4e0f3929a3..1f7f0a45c5e2a 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -45,7 +45,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err defer deferable() sizes := make(map[string]int64) - keepLanguage := make(map[string]struct{}) + explicitIncludedLanguage := make(map[string]struct{}) err = tree.Files().ForEach(func(f *object.File) error { if f.Size == 0 { return nil @@ -77,7 +77,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size - keepLanguage[language] = struct{}{} + explicitIncludedLanguage[language] = struct{}{} return nil } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -92,7 +92,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size - keepLanguage[language] = struct{}{} + explicitIncludedLanguage[language] = struct{}{} return nil } } @@ -139,7 +139,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err for language := range sizes { langtype := enry.GetLanguageType(language) if langtype != enry.Programming && langtype != enry.Markup { - if _, keep := keepLanguage[language]; !keep { + if _, keep := explicitIncludedLanguage[language]; !keep { delete(sizes, language) } } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 22cf4571957cc..bbf8e8d0a724e 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -68,7 +68,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err contentBuf := bytes.Buffer{} var content []byte sizes := make(map[string]int64) - keepLanguage := make(map[string]struct{}) + explicitIncludedLanguage := make(map[string]struct{}) for _, f := range entries { select { case <-repo.Ctx.Done(): @@ -109,7 +109,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size() - keepLanguage[language] = struct{}{} + explicitIncludedLanguage[language] = struct{}{} continue } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -124,7 +124,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size() - keepLanguage[language] = struct{}{} + explicitIncludedLanguage[language] = struct{}{} continue } } @@ -192,7 +192,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err for language := range sizes { langtype := enry.GetLanguageType(language) if langtype != enry.Programming && langtype != enry.Markup { - if _, keep := keepLanguage[language]; !keep { + if _, keep := explicitIncludedLanguage[language]; !keep { delete(sizes, language) } } From 23bdc78157594e425f92b820bf6970ec6c94547c Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Wed, 12 Oct 2022 06:26:58 +0000 Subject: [PATCH 3/4] Use set. --- modules/git/repo_language_stats_gogit.go | 15 ++++++++------- modules/git/repo_language_stats_nogogit.go | 15 ++++++++------- 2 files changed, 16 insertions(+), 14 deletions(-) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index 1f7f0a45c5e2a..cf2901266d3e7 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -12,6 +12,7 @@ import ( "strings" "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/container" "github.com/go-enry/go-enry/v2" "github.com/go-git/go-git/v5" @@ -45,7 +46,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err defer deferable() sizes := make(map[string]int64) - explicitIncludedLanguage := make(map[string]struct{}) + // Languages explicit specifiyed with linguist-language or gitlab-language are always displayed + // even if they are not of type programming or markup. + explicitIncludedLanguage := make(container.Set[string]) err = tree.Files().ForEach(func(f *object.File) error { if f.Size == 0 { return nil @@ -77,7 +80,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size - explicitIncludedLanguage[language] = struct{}{} + explicitIncludedLanguage.Add(language) return nil } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -92,7 +95,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size - explicitIncludedLanguage[language] = struct{}{} + explicitIncludedLanguage.Add(language) return nil } } @@ -138,10 +141,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if len(sizes) > 1 { for language := range sizes { langtype := enry.GetLanguageType(language) - if langtype != enry.Programming && langtype != enry.Markup { - if _, keep := explicitIncludedLanguage[language]; !keep { - delete(sizes, language) - } + if langtype != enry.Programming && langtype != enry.Markup && !explicitIncludedLanguage.Contains(language) { + delete(sizes, language) } } } diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index bbf8e8d0a724e..8d982f487c996 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -14,6 +14,7 @@ import ( "strings" "code.gitea.io/gitea/modules/analyze" + "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/log" "github.com/go-enry/go-enry/v2" @@ -68,7 +69,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err contentBuf := bytes.Buffer{} var content []byte sizes := make(map[string]int64) - explicitIncludedLanguage := make(map[string]struct{}) + // Languages explicit specifiyed with linguist-language or gitlab-language are always displayed + // even if they are not of type programming or markup. + explicitIncludedLanguage := make(container.Set[string]) for _, f := range entries { select { case <-repo.Ctx.Done(): @@ -109,7 +112,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size() - explicitIncludedLanguage[language] = struct{}{} + explicitIncludedLanguage.Add(language) continue } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -124,7 +127,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err } sizes[language] += f.Size() - explicitIncludedLanguage[language] = struct{}{} + explicitIncludedLanguage.Add(language) continue } } @@ -191,10 +194,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err if len(sizes) > 1 { for language := range sizes { langtype := enry.GetLanguageType(language) - if langtype != enry.Programming && langtype != enry.Markup { - if _, keep := explicitIncludedLanguage[language]; !keep { - delete(sizes, language) - } + if langtype != enry.Programming && langtype != enry.Markup && !explicitIncludedLanguage.Contains(language) { + delete(sizes, language) } } } From 3c159b5d214966ce2cdb4afb3212736f82b15da4 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Sun, 16 Oct 2022 10:26:38 +0000 Subject: [PATCH 4/4] Add suggestions. --- modules/git/repo_language_stats_gogit.go | 40 +++++++++++++-------- modules/git/repo_language_stats_nogogit.go | 41 ++++++++++++++-------- 2 files changed, 51 insertions(+), 30 deletions(-) diff --git a/modules/git/repo_language_stats_gogit.go b/modules/git/repo_language_stats_gogit.go index cf2901266d3e7..503e774e7a33e 100644 --- a/modules/git/repo_language_stats_gogit.go +++ b/modules/git/repo_language_stats_gogit.go @@ -12,7 +12,6 @@ import ( "strings" "code.gitea.io/gitea/modules/analyze" - "code.gitea.io/gitea/modules/container" "github.com/go-enry/go-enry/v2" "github.com/go-git/go-git/v5" @@ -45,10 +44,15 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err checker, deferable := repo.CheckAttributeReader(commitID) defer deferable() + // sizes contains the current calculated size of all files by language sizes := make(map[string]int64) - // Languages explicit specifiyed with linguist-language or gitlab-language are always displayed - // even if they are not of type programming or markup. - explicitIncludedLanguage := make(container.Set[string]) + // by default we will only count the sizes of programming languages or markup languages + // unless they are explicitly set using linguist-language + includedLanguage := map[string]bool{} + // or if there's only one language in the repository + firstExcludedLanguage := "" + firstExcludedLanguageSize := int64(0) + err = tree.Files().ForEach(func(f *object.File) error { if f.Size == 0 { return nil @@ -79,8 +83,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language = group } + // this language will always be added to the size sizes[language] += f.Size - explicitIncludedLanguage.Add(language) return nil } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -94,8 +98,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language = group } + // this language will always be added to the size sizes[language] += f.Size - explicitIncludedLanguage.Add(language) return nil } } @@ -129,7 +133,18 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language = group } - sizes[language] += f.Size + included, checked := includedLanguage[language] + if !checked { + langtype := enry.GetLanguageType(language) + included = langtype == enry.Programming || langtype == enry.Markup + includedLanguage[language] = included + } + if included { + sizes[language] += f.Size + } else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) { + firstExcludedLanguage = language + firstExcludedLanguageSize += f.Size + } return nil }) @@ -137,14 +152,9 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err return nil, err } - // filter special languages unless they are the only language - if len(sizes) > 1 { - for language := range sizes { - langtype := enry.GetLanguageType(language) - if langtype != enry.Programming && langtype != enry.Markup && !explicitIncludedLanguage.Contains(language) { - delete(sizes, language) - } - } + // If there are no included languages add the first excluded language + if len(sizes) == 0 && firstExcludedLanguage != "" { + sizes[firstExcludedLanguage] = firstExcludedLanguageSize } return sizes, nil diff --git a/modules/git/repo_language_stats_nogogit.go b/modules/git/repo_language_stats_nogogit.go index 8d982f487c996..baeb1149098f1 100644 --- a/modules/git/repo_language_stats_nogogit.go +++ b/modules/git/repo_language_stats_nogogit.go @@ -14,7 +14,6 @@ import ( "strings" "code.gitea.io/gitea/modules/analyze" - "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/log" "github.com/go-enry/go-enry/v2" @@ -68,10 +67,16 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err contentBuf := bytes.Buffer{} var content []byte + + // sizes contains the current calculated size of all files by language sizes := make(map[string]int64) - // Languages explicit specifiyed with linguist-language or gitlab-language are always displayed - // even if they are not of type programming or markup. - explicitIncludedLanguage := make(container.Set[string]) + // by default we will only count the sizes of programming languages or markup languages + // unless they are explicitly set using linguist-language + includedLanguage := map[string]bool{} + // or if there's only one language in the repository + firstExcludedLanguage := "" + firstExcludedLanguageSize := int64(0) + for _, f := range entries { select { case <-repo.Ctx.Done(): @@ -111,8 +116,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language = group } + // this language will always be added to the size sizes[language] += f.Size() - explicitIncludedLanguage.Add(language) continue } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" { // strip off a ? if present @@ -126,8 +131,8 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language = group } + // this language will always be added to the size sizes[language] += f.Size() - explicitIncludedLanguage.Add(language) continue } } @@ -186,18 +191,24 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err language = group } - sizes[language] += f.Size() + included, checked := includedLanguage[language] + if !checked { + langtype := enry.GetLanguageType(language) + included = langtype == enry.Programming || langtype == enry.Markup + includedLanguage[language] = included + } + if included { + sizes[language] += f.Size() + } else if len(sizes) == 0 && (firstExcludedLanguage == "" || firstExcludedLanguage == language) { + firstExcludedLanguage = language + firstExcludedLanguageSize += f.Size() + } continue } - // filter special languages unless they are the only language - if len(sizes) > 1 { - for language := range sizes { - langtype := enry.GetLanguageType(language) - if langtype != enry.Programming && langtype != enry.Markup && !explicitIncludedLanguage.Contains(language) { - delete(sizes, language) - } - } + // If there are no included languages add the first excluded language + if len(sizes) == 0 && firstExcludedLanguage != "" { + sizes[firstExcludedLanguage] = firstExcludedLanguageSize } return sizes, nil