Skip to content

Commit 3c4724d

Browse files
authored
Add .gitattribute assisted language detection to blame, diff and render (#17590)
Use check attribute code to check the assigned language of a file and send that in to chroma as a hint for the language of the file. Signed-off-by: Andrew Thornton <[email protected]>
1 parent 81a4fc7 commit 3c4724d

File tree

13 files changed

+221
-95
lines changed

13 files changed

+221
-95
lines changed

docs/content/doc/advanced/config-cheat-sheet.en-us.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -982,6 +982,14 @@ Multiple sanitisation rules can be defined by adding unique subsections, e.g. `[
982982
To apply a sanitisation rule only to a specific external renderer, the rule must use the renderer name, e.g. `[markup.sanitizer.asciidoc.rule-1]`.
983983
If the rule is defined above the renderer ini section or the name does not match a renderer it is applied to every renderer.
984984

985+
## Highlight Mappings (`highlight.mapping`)
986+
987+
- `file_extension e.g. .toml`: **language e.g. ini**. File extension to language mapping overrides.
988+
989+
- Gitea will highlight files using the `linguist-language` or `gitlab-language` attribute from the `.gitattributes` file
990+
if available. If this is not set or the language is unavailable, the file extension will be looked up
991+
in this mapping, or the file type will be detected using heuristics.
992+
985993
## Time (`time`)
986994

987995
- `FORMAT`: Time format to display on UI, e.g. RFC1123 or 2006-01-02 15:04:05

modules/git/repo_attribute.go

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ type CheckAttributeOpts struct {
2222
AllAttributes bool
2323
Attributes []string
2424
Filenames []string
25+
IndexFile string
26+
WorkTree string
2527
}
2628

2729
// CheckAttribute returns the git attributes of the given files, as reported by `git check-attr`
@@ -31,6 +33,19 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
3133
return nil, fmt.Errorf("git version missing: %v", err)
3234
}
3335

36+
env := []string{}
37+
38+
if len(opts.IndexFile) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
39+
env = append(env, "GIT_INDEX_FILE="+opts.IndexFile)
40+
}
41+
if len(opts.WorkTree) > 0 && CheckGitVersionAtLeast("1.7.8") == nil {
42+
env = append(env, "GIT_WORK_TREE="+opts.WorkTree)
43+
}
44+
45+
if len(env) > 0 {
46+
env = append(os.Environ(), env...)
47+
}
48+
3449
stdOut := new(bytes.Buffer)
3550
stdErr := new(bytes.Buffer)
3651

@@ -61,7 +76,7 @@ func (repo *Repository) CheckAttribute(opts CheckAttributeOpts) (map[string]map[
6176

6277
cmd := NewCommand(cmdArgs...)
6378

64-
if err := cmd.RunInDirPipeline(repo.Path, stdOut, stdErr); err != nil {
79+
if err := cmd.RunInDirTimeoutEnvPipeline(env, -1, repo.Path, stdOut, stdErr); err != nil {
6580
return nil, fmt.Errorf("failed to run check-attr: %v\n%s\n%s", err, stdOut.String(), stdErr.String())
6681
}
6782

modules/git/repo_index.go

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"bytes"
99
"context"
1010
"os"
11+
"path/filepath"
1112
"strings"
1213

1314
"code.gitea.io/gitea/modules/log"
@@ -45,22 +46,23 @@ func (repo *Repository) readTreeToIndex(id SHA1, indexFilename ...string) error
4546
}
4647

4748
// ReadTreeToTemporaryIndex reads a treeish to a temporary index file
48-
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename string, cancel context.CancelFunc, err error) {
49-
tmpIndex, err := os.CreateTemp("", "index")
49+
func (repo *Repository) ReadTreeToTemporaryIndex(treeish string) (filename, tmpDir string, cancel context.CancelFunc, err error) {
50+
tmpDir, err = os.MkdirTemp("", "index")
5051
if err != nil {
5152
return
5253
}
53-
filename = tmpIndex.Name()
54+
55+
filename = filepath.Join(tmpDir, ".tmp-index")
5456
cancel = func() {
55-
err := util.Remove(filename)
57+
err := util.RemoveAll(tmpDir)
5658
if err != nil {
5759
log.Error("failed to remove tmp index file: %v", err)
5860
}
5961
}
6062
err = repo.ReadTreeToIndex(treeish, filename)
6163
if err != nil {
6264
defer cancel()
63-
return "", func() {}, err
65+
return "", "", func() {}, err
6466
}
6567
return
6668
}

modules/git/repo_language_stats_gogit.go

Lines changed: 34 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,11 +11,10 @@ import (
1111
"bytes"
1212
"context"
1313
"io"
14-
"os"
14+
"strings"
1515

1616
"code.gitea.io/gitea/modules/analyze"
1717
"code.gitea.io/gitea/modules/log"
18-
"code.gitea.io/gitea/modules/util"
1918

2019
"github.com/go-enry/go-enry/v2"
2120
"github.com/go-git/go-git/v5"
@@ -48,35 +47,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
4847
var checker *CheckAttributeReader
4948

5049
if CheckGitVersionAtLeast("1.7.8") == nil {
51-
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
50+
indexFilename, workTree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
5251
if err == nil {
5352
defer deleteTemporaryFile()
54-
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
55-
if err == nil {
56-
defer func() {
57-
_ = util.RemoveAll(tmpWorkTree)
53+
checker = &CheckAttributeReader{
54+
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
55+
Repo: repo,
56+
IndexFile: indexFilename,
57+
WorkTree: workTree,
58+
}
59+
ctx, cancel := context.WithCancel(DefaultContext)
60+
if err := checker.Init(ctx); err != nil {
61+
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
62+
} else {
63+
go func() {
64+
err = checker.Run()
65+
if err != nil {
66+
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
67+
cancel()
68+
}
5869
}()
59-
60-
checker = &CheckAttributeReader{
61-
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
62-
Repo: repo,
63-
IndexFile: indexFilename,
64-
WorkTree: tmpWorkTree,
65-
}
66-
ctx, cancel := context.WithCancel(DefaultContext)
67-
if err := checker.Init(ctx); err != nil {
68-
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
69-
} else {
70-
go func() {
71-
err = checker.Run()
72-
if err != nil {
73-
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
74-
cancel()
75-
}
76-
}()
77-
}
78-
defer cancel()
7970
}
71+
defer cancel()
8072
}
8173
}
8274

@@ -114,6 +106,21 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
114106
sizes[language] += f.Size
115107

116108
return nil
109+
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
110+
// strip off a ? if present
111+
if idx := strings.IndexByte(language, '?'); idx >= 0 {
112+
language = language[:idx]
113+
}
114+
if len(language) != 0 {
115+
// group languages, such as Pug -> HTML; SCSS -> CSS
116+
group := enry.GetLanguageGroup(language)
117+
if len(group) != 0 {
118+
language = group
119+
}
120+
121+
sizes[language] += f.Size
122+
return nil
123+
}
117124
}
118125
}
119126
}

modules/git/repo_language_stats_nogogit.go

Lines changed: 35 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,10 @@ import (
1313
"context"
1414
"io"
1515
"math"
16-
"os"
16+
"strings"
1717

1818
"code.gitea.io/gitea/modules/analyze"
1919
"code.gitea.io/gitea/modules/log"
20-
"code.gitea.io/gitea/modules/util"
2120

2221
"github.com/go-enry/go-enry/v2"
2322
)
@@ -68,35 +67,28 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
6867
var checker *CheckAttributeReader
6968

7069
if CheckGitVersionAtLeast("1.7.8") == nil {
71-
indexFilename, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
70+
indexFilename, worktree, deleteTemporaryFile, err := repo.ReadTreeToTemporaryIndex(commitID)
7271
if err == nil {
7372
defer deleteTemporaryFile()
74-
tmpWorkTree, err := os.MkdirTemp("", "empty-work-dir")
75-
if err == nil {
76-
defer func() {
77-
_ = util.RemoveAll(tmpWorkTree)
73+
checker = &CheckAttributeReader{
74+
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
75+
Repo: repo,
76+
IndexFile: indexFilename,
77+
WorkTree: worktree,
78+
}
79+
ctx, cancel := context.WithCancel(DefaultContext)
80+
if err := checker.Init(ctx); err != nil {
81+
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
82+
} else {
83+
go func() {
84+
err = checker.Run()
85+
if err != nil {
86+
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
87+
cancel()
88+
}
7889
}()
79-
80-
checker = &CheckAttributeReader{
81-
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language"},
82-
Repo: repo,
83-
IndexFile: indexFilename,
84-
WorkTree: tmpWorkTree,
85-
}
86-
ctx, cancel := context.WithCancel(DefaultContext)
87-
if err := checker.Init(ctx); err != nil {
88-
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
89-
} else {
90-
go func() {
91-
err = checker.Run()
92-
if err != nil {
93-
log.Error("Unable to open checker for %s. Error: %v", commitID, err)
94-
cancel()
95-
}
96-
}()
97-
}
98-
defer cancel()
9990
}
91+
defer cancel()
10092
}
10193
}
10294

@@ -138,7 +130,23 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
138130

139131
sizes[language] += f.Size()
140132
continue
133+
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
134+
// strip off a ? if present
135+
if idx := strings.IndexByte(language, '?'); idx >= 0 {
136+
language = language[:idx]
137+
}
138+
if len(language) != 0 {
139+
// group languages, such as Pug -> HTML; SCSS -> CSS
140+
group := enry.GetLanguageGroup(language)
141+
if len(group) != 0 {
142+
language = group
143+
}
144+
145+
sizes[language] += f.Size()
146+
continue
147+
}
141148
}
149+
142150
}
143151
}
144152

modules/highlight/highlight.go

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ func NewContext() {
5555
}
5656

5757
// Code returns a HTML version of code string with chroma syntax highlighting classes
58-
func Code(fileName, code string) string {
58+
func Code(fileName, language, code string) string {
5959
NewContext()
6060

6161
// diff view newline will be passed as empty, change to literal \n so it can be copied
@@ -69,9 +69,23 @@ func Code(fileName, code string) string {
6969
}
7070

7171
var lexer chroma.Lexer
72-
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
73-
//use mapped value to find lexer
74-
lexer = lexers.Get(val)
72+
73+
if len(language) > 0 {
74+
lexer = lexers.Get(language)
75+
76+
if lexer == nil {
77+
// Attempt stripping off the '?'
78+
if idx := strings.IndexByte(language, '?'); idx > 0 {
79+
lexer = lexers.Get(language[:idx])
80+
}
81+
}
82+
}
83+
84+
if lexer == nil {
85+
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
86+
//use mapped value to find lexer
87+
lexer = lexers.Get(val)
88+
}
7589
}
7690

7791
if lexer == nil {
@@ -119,7 +133,7 @@ func CodeFromLexer(lexer chroma.Lexer, code string) string {
119133
}
120134

121135
// File returns a slice of chroma syntax highlighted lines of code
122-
func File(numLines int, fileName string, code []byte) []string {
136+
func File(numLines int, fileName, language string, code []byte) []string {
123137
NewContext()
124138

125139
if len(code) > sizeLimit {
@@ -139,8 +153,16 @@ func File(numLines int, fileName string, code []byte) []string {
139153
htmlw := bufio.NewWriter(&htmlbuf)
140154

141155
var lexer chroma.Lexer
142-
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
143-
lexer = lexers.Get(val)
156+
157+
// provided language overrides everything
158+
if len(language) > 0 {
159+
lexer = lexers.Get(language)
160+
}
161+
162+
if lexer == nil {
163+
if val, ok := highlightMapping[filepath.Ext(fileName)]; ok {
164+
lexer = lexers.Get(val)
165+
}
144166
}
145167

146168
if lexer == nil {

modules/highlight/highlight_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ steps:
9696

9797
for _, tt := range tests {
9898
t.Run(tt.name, func(t *testing.T) {
99-
if got := File(tt.numLines, tt.fileName, []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
99+
if got := File(tt.numLines, tt.fileName, "", []byte(tt.code)); !reflect.DeepEqual(got, tt.want) {
100100
t.Errorf("File() = %v, want %v", got, tt.want)
101101
}
102102
})

modules/indexer/code/search.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro
101101
Language: result.Language,
102102
Color: result.Color,
103103
LineNumbers: lineNumbers,
104-
FormattedLines: highlight.Code(result.Filename, formattedLinesBuffer.String()),
104+
FormattedLines: highlight.Code(result.Filename, "", formattedLinesBuffer.String()),
105105
}, nil
106106
}
107107

modules/repofiles/diff_test.go

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99

1010
"code.gitea.io/gitea/models"
1111
"code.gitea.io/gitea/models/unittest"
12+
"code.gitea.io/gitea/modules/json"
1213
"code.gitea.io/gitea/modules/test"
1314
"code.gitea.io/gitea/services/gitdiff"
1415

@@ -118,13 +119,21 @@ func TestGetDiffPreview(t *testing.T) {
118119
t.Run("with given branch", func(t *testing.T) {
119120
diff, err := GetDiffPreview(ctx.Repo.Repository, branch, treePath, content)
120121
assert.NoError(t, err)
121-
assert.EqualValues(t, expectedDiff, diff)
122+
expectedBs, err := json.Marshal(expectedDiff)
123+
assert.NoError(t, err)
124+
bs, err := json.Marshal(diff)
125+
assert.NoError(t, err)
126+
assert.EqualValues(t, expectedBs, bs)
122127
})
123128

124129
t.Run("empty branch, same results", func(t *testing.T) {
125130
diff, err := GetDiffPreview(ctx.Repo.Repository, "", treePath, content)
126131
assert.NoError(t, err)
127-
assert.EqualValues(t, expectedDiff, diff)
132+
expectedBs, err := json.Marshal(expectedDiff)
133+
assert.NoError(t, err)
134+
bs, err := json.Marshal(diff)
135+
assert.NoError(t, err)
136+
assert.EqualValues(t, expectedBs, bs)
128137
})
129138
}
130139

0 commit comments

Comments
 (0)