Skip to content

Commit 586f66d

Browse files
authored
use struct-based SourceMetadataFunc signature across git sources (#4813)
* use struct-based SourceMetadataFunc signature across git sources
* incorporated feedback
  - pass SourceMetadataInfo by value
  - remove LegacySourceMetadataFunc
1 parent 6c64db9 commit 586f66d

File tree

6 files changed

+136
-64
lines changed

6 files changed

+136
-64
lines changed

hack/snifftest/main.go

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -197,15 +197,15 @@ func main() {
197197
SkipBinaries: true,
198198
SkipArchives: false,
199199
Concurrency: runtime.NumCPU(),
200-
SourceMetadataFunc: func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData {
200+
SourceMetadataFunc: func(info git.SourceMetadataInfo) *source_metadatapb.MetaData {
201201
return &source_metadatapb.MetaData{
202202
Data: &source_metadatapb.MetaData_Git{
203203
Git: &source_metadatapb.Git{
204-
Commit: commit,
205-
File: file,
206-
Email: email,
207-
Repository: repository,
208-
Timestamp: timestamp,
204+
Commit: info.Commit,
205+
File: info.File,
206+
Email: info.Email,
207+
Repository: info.Repository,
208+
Timestamp: info.Timestamp,
209209
},
210210
},
211211
}

pkg/sources/git/git.go

Lines changed: 91 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -57,12 +57,27 @@ type Source struct {
5757
// WithCustomContentWriter sets the useCustomContentWriter flag on the source.
5858
func (s *Source) WithCustomContentWriter() { s.useCustomContentWriter = true }
5959

60+
// SourceMetadataInfo contains the metadata fields passed to SourceMetadataFunc.
61+
// Using a struct allows adding new fields without breaking existing consumers.
62+
type SourceMetadataInfo struct {
63+
File string
64+
Email string
65+
Commit string
66+
Timestamp string
67+
Repository string
68+
RepositoryLocalPath string
69+
Line int64
70+
}
71+
72+
// SourceMetadataFunc is a function that maps git source metadata to a protobuf MetaData message.
73+
type SourceMetadataFunc func(info SourceMetadataInfo) *source_metadatapb.MetaData
74+
6075
type Git struct {
6176
sourceType sourcespb.SourceType
6277
sourceName string
6378
sourceID sources.SourceID
6479
jobID sources.JobID
65-
sourceMetadataFunc func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData
80+
sourceMetadataFunc SourceMetadataFunc
6681
verify bool
6782
metrics metricsCollector
6883
concurrency *semaphore.Weighted
@@ -76,7 +91,7 @@ type Git struct {
7691
// Config for a Git source.
7792
type Config struct {
7893
Concurrency int
79-
SourceMetadataFunc func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData
94+
SourceMetadataFunc SourceMetadataFunc
8095

8196
SourceName string
8297
JobID sources.JobID
@@ -212,17 +227,17 @@ func (s *Source) Init(aCtx context.Context, name string, jobId sources.JobID, so
212227
SkipBinaries: conn.GetSkipBinaries(),
213228
SkipArchives: conn.GetSkipArchives(),
214229
Concurrency: concurrency,
215-
SourceMetadataFunc: func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData {
230+
SourceMetadataFunc: func(info SourceMetadataInfo) *source_metadatapb.MetaData {
216231
return &source_metadatapb.MetaData{
217232
Data: &source_metadatapb.MetaData_Git{
218233
Git: &source_metadatapb.Git{
219-
Commit: sanitizer.UTF8(commit),
220-
File: sanitizer.UTF8(file),
221-
Email: sanitizer.UTF8(email),
222-
Repository: sanitizer.UTF8(repository),
223-
Timestamp: sanitizer.UTF8(timestamp),
224-
Line: line,
225-
RepositoryLocalPath: sanitizer.UTF8(repositoryLocalPath),
234+
Commit: sanitizer.UTF8(info.Commit),
235+
File: sanitizer.UTF8(info.File),
236+
Email: sanitizer.UTF8(info.Email),
237+
Repository: sanitizer.UTF8(info.Repository),
238+
Timestamp: sanitizer.UTF8(info.Timestamp),
239+
Line: info.Line,
240+
RepositoryLocalPath: sanitizer.UTF8(info.RepositoryLocalPath),
226241
},
227242
},
228243
}
@@ -747,8 +762,14 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
747762
// Scan the commit metadata.
748763
// See https://github.com/trufflesecurity/trufflehog/issues/2683
749764
var (
750-
metadata = s.sourceMetadataFunc("", email, fullHash, when, remoteURL, path, 0)
751-
sb strings.Builder
765+
metadata = s.sourceMetadataFunc(SourceMetadataInfo{
766+
Email: email,
767+
Commit: fullHash,
768+
Timestamp: when,
769+
Repository: remoteURL,
770+
RepositoryLocalPath: path,
771+
})
772+
sb strings.Builder
752773
)
753774
sb.WriteString(email)
754775
sb.WriteString("\n")
@@ -789,7 +810,14 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
789810
continue
790811
}
791812

792-
metadata := s.sourceMetadataFunc(fileName, email, fullHash, when, remoteURL, path, 0)
813+
metadata := s.sourceMetadataFunc(SourceMetadataInfo{
814+
File: fileName,
815+
Email: email,
816+
Commit: fullHash,
817+
Timestamp: when,
818+
Repository: remoteURL,
819+
RepositoryLocalPath: path,
820+
})
793821
chunkSkel := &sources.Chunk{
794822
SourceName: s.sourceName,
795823
SourceID: s.sourceID,
@@ -816,7 +844,15 @@ func (s *Git) ScanCommits(ctx context.Context, repo *git.Repository, path string
816844
}
817845

818846
chunkData := func(d *gitparse.Diff) error {
819-
metadata := s.sourceMetadataFunc(fileName, email, fullHash, when, remoteURL, path, int64(diff.LineStart))
847+
metadata := s.sourceMetadataFunc(SourceMetadataInfo{
848+
File: fileName,
849+
Email: email,
850+
Commit: fullHash,
851+
Timestamp: when,
852+
Repository: remoteURL,
853+
RepositoryLocalPath: path,
854+
Line: int64(diff.LineStart),
855+
})
820856

821857
reader, err := d.ReadCloser()
822858
if err != nil {
@@ -875,7 +911,14 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email
875911
// Add oversize chunk info
876912
if newChunkBuffer.Len() > 0 {
877913
// Send the existing fragment.
878-
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, "", int64(diff.LineStart+lastOffset))
914+
metadata := s.sourceMetadataFunc(SourceMetadataInfo{
915+
File: fileName,
916+
Email: email,
917+
Commit: hash,
918+
Timestamp: when,
919+
Repository: urlMetadata,
920+
Line: int64(diff.LineStart + lastOffset),
921+
})
879922
chunk := sources.Chunk{
880923
SourceName: s.sourceName,
881924
SourceID: s.sourceID,
@@ -895,7 +938,14 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email
895938
}
896939
if len(line) > sources.DefaultChunkSize {
897940
// Send the oversize line.
898-
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, "", int64(diff.LineStart+offset))
941+
metadata := s.sourceMetadataFunc(SourceMetadataInfo{
942+
File: fileName,
943+
Email: email,
944+
Commit: hash,
945+
Timestamp: when,
946+
Repository: urlMetadata,
947+
Line: int64(diff.LineStart + offset),
948+
})
899949
chunk := sources.Chunk{
900950
SourceName: s.sourceName,
901951
SourceID: s.sourceID,
@@ -919,7 +969,14 @@ func (s *Git) gitChunk(ctx context.Context, diff *gitparse.Diff, fileName, email
919969
}
920970
// Send anything still in the new chunk buffer
921971
if newChunkBuffer.Len() > 0 {
922-
metadata := s.sourceMetadataFunc(fileName, email, hash, when, urlMetadata, "", int64(diff.LineStart+lastOffset))
972+
metadata := s.sourceMetadataFunc(SourceMetadataInfo{
973+
File: fileName,
974+
Email: email,
975+
Commit: hash,
976+
Timestamp: when,
977+
Repository: urlMetadata,
978+
Line: int64(diff.LineStart + lastOffset),
979+
})
923980
chunk := sources.Chunk{
924981
SourceName: s.sourceName,
925982
SourceID: s.sourceID,
@@ -1020,7 +1077,14 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
10201077
continue
10211078
}
10221079

1023-
metadata := s.sourceMetadataFunc(fileName, email, "Staged", when, urlMetadata, path, 0)
1080+
metadata := s.sourceMetadataFunc(SourceMetadataInfo{
1081+
File: fileName,
1082+
Email: email,
1083+
Commit: "Staged",
1084+
Timestamp: when,
1085+
Repository: urlMetadata,
1086+
RepositoryLocalPath: path,
1087+
})
10241088
chunkSkel := &sources.Chunk{
10251089
SourceName: s.sourceName,
10261090
SourceID: s.sourceID,
@@ -1036,7 +1100,15 @@ func (s *Git) ScanStaged(ctx context.Context, repo *git.Repository, path string,
10361100
}
10371101

10381102
chunkData := func(d *gitparse.Diff) error {
1039-
metadata := s.sourceMetadataFunc(fileName, email, "Staged", when, urlMetadata, path, int64(diff.LineStart))
1103+
metadata := s.sourceMetadataFunc(SourceMetadataInfo{
1104+
File: fileName,
1105+
Email: email,
1106+
Commit: "Staged",
1107+
Timestamp: when,
1108+
Repository: urlMetadata,
1109+
RepositoryLocalPath: path,
1110+
Line: int64(diff.LineStart),
1111+
})
10401112

10411113
reader, err := d.ReadCloser()
10421114
if err != nil {

pkg/sources/github/github.go

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -288,19 +288,19 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
288288
SkipBinaries: conn.GetSkipBinaries(),
289289
SkipArchives: conn.GetSkipArchives(),
290290
Concurrency: concurrency,
291-
SourceMetadataFunc: func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData {
291+
SourceMetadataFunc: func(info git.SourceMetadataInfo) *source_metadatapb.MetaData {
292292
return &source_metadatapb.MetaData{
293293
Data: &source_metadatapb.MetaData_Github{
294294
Github: &source_metadatapb.Github{
295-
Commit: sanitizer.UTF8(commit),
296-
File: sanitizer.UTF8(file),
297-
Email: sanitizer.UTF8(email),
298-
Repository: sanitizer.UTF8(repository),
299-
Link: giturl.GenerateLink(repository, commit, file, line),
300-
Timestamp: sanitizer.UTF8(timestamp),
301-
Line: line,
302-
Visibility: s.visibilityOf(aCtx, repository),
303-
RepositoryLocalPath: sanitizer.UTF8(repositoryLocalPath),
295+
Commit: sanitizer.UTF8(info.Commit),
296+
File: sanitizer.UTF8(info.File),
297+
Email: sanitizer.UTF8(info.Email),
298+
Repository: sanitizer.UTF8(info.Repository),
299+
Link: giturl.GenerateLink(info.Repository, info.Commit, info.File, info.Line),
300+
Timestamp: sanitizer.UTF8(info.Timestamp),
301+
Line: info.Line,
302+
Visibility: s.visibilityOf(aCtx, info.Repository),
303+
RepositoryLocalPath: sanitizer.UTF8(info.RepositoryLocalPath),
304304
},
305305
},
306306
}

pkg/sources/github_experimental/github_experimental.go

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -128,18 +128,18 @@ func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, so
128128
SkipBinaries: false,
129129
SkipArchives: false,
130130
Concurrency: concurrency,
131-
SourceMetadataFunc: func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData {
131+
SourceMetadataFunc: func(info git.SourceMetadataInfo) *source_metadatapb.MetaData {
132132
return &source_metadatapb.MetaData{
133133
Data: &source_metadatapb.MetaData_Github{
134134
Github: &source_metadatapb.Github{
135-
Commit: sanitizer.UTF8(commit),
136-
File: sanitizer.UTF8(file),
137-
Email: sanitizer.UTF8(email),
138-
Repository: sanitizer.UTF8(repository),
139-
Link: giturl.GenerateLink(repository, commit, file, line),
140-
Timestamp: sanitizer.UTF8(timestamp),
141-
Line: line,
142-
Visibility: s.visibilityOf(aCtx, repository),
135+
Commit: sanitizer.UTF8(info.Commit),
136+
File: sanitizer.UTF8(info.File),
137+
Email: sanitizer.UTF8(info.Email),
138+
Repository: sanitizer.UTF8(info.Repository),
139+
Link: giturl.GenerateLink(info.Repository, info.Commit, info.File, info.Line),
140+
Timestamp: sanitizer.UTF8(info.Timestamp),
141+
Line: info.Line,
142+
Visibility: s.visibilityOf(aCtx, info.Repository),
143143
},
144144
},
145145
}

pkg/sources/gitlab/gitlab.go

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -231,18 +231,18 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
231231
SkipBinaries: conn.GetSkipBinaries(),
232232
SkipArchives: conn.GetSkipArchives(),
233233
Concurrency: concurrency,
234-
SourceMetadataFunc: func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData {
234+
SourceMetadataFunc: func(info git.SourceMetadataInfo) *source_metadatapb.MetaData {
235235
gitlabMetadata := &source_metadatapb.Gitlab{
236-
Commit: sanitizer.UTF8(commit),
237-
File: sanitizer.UTF8(file),
238-
Email: sanitizer.UTF8(email),
239-
Repository: sanitizer.UTF8(repository),
240-
RepositoryLocalPath: sanitizer.UTF8(repositoryLocalPath),
241-
Link: giturl.GenerateLink(repository, commit, file, line),
242-
Timestamp: sanitizer.UTF8(timestamp),
243-
Line: line,
236+
Commit: sanitizer.UTF8(info.Commit),
237+
File: sanitizer.UTF8(info.File),
238+
Email: sanitizer.UTF8(info.Email),
239+
Repository: sanitizer.UTF8(info.Repository),
240+
RepositoryLocalPath: sanitizer.UTF8(info.RepositoryLocalPath),
241+
Link: giturl.GenerateLink(info.Repository, info.Commit, info.File, info.Line),
242+
Timestamp: sanitizer.UTF8(info.Timestamp),
243+
Line: info.Line,
244244
}
245-
proj, ok := s.repoToProjCache.get(repository)
245+
proj, ok := s.repoToProjCache.get(info.Repository)
246246
if ok {
247247
gitlabMetadata.ProjectId = int64(proj.id)
248248
gitlabMetadata.ProjectName = proj.name

pkg/sources/huggingface/huggingface.go

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -235,19 +235,19 @@ func (s *Source) Init(ctx context.Context, name string, jobID sources.JobID, sou
235235
SourceType: s.Type(),
236236
Verify: s.verify,
237237
Concurrency: concurrency,
238-
SourceMetadataFunc: func(file, email, commit, timestamp, repository, repositoryLocalPath string, line int64) *source_metadatapb.MetaData {
238+
SourceMetadataFunc: func(info git.SourceMetadataInfo) *source_metadatapb.MetaData {
239239
return &source_metadatapb.MetaData{
240240
Data: &source_metadatapb.MetaData_Huggingface{
241241
Huggingface: &source_metadatapb.Huggingface{
242-
Commit: sanitizer.UTF8(commit),
243-
File: sanitizer.UTF8(file),
244-
Email: sanitizer.UTF8(email),
245-
Repository: sanitizer.UTF8(repository),
246-
Link: giturl.GenerateLink(repository, commit, file, line),
247-
Timestamp: sanitizer.UTF8(timestamp),
248-
Line: line,
249-
Visibility: s.visibilityOf(ctx, repository),
250-
ResourceType: s.getResourceType(ctx, repository),
242+
Commit: sanitizer.UTF8(info.Commit),
243+
File: sanitizer.UTF8(info.File),
244+
Email: sanitizer.UTF8(info.Email),
245+
Repository: sanitizer.UTF8(info.Repository),
246+
Link: giturl.GenerateLink(info.Repository, info.Commit, info.File, info.Line),
247+
Timestamp: sanitizer.UTF8(info.Timestamp),
248+
Line: info.Line,
249+
Visibility: s.visibilityOf(ctx, info.Repository),
250+
ResourceType: s.getResourceType(ctx, info.Repository),
251251
},
252252
},
253253
}

0 commit comments

Comments
 (0)