Skip to content

Commit f6445af

Browse files
Merge branch 'main' into feature/oss-209
2 parents c3a4de9 + 18c7b1f commit f6445af

File tree

10 files changed

+136
-25
lines changed

10 files changed

+136
-25
lines changed

main.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,7 @@ func run(state overseer.State) {
463463

464464
// OSS Default simplified gitlab enumeration
465465
feature.UseSimplifiedGitlabEnumeration.Store(true)
466+
feature.GitlabProjectsPerPage.Store(100)
466467

467468
// OSS Default using github graphql api for issues, pr's and comments
468469
feature.UseGithubGraphqlAPI.Store(true)

pkg/analyzer/analyzers/launchdarkly/requests.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ var (
3838
webhooksKey: "/v2/webhooks",
3939
/*
4040
TODO:
41-
release piplelines: https://launchdarkly.com/docs/api/release-pipelines-beta/get-all-release-pipelines (Beta)
41+
release pipelines: https://launchdarkly.com/docs/api/release-pipelines-beta/get-all-release-pipelines (Beta)
4242
insight deployments: https://launchdarkly.com/docs/api/insights-deployments-beta/get-deployments (Beta)
4343
delivery configuration: https://launchdarkly.com/docs/api/integration-delivery-configurations-beta/get-integration-delivery-configuration-by-environment (Beta)
4444
metrics: https://launchdarkly.com/docs/api/metrics-beta/get-metric-groups (Beta)

pkg/detectors/kontent/kontent.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ var (
2525
apiKeyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"kontent"}) + common.BuildRegexJWT("30,34", "200,400", "40,43"))
2626
envIDPat = regexp.MustCompile(detectors.PrefixRegex([]string{"kontent", "env"}) + common.UUIDPattern)
2727

28-
// API return this error when the environment does not exist or the api key does not have the persmission to access that environment
28+
// The API returns this error when the environment does not exist or the API key does not have permission to access that environment
2929
envErr = "The specified API key does not provide the permissions required to access the environment"
3030
)
3131

pkg/detectors/satismeterwritekey/satismeterwritekey_integration_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ func TestSatismeterWritekey_FromChunk(t *testing.T) {
6969
s: Scanner{},
7070
args: args{
7171
ctx: context.Background(),
72-
data: []byte(fmt.Sprintf("You can find a satismeterwritekey project %s satismeter writekey %s and satismeter token %s in here but not vaild", inactiveProjectID, inactiveWriteKey, inactiveToken)), // the secret would satisfy the regex but not pass validation,
72+
data: []byte(fmt.Sprintf("You can find a satismeterwritekey project %s satismeter writekey %s and satismeter token %s in here but not valid", inactiveProjectID, inactiveWriteKey, inactiveToken)), // the secret would satisfy the regex but not pass validation,
7373
verify: true,
7474
},
7575
want: []detectors.Result{

pkg/engine/gitlab.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@ func (e *Engine) ScanGitLab(ctx context.Context, c sources.GitlabConfig) (source
6363
}
6464

6565
connection.NoCleanup = c.NoCleanup
66+
6667
connection.PrintLegacyJson = c.PrintLegacyJSON
6768

6869
var conn anypb.Any

pkg/feature/feature.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ var (
1010
UserAgentSuffix AtomicString
1111
UseSimplifiedGitlabEnumeration atomic.Bool
1212
UseGitMirror atomic.Bool
13+
GitlabProjectsPerPage atomic.Int64
1314
UseGithubGraphqlAPI atomic.Bool // use github graphql api to fetch issues, pr's and comments
1415
)
1516

pkg/sources/github/github.go

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,11 @@ func (c *filteredRepoCache) includeRepo(s string) bool {
206206
return false
207207
}
208208

209+
// wantRepo returns true if the repository should be included based on include/exclude patterns
210+
func (c *filteredRepoCache) wantRepo(s string) bool {
211+
return !c.ignoreRepo(s) && c.includeRepo(s)
212+
}
213+
209214
// Init returns an initialized GitHub source.
210215
func (s *Source) Init(aCtx context.Context, name string, jobID sources.JobID, sourceID sources.SourceID, verify bool, connection *anypb.Any, concurrency int) error {
211216
err := git.CmdCheck()
@@ -434,14 +439,29 @@ func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) e
434439
// Double make sure that all enumerated repositories in the
435440
// filteredRepoCache have an entry in the repoInfoCache.
436441
for _, repo := range s.filteredRepoCache.Values() {
437-
ctx := context.WithValue(ctx, "repo", repo)
442+
// Extract the repository name from the URL for filtering
443+
repoName := repo
444+
if strings.Contains(repo, "/") {
445+
// Try to extract org/repo name from URL
446+
if strings.Contains(repo, "github.com") {
447+
parts := strings.Split(repo, "/")
448+
if len(parts) >= 2 {
449+
repoName = parts[len(parts)-2] + "/" + strings.TrimSuffix(parts[len(parts)-1], ".git")
450+
}
451+
}
452+
}
438453

439-
repo, err := s.ensureRepoInfoCache(ctx, repo, &unitErrorReporter{reporter})
440-
if err != nil {
441-
ctx.Logger().Error(err, "error caching repo info")
442-
_ = dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err))
454+
// Final filter check - only include repositories that pass the filter
455+
if s.filteredRepoCache.wantRepo(repoName) {
456+
ctx = context.WithValue(ctx, "repo", repo)
457+
458+
repo, err := s.ensureRepoInfoCache(ctx, repo, &unitErrorReporter{reporter})
459+
if err != nil {
460+
ctx.Logger().Error(err, "error caching repo info")
461+
_ = dedupeReporter.UnitErr(ctx, fmt.Errorf("error caching repo info: %w", err))
462+
}
463+
s.repos = append(s.repos, repo)
443464
}
444-
s.repos = append(s.repos, repo)
445465
}
446466
githubReposEnumerated.WithLabelValues(s.name).Set(float64(len(s.repos)))
447467
ctx.Logger().Info("Completed enumeration", "num_repos", len(s.repos), "num_orgs", s.orgsCache.Count(), "num_members", len(s.memberCache))

pkg/sources/github/github_test.go

Lines changed: 70 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -423,6 +423,41 @@ func TestNormalizeRepos(t *testing.T) {
423423
}
424424
}
425425

426+
func TestNormalizeRepo(t *testing.T) {
427+
// Test that normalizeRepo correctly identifies URLs with protocols
428+
source := &Source{}
429+
430+
// Test case 1: HTTPS URL with .git suffix
431+
result, err := source.normalizeRepo("https://github.com/org/repo.git")
432+
assert.NoError(t, err)
433+
assert.Contains(t, result, "github.com/org/repo")
434+
435+
// Test case 2: HTTP URL without .git
436+
result, err = source.normalizeRepo("http://github.com/org/repo")
437+
assert.NoError(t, err)
438+
assert.Contains(t, result, "github.com/org/repo")
439+
440+
// Test case 3: Git protocol URL
441+
result, err = source.normalizeRepo("git://github.com/org/repo.git")
442+
assert.NoError(t, err)
443+
assert.Contains(t, result, "github.com/org/repo")
444+
445+
// Test case 4: SSH URL
446+
result, err = source.normalizeRepo("ssh://git@github.com/org/repo.git")
447+
assert.NoError(t, err)
448+
assert.Contains(t, result, "github.com/org/repo")
449+
450+
// Test case 5: Org/repo format (should convert to full URL)
451+
result, err = source.normalizeRepo("org/repo")
452+
assert.NoError(t, err)
453+
assert.Contains(t, result, "github.com/org/repo")
454+
455+
// Test case 6: Invalid format (no protocol, no slash)
456+
_, err = source.normalizeRepo("invalid")
457+
assert.Error(t, err)
458+
assert.Contains(t, err.Error(), "no repositories found")
459+
}
460+
426461
func TestHandleRateLimit(t *testing.T) {
427462
s := initTestSource(&sourcespb.GitHub{Credential: &sourcespb.GitHub_Unauthenticated{}})
428463
ctx := context.Background()
@@ -493,7 +528,7 @@ func TestEnumerateWithToken(t *testing.T) {
493528
JSON(map[string]string{"login": "super-secret-user"})
494529

495530
gock.New("https://api.github.com").
496-
Get("/users/super-secret-user/repos").
531+
Get("/user/repos").
497532
MatchParam("per_page", "100").
498533
Reply(200).
499534
JSON([]map[string]string{{"clone_url": "https://github.com/super-secret-user/super-secret-repo.git", "full_name": "super-secret-user/super-secret-repo"}})
@@ -574,7 +609,7 @@ func TestEnumerate(t *testing.T) {
574609

575610
//
576611
gock.New("https://api.github.com").
577-
Get("/users/super-secret-user/repos").
612+
Get("/user/repos").
578613
Reply(200).
579614
JSON(`[{"name": "super-secret-repo", "full_name": "super-secret-user/super-secret-repo", "owner": {"login": "super-secret-user"}, "clone_url": "https://github.com/super-secret-user/super-secret-repo.git", "has_wiki": false, "size": 1}]`)
580615

@@ -978,6 +1013,39 @@ func Test_ScanMultipleTargets_MultipleErrors(t *testing.T) {
9781013
}
9791014
}
9801015

1016+
func TestRepositoryFiltering(t *testing.T) {
1017+
// Test that the filteredRepoCache correctly filters repositories
1018+
source := &Source{}
1019+
1020+
// Test case 1: No filters specified (should include everything)
1021+
cache1 := source.newFilteredRepoCache(context.Background(), simple.NewCache[string](), []string{}, []string{})
1022+
assert.True(t, cache1.wantRepo("org/repo1"))
1023+
assert.True(t, cache1.wantRepo("org/repo2"))
1024+
assert.True(t, cache1.wantRepo("org/repo3"))
1025+
1026+
// Test case 2: Include filter specified (should only include matching repos)
1027+
cache2 := source.newFilteredRepoCache(context.Background(), simple.NewCache[string](), []string{"org/repo1", "org/repo2"}, []string{})
1028+
assert.True(t, cache2.wantRepo("org/repo1"))
1029+
assert.True(t, cache2.wantRepo("org/repo2"))
1030+
assert.False(t, cache2.wantRepo("org/repo3"))
1031+
1032+
// Test case 3: Exclude filter specified (should exclude matching repos)
1033+
cache3 := source.newFilteredRepoCache(context.Background(), simple.NewCache[string](), []string{}, []string{"org/repo1"})
1034+
assert.False(t, cache3.wantRepo("org/repo1"))
1035+
assert.True(t, cache3.wantRepo("org/repo2"))
1036+
assert.True(t, cache3.wantRepo("org/repo3"))
1037+
1038+
// Test case 4: Both include and exclude filters (exclude takes precedence)
1039+
cache4 := source.newFilteredRepoCache(context.Background(), simple.NewCache[string](), []string{"org/repo1"}, []string{"org/repo1"})
1040+
assert.False(t, cache4.wantRepo("org/repo1"))
1041+
1042+
// Test case 5: Wildcard patterns
1043+
cache5 := source.newFilteredRepoCache(context.Background(), simple.NewCache[string](), []string{"org/*"}, []string{})
1044+
assert.True(t, cache5.wantRepo("org/repo1"))
1045+
assert.True(t, cache5.wantRepo("org/repo2"))
1046+
assert.False(t, cache5.wantRepo("other/repo1"))
1047+
}
1048+
9811049
func noopReporter() sources.UnitReporter {
9821050
return sources.VisitorReporter{
9831051
VisitUnit: func(context.Context, sources.SourceUnit) error {

pkg/sources/github/repo.go

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"fmt"
66
"io"
77
"net/http"
8+
"regexp"
89
"strings"
910
"sync"
1011

@@ -273,6 +274,12 @@ func (s *Source) processRepos(ctx context.Context, target string, reporter sourc
273274
}
274275

275276
repoName, repoURL := r.GetFullName(), r.GetCloneURL()
277+
278+
// Check if we should process this repository based on the filter
279+
if !s.filteredRepoCache.wantRepo(repoName) {
280+
continue
281+
}
282+
276283
s.totalRepoSize += r.GetSize()
277284
s.filteredRepoCache.Set(repoName, repoURL)
278285
s.cacheRepoInfo(r)
@@ -349,10 +356,17 @@ func (s *Source) wikiIsReachable(ctx context.Context, repoURL string) bool {
349356

350357
// normalizeRepo normalizes a GitHub repository URL or name to its canonical form.
351358
func (s *Source) normalizeRepo(repo string) (string, error) {
352-
// if the string contains a '/', assume it's a GitHub repository.
353-
if strings.ContainsRune(repo, '/') {
359+
360+
// If it's a full URL (has protocol), normalize it
361+
if regexp.MustCompile(`^[a-z]+://`).MatchString(repo) {
362+
354363
return giturl.NormalizeGithubRepo(repo)
355364
}
365+
// If it's a repository name (contains "/" but has no protocol scheme), convert it to a full URL first
366+
if strings.Contains(repo, "/") && !regexp.MustCompile(`^[a-z]+://`).MatchString(repo) {
367+
fullURL := "https://github.com/" + repo
368+
return giturl.NormalizeGithubRepo(fullURL)
369+
}
356370

357371
return "", fmt.Errorf("no repositories found for %s", repo)
358372
}

pkg/sources/gitlab/gitlab.go

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,12 @@ type Source struct {
6666

6767
useAuthInUrl bool
6868

69-
clonePath string
70-
noCleanup bool
69+
clonePath string
70+
noCleanup bool
71+
7172
printLegacyJSON bool
73+
74+
projectsPerPage int
7275
}
7376

7477
// WithCustomContentWriter sets the useCustomContentWriter flag on the source.
@@ -171,6 +174,11 @@ func (s *Source) Init(ctx context.Context, name string, jobId sources.JobID, sou
171174
s.clonePath = conn.GetClonePath()
172175
s.noCleanup = conn.GetNoCleanup()
173176
s.printLegacyJSON = conn.GetPrintLegacyJson()
177+
s.projectsPerPage = int(feature.GitlabProjectsPerPage.Load())
178+
179+
if s.projectsPerPage > 100 {
180+
return fmt.Errorf("invalid config: maximum allowed projects per page for gitlab is 100")
181+
}
174182

175183
// configuration uses the inverse logic of the `useAuthInUrl` flag.
176184
s.useAuthInUrl = !conn.RemoveAuthInUrl
@@ -546,10 +554,10 @@ func (s *Source) getAllProjectRepos(
546554
}
547555

548556
const (
549-
orderBy = "id" // TODO: Use keyset pagination (https://docs.gitlab.com/ee/api/rest/index.html#keyset-based-pagination)
550-
paginationLimit = 100 // Default is 20, max is 100.
557+
orderBy = "id"
551558
)
552-
listOpts := gitlab.ListOptions{PerPage: paginationLimit}
559+
// TruffleHog requests 100 projects per page by default unless the feature flag sets another value. If the flag is set to 0, GitLab falls back to its own default of 20.
560+
listOpts := gitlab.ListOptions{PerPage: s.projectsPerPage}
553561

554562
projectQueryOptions := &gitlab.ListProjectsOptions{OrderBy: gitlab.Ptr(orderBy), ListOptions: listOpts}
555563
for {
@@ -654,14 +662,13 @@ func (s *Source) getAllProjectReposV2(
654662
) error {
655663
gitlabReposEnumerated.WithLabelValues(s.name).Set(0)
656664

657-
const paginationLimit = 100 // default is 20, max is 100.
658-
659665
// example: https://gitlab.com/gitlab-org/api/client-go/-/blob/main/examples/pagination.go#L55
660666
listOpts := gitlab.ListOptions{
661667
OrderBy: "id",
662668
Pagination: "keyset", // https://docs.gitlab.com/api/rest/#keyset-based-pagination
663-
PerPage: paginationLimit,
664-
Sort: "asc",
669+
// TruffleHog requests 100 projects per page by default unless the feature flag sets another value. If the flag is set to 0, GitLab falls back to its own default of 20.
670+
PerPage: s.projectsPerPage,
671+
Sort: "asc",
665672
}
666673

667674
projectQueryOptions := &gitlab.ListProjectsOptions{
@@ -756,11 +763,10 @@ func (s *Source) getAllProjectReposInGroups(
756763

757764
var projectsWithNamespace []string
758765
const (
759-
orderBy = "id"
760-
paginationLimit = 100
766+
orderBy = "id"
761767
)
762768

763-
listOpts := gitlab.ListOptions{PerPage: paginationLimit}
769+
listOpts := gitlab.ListOptions{PerPage: s.projectsPerPage}
764770
projectOpts := &gitlab.ListGroupProjectsOptions{
765771
ListOptions: listOpts,
766772
OrderBy: gitlab.Ptr(orderBy),

0 commit comments

Comments
 (0)