Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [1.2.2] - 2025-12-08

### Security
- **Fixed URL security vulnerabilities** in string matching patterns
- Fixed domain/protocol matching that accepted the expected string at any position in a URL, which could be exploited
- Prevented URL spoofing attacks via malicious URLs (e.g., `evil-github.com-fake.ru`)
- Replaced unsafe substring checks with proper URL parsing using `urlparse()`
- All domain checks now validate that `netloc` exactly matches the expected domain
- Protocol checks use `startswith()` instead of substring matching
- Affected files: `src/ossval/parsers/spdx.py`, `src/ossval/analyzers/repo_finder.py`, `src/ossval/core.py`
- Severity: Medium - Could allow URL spoofing/bypass in repository URL validation

## [1.2.1] - 2025-12-08

### Added
Expand Down Expand Up @@ -158,6 +170,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Efficient SLOC counting with pygount
- Optimized repository URL discovery

[Unreleased]: https://github.com/SemClone/ossval/compare/v1.2.1...HEAD
[Unreleased]: https://github.com/SemClone/ossval/compare/v1.2.2...HEAD
[1.2.2]: https://github.com/SemClone/ossval/compare/v1.2.1...v1.2.2
[1.2.1]: https://github.com/SemClone/ossval/compare/v1.0.1...v1.2.1
[1.0.1]: https://github.com/SemClone/ossval/releases/tag/v1.0.1
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "ossval"
version = "1.2.1"
version = "1.2.2"
description = "Open Source Software Valuation - Calculate development cost savings from OSS dependencies"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
2 changes: 1 addition & 1 deletion src/ossval/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""OSSVAL: Open Source Software Valuation Tool."""

__version__ = "1.2.1"
__version__ = "1.2.2"

from ossval.core import analyze, parse_sbom, quick_estimate
from ossval.models import AnalysisConfig, AnalysisResult, Region, ProjectType
Expand Down
37 changes: 25 additions & 12 deletions src/ossval/analyzers/repo_finder.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,17 +108,25 @@ def _is_valid_git_url(url: str) -> bool:
"""Check if URL looks like a valid git repository."""
if not url:
return False

url_lower = url.lower()
# Check for common git hosting platforms (with or without trailing slash)
return (
"github.com" in url_lower
or "gitlab.com" in url_lower
or "bitbucket.org" in url_lower
or url_lower.startswith("git+")
or url_lower.endswith(".git")
or url_lower.endswith(".git/")
or url_lower.startswith("git://")
)

# Check for git protocol prefixes
if url_lower.startswith(("git+", "git://", "git@")):
return True

# Check for .git suffix
if url_lower.endswith(".git") or url_lower.endswith(".git/"):
return True

# Check for common git hosting platforms by parsing domain
try:
# Add protocol if missing for parsing
parse_url = url_lower if "://" in url_lower else f"https://{url_lower}"
parsed = urlparse(parse_url)
return parsed.netloc in ["github.com", "gitlab.com", "bitbucket.org"]
except Exception:
return False


def _normalize_git_url(url: str) -> str:
Expand Down Expand Up @@ -149,8 +157,13 @@ def _normalize_git_url(url: str) -> str:

# Ensure it starts with http:// or https://
if not url.startswith(("http://", "https://")):
if "github.com" in url or "gitlab.com" in url or "bitbucket.org" in url:
url = f"https://{url}"
# Parse URL with dummy protocol to check domain safely
try:
parsed = urlparse(f"https://{url}")
if parsed.netloc in ["github.com", "gitlab.com", "bitbucket.org"]:
url = f"https://{url}"
except Exception:
pass

# Add .git suffix if it's a GitHub/GitLab/Bitbucket URL
try:
Expand Down
14 changes: 10 additions & 4 deletions src/ossval/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from datetime import datetime
from pathlib import Path
from typing import List, Optional
from urllib.parse import urlparse

from ossval import __version__
from ossval.analyzers import (
Expand Down Expand Up @@ -179,10 +180,15 @@ async def _analyze_package(
package.warnings.append(f"Error calculating maintainability index: {str(e)}")

# Analyze health metrics (GitHub only)
if package.repository_url and "github.com" in package.repository_url.lower():
health = await analyze_health(package.repository_url, config.github_token)
if health:
package.health = health
if package.repository_url:
try:
parsed = urlparse(package.repository_url.lower())
if parsed.netloc == "github.com":
health = await analyze_health(package.repository_url, config.github_token)
if health:
package.health = health
except Exception:
pass

return package

Expand Down
56 changes: 44 additions & 12 deletions src/ossval/parsers/spdx.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import re
from pathlib import Path
from typing import List
from urllib.parse import urlparse

from ossval.models import Package, SourceType
from ossval.parsers.base import BaseParser, ParseResult
Expand Down Expand Up @@ -32,6 +33,39 @@ def can_parse(self, filepath: str) -> bool:
return False
return False

@staticmethod
def _is_git_url(url: str) -> bool:
"""
Safely check if a URL is a git repository URL.

Checks:
- Protocol starts with git+ (e.g., git+https://)
- Domain is github.com, gitlab.com, or bitbucket.org

Args:
url: URL string to check

Returns:
True if it's a git URL, False otherwise
"""
if not url or url == "NOASSERTION":
return False

# Check if it starts with git+ protocol
if url.startswith("git+"):
return True

# Parse URL and check domain
try:
# Handle git@ SSH URLs
if url.startswith("git@"):
return True

parsed = urlparse(url if "://" in url else f"https://{url}")
return parsed.netloc in ["github.com", "gitlab.com", "bitbucket.org"]
except Exception:
return False

def parse(self, filepath: str) -> ParseResult:
"""Parse SPDX SBOM file."""
path = Path(filepath)
Expand Down Expand Up @@ -86,10 +120,9 @@ def _parse_json(self, filepath: str) -> List[Package]:
# Extract download location as fallback
if not repository_url:
download_location = pkg_data.get("downloadLocation", "")
if download_location and download_location != "NOASSERTION":
if self._is_git_url(download_location):
# Try to extract git URL
if "git+" in download_location or "github.com" in download_location:
repository_url = download_location.replace("git+", "").split("#")[0]
repository_url = download_location.replace("git+", "").split("#")[0]

package = Package(
name=name,
Expand Down Expand Up @@ -133,18 +166,17 @@ def _parse_tag_value(self, filepath: str) -> List[Package]:
ecosystem = self._extract_ecosystem_from_purl(f"pkg:{purl}")
if ecosystem:
current_package["ecosystem"] = ecosystem
elif "git+" in ref_line or "github.com" in ref_line:
# Extract repository URL
url_match = re.search(r"(https?://[^\s]+)", ref_line)
if url_match:
else:
# Try to extract repository URL
url_match = re.search(r"(https?://[^\s]+|git\+[^\s]+)", ref_line)
if url_match and self._is_git_url(url_match.group(1)):
current_package["repository_url"] = url_match.group(1)
elif line.startswith("PackageDownloadLocation:") and in_package:
download_loc = line.split(":", 1)[1].strip()
if download_loc and download_loc != "NOASSERTION":
if "git+" in download_loc or "github.com" in download_loc:
current_package["repository_url"] = (
download_loc.replace("git+", "").split("#")[0]
)
if self._is_git_url(download_loc):
current_package["repository_url"] = (
download_loc.replace("git+", "").split("#")[0]
)

# Don't forget the last package
if in_package and current_package.get("name"):
Expand Down