Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Added
- Upcoming changes...

## [1.37.0] - 2025-10-17
### Added
- Added delta folder and file copy command

## [1.36.0] - 2025-10-08
### Added
- Add `--recursive-threshold` argument to folder scan command
Expand Down Expand Up @@ -684,3 +688,4 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
[1.34.0]: https://github.com/scanoss/scanoss.py/compare/v1.33.0...v1.34.0
[1.35.0]: https://github.com/scanoss/scanoss.py/compare/v1.34.0...v1.35.0
[1.36.0]: https://github.com/scanoss/scanoss.py/compare/v1.35.0...v1.36.0
[1.37.0]: https://github.com/scanoss/scanoss.py/compare/v1.36.0...v1.37.0
2 changes: 1 addition & 1 deletion src/scanoss/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,4 @@
THE SOFTWARE.
"""

__version__ = '1.36.0'
__version__ = '1.37.0'
70 changes: 69 additions & 1 deletion src/scanoss/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import pypac

from scanoss.cryptography import Cryptography, create_cryptography_config_from_args
from scanoss.delta import Delta
from scanoss.export.dependency_track import DependencyTrackExporter
from scanoss.inspection.dependency_track.project_violation import (
DependencyTrackProjectViolationPolicyCheck,
Expand Down Expand Up @@ -919,6 +920,33 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
)
p_folder_hash.set_defaults(func=folder_hash)

# Sub-command: delta
p_delta = subparsers.add_parser(
'delta',
aliases=['dl'],
description=f'SCANOSS Delta commands: {__version__}',
help='Delta support commands',
)

delta_sub = p_delta.add_subparsers(
title='Delta Commands',
dest='subparsercmd',
description='Delta sub-commands',
help='Delta sub-commands'
)

# Delta Sub-command: copy
p_copy = delta_sub.add_parser(
'copy',
aliases=['cp'],
description=f'Copy file list into delta dir: {__version__}',
help='Copy the given list of files into a delta directory',
)
p_copy.add_argument('--input', '-i', type=str, required=True, help='Input file with diff list')
p_copy.add_argument('--folder', '-fd', type=str, help='Delta folder to copy into')
p_copy.add_argument('--root', '-rd', type=str, help='Root directory to place delta folder')
p_copy.set_defaults(func=delta_copy)

# Output options
for p in [
p_scan,
Expand All @@ -939,6 +967,7 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
p_crypto_hints,
p_crypto_versions_in_range,
c_licenses,
p_copy,
]:
p.add_argument('--output', '-o', type=str, help='Output result file name (optional - default stdout).')

Expand Down Expand Up @@ -1136,6 +1165,7 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
p_crypto_versions_in_range,
c_licenses,
e_dt,
p_copy
]:
p.add_argument('--debug', '-d', action='store_true', help='Enable debug messages')
p.add_argument('--trace', '-t', action='store_true', help='Enable trace messages, including API posts')
Expand All @@ -1156,7 +1186,8 @@ def setup_args() -> None: # noqa: PLR0912, PLR0915
sys.exit(1)
elif (
args.subparser
in ('utils', 'ut', 'component', 'comp', 'inspect', 'insp', 'ins', 'crypto', 'cr', 'export', 'exp')
in ('utils', 'ut', 'component', 'comp', 'inspect', 'insp', 'ins',
'crypto', 'cr', 'export', 'exp', 'delta', 'dl')
) and not args.subparsercmd:
parser.parse_args([args.subparser, '--help']) # Force utils helps to be displayed
sys.exit(1)
Expand Down Expand Up @@ -2603,6 +2634,43 @@ def initialise_empty_file(filename: str):
print_stderr(f'Error: Unable to create output file {filename}: {e}')
sys.exit(1)

def delta_copy(parser, args):
    """
    Run the ``delta copy`` sub-command and exit the process.

    Builds a ``Delta`` instance from the parsed arguments, asks it to copy the
    files listed in the input file, and terminates with the status code that
    ``Delta.copy()`` reports. Any exception raised while doing so is printed to
    stderr and turns into exit status 1.

    Parameters
    ----------
        parser : ArgumentParser
            Command line parser object, used to display the sub-command help
        args : Namespace
            Parsed command line arguments. The following attributes are read:
            - input: Path to file containing list of files to copy
            - folder: Optional target directory path
            - root: Optional root directory passed to ``Delta`` as ``root_dir``
            - output: Optional output file path
            - debug / trace / quiet: Logging switches passed to ``Delta``
    """
    # The input file is mandatory: show the sub-command help and bail out without it
    if args.input is None:
        print_stderr('ERROR: Input file is required for copying')
        parser.parse_args([args.subparser, args.subparsercmd, '-h'])
        sys.exit(1)
    # Start from an empty output file when one was requested
    if args.output:
        initialise_empty_file(args.output)
    try:
        delta_options = {
            'debug': args.debug,
            'trace': args.trace,
            'quiet': args.quiet,
            'filepath': args.input,
            'folder': args.folder,
            'output': args.output,
            'root_dir': args.root,
        }
        exit_code, _ = Delta(**delta_options).copy()
    except Exception as err:
        print_stderr(err)
        if args.debug:
            traceback.print_exc()
        sys.exit(1)
    else:
        # Propagate the status reported by the copy operation
        sys.exit(exit_code)

def main():
"""
Expand Down
197 changes: 197 additions & 0 deletions src/scanoss/delta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
"""
SPDX-License-Identifier: MIT

Copyright (c) 2025, SCANOSS

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
"""
import os
import shutil
import tempfile
from typing import Optional

from .scanossbase import ScanossBase


class Delta(ScanossBase):
    """
    Handle delta scan operations by copying files into a dedicated delta directory.

    This class manages the creation of delta directories and copying of specified files
    while preserving the directory structure. Files are read from an input file where each
    line contains a file path to copy. Source paths are interpreted relative to ``root_dir``
    and the delta directory is created underneath ``root_dir`` (unless an absolute folder
    is supplied).
    """

    def __init__(  # noqa: PLR0913
        self,
        debug: bool = False,
        trace: bool = False,
        quiet: bool = False,
        filepath: Optional[str] = None,
        folder: Optional[str] = None,
        output: Optional[str] = None,
        root_dir: Optional[str] = None,
    ):
        """
        Initialise the Delta instance.

        :param debug: Enable debug logging.
        :param trace: Enable trace logging.
        :param quiet: Enable quiet mode (suppress non-essential output).
        :param filepath: Path to an input file containing a list of files to copy.
        :param folder: A target delta directory path (auto-generated if not provided).
        :param output: Output file path for the delta directory location (stdout if not provided).
        :param root_dir: Directory that source paths and the delta folder are resolved against
                         (defaults to the current directory when empty or not provided).
        """
        super().__init__(debug, trace, quiet)
        self.filepath = filepath
        self.folder = folder
        self.output = output
        self.root_dir = root_dir if root_dir else '.'

    def copy(self, input_file: Optional[str] = None) -> tuple[int, str]:
        """
        Copy files listed in the input file to the delta directory.

        Reads the input file line by line, where each line contains a file path.
        Creates the delta directory, reports its location, then copies each file
        while preserving its directory structure. Entries that are missing, are not
        regular files, or would resolve outside the root directory are skipped with
        a warning; a failure while copying aborts the operation.

        :param input_file: Path to the file list (falls back to the ``filepath`` given at construction).
        :return: Tuple of (status_code, folder_path) where status_code is 0 for success,
                 1 for error, and folder_path is the delta directory path ('' on error)
        """
        input_file = input_file if input_file else self.filepath
        if not input_file:
            self.print_stderr('ERROR: No input file specified')
            return 1, ''
        # Validate that an input file exists
        if not os.path.isfile(input_file):
            self.print_stderr(f'ERROR: Input file {input_file} does not exist or is not a file')
            return 1, ''
        # Load the input file and validate it contains valid file paths
        files = self.load_input_file(input_file)
        if files is None:
            return 1, ''
        # Create delta dir (folder)
        delta_folder = self.create_delta_dir(self.folder, self.root_dir)
        if not delta_folder:
            return 1, ''
        # Print delta folder location to output
        self.print_to_file_or_stdout(delta_folder, self.output)
        # Loop invariants: where copies must land and where sources must live
        abs_folder = os.path.abspath(os.path.join(self.root_dir, delta_folder))
        real_root = os.path.realpath(self.root_dir)
        # Process each file and copy it to the delta dir
        for source_file in files:
            resolved = self._resolve_source(source_file, real_root)
            if resolved is None:
                continue
            normalised_source, real_source = resolved
            # Use a normalised source for destination to prevent traversal
            dest_path = os.path.normpath(os.path.join(self.root_dir, delta_folder, normalised_source.lstrip(os.sep)))
            # Final safety check: ensure destination is within the delta folder
            abs_dest = os.path.abspath(dest_path)
            if not abs_dest.startswith(abs_folder + os.sep):
                self.print_stderr(
                    f'WARNING: Destination path ({abs_dest}) escapes delta directory for {source_file}. Skipping.'
                )
                continue
            # Create the destination directory if it doesn't exist and copy the file
            try:
                dest_dir = os.path.dirname(dest_path)
                if dest_dir:
                    self.print_trace(f'Creating directory {dest_dir}...')
                    os.makedirs(dest_dir, exist_ok=True)
                self.print_debug(f'Copying {source_file} to {dest_path} ...')
                shutil.copy(real_source, dest_path)
            except (OSError, shutil.Error) as e:
                self.print_stderr(f'ERROR: Failed to copy {source_file} to {dest_path}: {e}')
                return 1, ''
        return 0, delta_folder

    def _resolve_source(self, source_file: str, real_root: str) -> Optional[tuple[str, str]]:
        """
        Validate one entry from the file list and resolve it to a real file path.

        :param source_file: Path as read from the input file (relative to the root directory).
        :param real_root: Symlink-resolved absolute path of the root directory.
        :return: Tuple of (normalised relative path, resolved absolute source path),
                 or None (after printing a warning) if the entry must be skipped.
        """
        # Normalise the source path to handle ".." and redundant separators
        normalised_source = os.path.normpath(source_file)
        # An absolute or drive-qualified path would make os.path.join() discard root_dir
        drive, _ = os.path.splitdrive(normalised_source)
        if drive or os.path.isabs(normalised_source):
            self.print_stderr(f'WARNING: Absolute or drive-qualified path {source_file} not allowed. Skipping.')
            return None
        # Compare whole path components so names that merely contain ".." (e.g. "a..b") are accepted
        if '..' in normalised_source.split(os.sep):
            self.print_stderr(f'WARNING: Source path escapes root directory for {source_file}. Skipping.')
            return None
        # Resolve symlinks so a link pointing outside the root directory cannot be copied through
        real_source = os.path.realpath(os.path.join(self.root_dir, normalised_source))
        try:
            common = os.path.commonpath([real_root, real_source])
            inside_root = os.path.normcase(common) == os.path.normcase(real_root)
        except ValueError:
            # Raised when the two paths cannot share a prefix (e.g. different drives)
            inside_root = False
        if not inside_root:
            self.print_stderr(f'WARNING: Resolved source {real_source} escapes root directory. Skipping.')
            return None
        # Check if the source file exists and is a file
        if not os.path.isfile(real_source):
            self.print_stderr(f'WARNING: File {source_file} does not exist or is not a file, skipping')
            return None
        return normalised_source, real_source

    def create_delta_dir(self, folder: Optional[str], root_dir: str = '.') -> Optional[str]:
        """
        Create the delta directory.

        If no folder is specified, creates a unique temporary directory with
        a 'delta-' prefix inside root_dir. If a folder is specified, a relative
        folder is resolved under root_dir and must not already exist before it
        is created.

        The returned path is relative to root_dir (or absolute when an absolute
        folder was supplied); ``copy()`` joins it with root_dir again.

        :param folder: Optional target directory
        :param root_dir: Root directory to create the delta directory in (default: current directory)
        :return: Path to the delta directory, or None if it already exists or creation fails
        """
        if folder:
            # Resolve a relative folder under root_dir so checks/creation apply to the right place
            resolved = folder if os.path.isabs(folder) else os.path.join(root_dir, folder)
            resolved = os.path.normpath(resolved)
            # Validate the target directory doesn't already exist and create it
            if os.path.exists(resolved):
                self.print_stderr(f'ERROR: Folder {resolved} already exists.')
                return None
            try:
                self.print_debug(f'Creating delta directory {resolved}...')
                os.makedirs(resolved)
            except OSError as e:
                self.print_stderr(f'ERROR: Failed to create directory {resolved}: {e}')
                return None
            return folder
        # Create a unique temporary directory in the given root directory
        try:
            self.print_debug(f'Creating temporary delta directory in {root_dir} ...')
            folder = tempfile.mkdtemp(prefix='delta-', dir=root_dir)
            folder = os.path.relpath(folder, start=root_dir)  # Get the relative path from root_dir
            self.print_debug(f'Created temporary delta directory: {folder}')
        except OSError as e:
            self.print_stderr(f'ERROR: Failed to create temporary directory in {root_dir}: {e}')
            return None
        return folder

    def load_input_file(self, input_file: str) -> Optional[list[str]]:
        """
        Load and parse the input file line by line.

        Each line in the input file represents a source file path. Trailing
        whitespace and leading path separators are stripped; empty lines are
        ignored.

        :param input_file: The path to the input file to be read.
        :return: A list of source file paths extracted from the input file
                 (empty if no input file is given), or None if the file cannot be read.
        """
        files = []
        if input_file:
            try:
                with open(input_file, 'r', encoding='utf-8') as f:
                    for line in f:
                        source_file = line.rstrip()
                        if source_file:
                            # Save the file path without any leading separators
                            files.append(source_file.lstrip(os.sep))
            except OSError as e:
                self.print_stderr(f'ERROR: Failed to read input file; {input_file}: {e}')
                return None
        self.print_debug(f'Loaded {len(files)} files from input file.')
        return files