Skip to content

Commit f45587b

Browse files
committed
flake8/linter fixes
1 parent 0028d38 commit f45587b

File tree

3 files changed

+28
-31
lines changed

3 files changed

+28
-31
lines changed

scripts/sagemaker_feature_group_issue.py

Lines changed: 19 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,43 +8,42 @@
88
# Toggle this flag to test spawn mode fix
99
USE_SPAWN_MODE = True # Set to True to fix the Tahoe hang issue
1010

11-
if __name__ == '__main__':
11+
if __name__ == "__main__":
1212
if USE_SPAWN_MODE:
1313
print("Using SPAWN mode (fix for Tahoe)")
14-
multiprocessing.set_start_method('spawn', force=True)
15-
multiprocess.set_start_method('spawn', force=True)
14+
multiprocessing.set_start_method("spawn", force=True)
15+
multiprocess.set_start_method("spawn", force=True)
1616
else:
1717
print("Using default fork mode (will hang on Tahoe)")
1818

1919
# Create fake data
20-
data = pd.DataFrame({
21-
'record_id': [f'id_{i}' for i in range(10)],
22-
'feature_1': [float(i) for i in range(10)],
23-
'feature_2': [float(i * 2) for i in range(10)],
24-
'event_time': [time.time()] * 10
25-
})
20+
data = pd.DataFrame(
21+
{
22+
"record_id": [f"id_{i}" for i in range(10)],
23+
"feature_1": [float(i) for i in range(10)],
24+
"feature_2": [float(i * 2) for i in range(10)],
25+
"event_time": [time.time()] * 10,
26+
}
27+
)
2628

2729
# Setup SageMaker session
2830
sagemaker_session = sagemaker.Session()
2931

3032
# Define feature group
31-
feature_group_name = 'temp_delete_me'
32-
feature_group = FeatureGroup(
33-
name=feature_group_name,
34-
sagemaker_session=sagemaker_session
35-
)
33+
feature_group_name = "temp_delete_me"
34+
feature_group = FeatureGroup(name=feature_group_name, sagemaker_session=sagemaker_session)
3635

3736
# Create feature definitions
3837
feature_group.load_feature_definitions(data_frame=data)
3938

4039
# Create feature group
4140
print("Creating feature group...")
4241
feature_group.create(
43-
s3_uri=f's3://{sagemaker_session.default_bucket()}/featurestore',
44-
record_identifier_name='record_id',
45-
event_time_feature_name='event_time',
42+
s3_uri=f"s3://{sagemaker_session.default_bucket()}/featurestore",
43+
record_identifier_name="record_id",
44+
event_time_feature_name="event_time",
4645
role_arn=sagemaker.get_execution_role(),
47-
enable_online_store=True
46+
enable_online_store=True,
4847
)
4948

5049
# Wait for feature group to be created (can take 1-2 minutes)
@@ -58,10 +57,5 @@
5857

5958
# This will hang on macOS Tahoe with USE_SPAWN_MODE=False
6059
print("Starting ingest...")
61-
feature_group.ingest(
62-
data_frame=data,
63-
max_workers=2,
64-
max_processes=2,
65-
wait=True
66-
)
67-
print("Ingest completed!")
60+
feature_group.ingest(data_frame=data, max_workers=2, max_processes=2, wait=True)
61+
print("Ingest completed!")

scripts/test_feature_set_creation.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
import multiprocessing
77
import multiprocess
88

9-
if __name__ == '__main__':
10-
multiprocessing.set_start_method('spawn', force=True)
11-
multiprocess.set_start_method('spawn', force=True)
9+
if __name__ == "__main__":
10+
multiprocessing.set_start_method("spawn", force=True)
11+
multiprocess.set_start_method("spawn", force=True)
1212

1313
s3_path = "s3://workbench-public-data/comp_chem/aqsol_public_data.csv"
1414
aqsol_data = wr.s3.read_csv(s3_path)
@@ -17,4 +17,4 @@
1717
to_features = PandasToFeatures("temp_delete_me")
1818
to_features.set_input(aqsol_data, id_column="id")
1919
to_features.set_output_tags(["aqsol", "public"])
20-
to_features.transform()
20+
to_features.transform()

src/workbench/core/transforms/pandas_transforms/pandas_to_features.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -335,16 +335,19 @@ def mac_spawn_hack(self):
335335
This forces spawn mode on macOS to avoid the hang.
336336
"""
337337
import platform
338+
338339
if platform.system() == "Darwin": # macOS
339340
self.log.warning("macOS detected, forcing 'spawn' mode for multiprocessing (Tahoe hang workaround)")
340341
import multiprocessing
342+
341343
try:
342344
import multiprocess
343-
multiprocess.set_start_method('spawn', force=True)
345+
346+
multiprocess.set_start_method("spawn", force=True)
344347
except (RuntimeError, ImportError):
345348
pass # Already set or multiprocess not available
346349
try:
347-
multiprocessing.set_start_method('spawn', force=True)
350+
multiprocessing.set_start_method("spawn", force=True)
348351
except RuntimeError:
349352
pass # Already set
350353

0 commit comments

Comments
 (0)