
Commit 6b7e4c8

chore: update pyspark matrix tests (#1779)

* chore: update pyspark matrix tests
* chore: update pyspark dependencies install
* chore: remove strict cast validations
* fix(linting): code formatting

---------

Co-authored-by: Azory YData Bot <[email protected]>
1 parent c3ce66c commit 6b7e4c8

File tree: 3 files changed (+31 / -14 lines)

.github/workflows/tests.yml

Lines changed: 21 additions & 12 deletions
```diff
@@ -126,22 +126,26 @@ jobs:
     continue-on-error: false
     strategy:
       matrix:
-        python-version: [ "3.9", "3.10", "3.11", "3.12" ]
-        pyspark-version: [ "3.4" , "3.5" ]
+        include:
+          # Legacy line (Spark 3.5.x)
+          - { python-version: "3.10", pyspark-version: "3.5" }
+          - { python-version: "3.11", pyspark-version: "3.5" }
+          # Current line (Spark 4.0.x)
+          - { python-version: "3.11", pyspark-version: "4.0" }
+          - { python-version: "3.10", pyspark-version: "4.0" }
+          - { python-version: "3.12", pyspark-version: "4.0" }
 
     name: Tests Spark | Python ${{ matrix.python-version }} | PySpark ${{ matrix.pyspark-version }}
 
     steps:
       - name: Checkout Code
         uses: actions/checkout@v4
 
-      - name: Install Java (OpenJDK 11)
-        run: |
-          sudo apt-get update
-          sudo apt-get install -y openjdk-11-jdk
-          echo "JAVA_HOME=/usr/lib/jvm/java-11-openjdk-amd64" >> $GITHUB_ENV
-          echo "PATH=$JAVA_HOME/bin:$PATH" >> $GITHUB_ENV
-          java -version
+      - name: Setup Java 17 (Temurin)
+        uses: actions/setup-java@v4
+        with:
+          distribution: temurin
+          java-version: '17'
 
       - name: Setup Python
         uses: actions/setup-python@v5
@@ -159,10 +163,15 @@ jobs:
 
       - name: Install Dependencies
         run: |
-          python -m pip install --upgrade pip setuptools wheel
-          pip install pyarrow>4.0.0 pyspark=="${{ matrix.pyspark-version }}" --no-cache-dir
-          echo "ARROW_PRE_0_15_IPC_FORMAT=1" >> $GITHUB_ENV
+          python -m pip install -U pip setuptools wheel
+          pip install "pyspark~=${{ matrix.pyspark-version }}" "pyarrow>4.0.0" --no-cache-dir
+          pip install ".[test]"
+          # Make PySpark use this Python and bind locally; give it a safe tmp dir
+          echo "PYSPARK_PYTHON=$(which python)" >> $GITHUB_ENV
+          echo "PYSPARK_DRIVER_PYTHON=$(which python)" >> $GITHUB_ENV
           echo "SPARK_LOCAL_IP=127.0.0.1" >> $GITHUB_ENV
+          echo "SPARK_LOCAL_DIRS=$RUNNER_TEMP/spark-tmp" >> $GITHUB_ENV
+          mkdir -p "$RUNNER_TEMP/spark-tmp"
 
       - name: Run Tests
         run: |
```
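Two things drive this hunk: Spark 4.0 requires Java 17 or newer (Spark 3.5 also supports 17), so a single Temurin 17 setup covers both matrix lines, and the exact `pyspark==` pin becomes a compatible-release `~=` pin, so each entry tracks the newest release within its line. A minimal sketch of what that specifier admits, using the `packaging` library (our illustration, not part of the commit):

```python
# Sketch: what the compatible-release pin "pyspark~=4.0" allows.
# PEP 440 defines "~=4.0" as ">=4.0, ==4.*" -- any 4.x release, never 5.0.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=4.0")
for version in ["3.5.3", "4.0.0", "4.0.1", "4.1.0", "5.0.0"]:
    print(f"{version}: {version in spec}")
# 3.5.3: False, 4.0.0: True, 4.0.1: True, 4.1.0: True, 5.0.0: False
```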

examples/features/spark_example.py

Lines changed: 4 additions & 1 deletion
```diff
@@ -14,7 +14,10 @@
 
 if __name__ == "__main__":
     spark_session = (
-        SparkSession.builder.appName("SparkProfiling").master("local[*]").getOrCreate()
+        SparkSession.builder.appName("SparkProfiling")
+        .master("local[*]")
+        .config("spark.sql.ansi.enabled", "false")
+        .getOrCreate()
     )
 
     print(spark_session.sparkContext.uiWebUrl)  # noqa: T201
```
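The new `spark.sql.ansi.enabled=false` line is the heart of the "remove strict cast validations" change: Spark 4.0 enables ANSI SQL mode by default, so an invalid cast raises a `CAST_INVALID_INPUT` error instead of yielding NULL. A minimal standalone sketch of the permissive behavior this restores (illustrative, not part of the commit):

```python
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = (
    SparkSession.builder.master("local[*]")
    .config("spark.sql.ansi.enabled", "false")  # permissive, pre-4.0 behavior
    .getOrCreate()
)

df = spark.createDataFrame([("42",), ("not-a-number",)], ["raw"])
df.select(F.col("raw").cast("int").alias("as_int")).show()
# +------+
# |as_int|
# +------+
# |    42|
# |  NULL|   <- with ANSI mode on, this row would raise instead
# +------+
spark.stop()
```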

tests/conftest.py

Lines changed: 6 additions & 1 deletion
```diff
@@ -105,7 +105,12 @@ def spark_session(spark_context):
     """
     if not has_spark:
         pytest.skip("Skipping Spark tests because PySpark is not installed.")
-    spark = SparkSession.builder.config(conf=spark_context.getConf()).getOrCreate()
+    spark = (
+        SparkSession.builder.master("local[*]")
+        .appName("pytest")
+        .config("spark.sql.ansi.enabled", "false")  # <-- restore permissive casts
+        .getOrCreate()
+    )
 
     yield spark
```