Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
5 changes: 5 additions & 0 deletions .github/workflows/util/install-spark-resources.sh
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,11 @@ case "$1" in
cd ${INSTALL_DIR} && \
install_spark "3.5.5" "3" "2.13"
;;
4.0)
# Spark-4.0, scala 2.12 // using 2.12 as a hack as 4.0 does not have 2.13 suffix
cd ${INSTALL_DIR} && \
install_spark "4.0.1" "3" "2.12"
;;
*)
echo "Spark version is expected to be specified."
exit 1
Expand Down
100 changes: 100 additions & 0 deletions .github/workflows/velox_backend_x86.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1297,3 +1297,103 @@ jobs:
df -a
bash dev/buildbundle-veloxbe.sh --run_setup_script=OFF --build_arrow=OFF --spark_version=3.4 --enable_gpu=ON
ccache -s

spark-test-spark40:
needs: build-native-lib-centos-7
runs-on: ubuntu-22.04
container: apache/gluten:centos-8-jdk17
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare
run: |
dnf module -y install python39 && \
alternatives --set python3 /usr/bin/python3.9 && \
pip3 install setuptools==77.0.3 && \
pip3 install pyspark==3.5.5 cython && \
pip3 install pandas==2.2.3 pyarrow==20.0.0
- name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update
run: |
rm -rf /opt/shims/spark40
bash .github/workflows/util/install-spark-resources.sh 4.0
- name: Build and Run unit test for Spark 4.0.0 with scala-2.13 (other tests)
run: |
cd $GITHUB_WORKSPACE/
export SPARK_SCALA_VERSION=2.13
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox \
-Pspark-ut -DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \
-DtagsToExclude=org.apache.spark.tags.ExtendedSQLTest,org.apache.gluten.tags.UDFTest,org.apache.gluten.tags.EnhancedFeaturesTest,org.apache.gluten.tags.SkipTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-test-log
path: |
**/target/*.log
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*

spark-test-spark40-slow:
needs: build-native-lib-centos-7
runs-on: ubuntu-22.04
container: apache/gluten:centos-8-jdk17
steps:
- uses: actions/checkout@v2
- name: Download All Artifacts
uses: actions/download-artifact@v4
with:
name: velox-native-lib-centos-7-${{github.sha}}
path: ./cpp/build/releases
- name: Download Arrow Jars
uses: actions/download-artifact@v4
with:
name: arrow-jars-centos-7-${{github.sha}}
path: /root/.m2/repository/org/apache/arrow/
- name: Prepare Spark Resources for Spark 4.0.1 #TODO remove after image update
run: |
rm -rf /opt/shims/spark40
bash .github/workflows/util/install-spark-resources.sh 4.0
- name: Build and Run unit test for Spark 4.0 (slow tests)
run: |
cd $GITHUB_WORKSPACE/
yum install -y java-17-openjdk-devel
export JAVA_HOME=/usr/lib/jvm/java-17-openjdk
export PATH=$JAVA_HOME/bin:$PATH
java -version
$MVN_CMD clean test -Pspark-4.0 -Pscala-2.13 -Pjava-17 -Pbackends-velox -Pspark-ut \
-DargLine="-Dspark.test.home=/opt/shims/spark40/spark_home/" \
-DtagsToInclude=org.apache.spark.tags.ExtendedSQLTest
- name: Upload test report
if: always()
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-report
path: '**/surefire-reports/TEST-*.xml'
- name: Upload unit tests log files
if: ${{ !success() }}
uses: actions/upload-artifact@v4
with:
name: ${{ github.job }}-test-log
path: |
**/target/*.log
**/gluten-ut/**/hs_err_*.log
**/gluten-ut/**/core.*
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ class NativeBenchmarkPlanGenerator extends VeloxWholeStageTransformerSuite {
}
}

test("Test plan json non-empty - AQE on") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("Test plan json non-empty - AQE on", "3.5") {
withSQLConf(
SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "true",
GlutenConfig.CACHE_WHOLE_STAGE_TRANSFORMER_CONTEXT.key -> "true") {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
checkLengthAndPlan(df, 2)
}

test("is_not_null") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("is_not_null", "3.5") {
val df = runQueryAndCompare(
"select l_orderkey from lineitem where l_comment is not null " +
"and l_orderkey = 1") { _ => }
Expand Down Expand Up @@ -176,7 +177,8 @@ class MiscOperatorSuite extends VeloxWholeStageTransformerSuite with AdaptiveSpa
checkLengthAndPlan(df, 0)
}

test("and pushdown") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("and pushdown", "3.5") {
val df = runQueryAndCompare(
"select l_orderkey from lineitem where l_orderkey > 2 " +
"and l_orderkey = 1") { _ => }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ class VeloxHashJoinSuite extends VeloxWholeStageTransformerSuite {
val wholeStages = plan.collect { case wst: WholeStageTransformer => wst }
if (SparkShimLoader.getSparkVersion.startsWith("3.2.")) {
assert(wholeStages.length == 1)
} else if (SparkShimLoader.getSparkVersion.startsWith("3.5.")) {
} else if (
SparkShimLoader.getSparkVersion.startsWith("3.5.") ||
SparkShimLoader.getSparkVersion.startsWith("4.0.")
) {
assert(wholeStages.length == 5)
} else {
assert(wholeStages.length == 3)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
.set("spark.executor.cores", "1")
}

test("arrow_udf test: without projection") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("arrow_udf test: without projection", "3.5") {
lazy val base =
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
.toDF("a", "b")
Expand All @@ -59,7 +60,8 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
checkAnswer(df2, expected)
}

test("arrow_udf test: with unrelated projection") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("arrow_udf test: with unrelated projection", "3.5") {
lazy val base =
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
.toDF("a", "b")
Expand All @@ -79,7 +81,8 @@ class ArrowEvalPythonExecSuite extends WholeStageTransformerSuite {
checkAnswer(df, expected)
}

test("arrow_udf test: with preprojection") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("arrow_udf test: with preprojection", "3.5") {
lazy val base =
Seq(("1", 1), ("1", 2), ("2", 1), ("2", 2), ("3", 1), ("3", 2), ("0", 1), ("3", 0))
.toDF("a", "b")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ class ArithmeticAnsiValidateSuite extends FunctionsValidateSuite {
.set(SQLConf.ANSI_ENABLED.key, "true")
}

test("add") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("add", "3.5") {
runQueryAndCompare("SELECT int_field1 + 100 FROM datatab WHERE int_field1 IS NOT NULL") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
Expand All @@ -48,7 +49,8 @@ class ArithmeticAnsiValidateSuite extends FunctionsValidateSuite {
}
}

test("multiply") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("multiply", "3.5") {
runQueryAndCompare("SELECT int_field1 * 2 FROM datatab WHERE int_field1 IS NOT NULL") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,8 @@ abstract class DateFunctionsValidateSuite extends FunctionsValidateSuite {
}
}

testWithMinSparkVersion("timestampadd", "3.3") {
// TODO: fix on spark-4.0
testWithRangeSparkVersion("timestampadd", "3.3", "3.5") {
withTempPath {
path =>
val ts = Timestamp.valueOf("2020-02-29 00:00:00.500")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite {
}
}

test("json_array_length") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("json_array_length", "3.5") {
runQueryAndCompare(
s"select *, json_array_length(string_field1) " +
s"from datatab limit 5")(checkGlutenOperatorMatch[ProjectExecTransformer])
Expand Down Expand Up @@ -348,7 +349,8 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite {
}
}

test("json_object_keys") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("json_object_keys", "3.5") {
withTempPath {
path =>
Seq[String](
Expand Down Expand Up @@ -378,7 +380,8 @@ class JsonFunctionsValidateSuite extends FunctionsValidateSuite {
}
}

test("to_json function") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("to_json function", "3.5") {
withTable("t") {
spark.sql(
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -522,7 +522,8 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
}
}

test("raise_error, assert_true") {
// TODO: fix on spark-4.0
testWithMaxSparkVersion("raise_error, assert_true", "3.5") {
runQueryAndCompare("""SELECT assert_true(l_orderkey >= 1), l_orderkey
| from lineitem limit 100""".stripMargin) {
checkGlutenOperatorMatch[ProjectExecTransformer]
Expand Down Expand Up @@ -555,7 +556,7 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
}
}

test("version") {
testWithMaxSparkVersion("version", "3.5") {
runQueryAndCompare("""SELECT version() from lineitem limit 10""".stripMargin) {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
Expand Down Expand Up @@ -1097,7 +1098,8 @@ abstract class ScalarFunctionsValidateSuite extends FunctionsValidateSuite {
}
}

testWithMinSparkVersion("try_cast", "3.4") {
// TODO: fix on spark-4.0
testWithRangeSparkVersion("try_cast", "3.4", "3.5") {
withTempView("try_cast_table") {
withTempPath {
path =>
Expand Down
11 changes: 6 additions & 5 deletions dev/docker/Dockerfile.centos8-dynamic-build
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,12 @@ RUN set -ex; \
wget -nv https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz -P /opt/; \
git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten; \
cd /opt/gluten/.github/workflows/util/; \
./install-spark-resources.sh 3.2; \
./install-spark-resources.sh 3.3; \
./install-spark-resources.sh 3.4; \
./install-spark-resources.sh 3.5; \
./install-spark-resources.sh 3.5-scala2.13; \
./install_spark_resources.sh 3.2; \
./install_spark_resources.sh 3.3; \
./install_spark_resources.sh 3.4; \
./install_spark_resources.sh 3.5; \
./install_spark_resources.sh 3.5-scala2.13; \
./install_spark_resources.sh 4.0; \
if [ "$(uname -m)" = "aarch64" ]; then \
export CPU_TARGET="aarch64"; \
fi; \
Expand Down
11 changes: 6 additions & 5 deletions dev/docker/Dockerfile.centos9-dynamic-build
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ RUN set -ex; \
wget -nv https://archive.apache.org/dist/hadoop/common/hadoop-2.8.5/hadoop-2.8.5.tar.gz -P /opt/; \
git clone --depth=1 https://github.com/apache/incubator-gluten /opt/gluten; \
cd /opt/gluten/.github/workflows/util/; \
./install-spark-resources.sh 3.2; \
./install-spark-resources.sh 3.3; \
./install-spark-resources.sh 3.4; \
./install-spark-resources.sh 3.5; \
./install-spark-resources.sh 3.5-scala2.13; \
./install_spark_resources.sh 3.2; \
./install_spark_resources.sh 3.3; \
./install_spark_resources.sh 3.4; \
./install_spark_resources.sh 3.5; \
./install_spark_resources.sh 3.5-scala2.13; \
./install_spark_resources.sh 4.0; \
if [ "$(uname -m)" = "aarch64" ]; then \
export CPU_TARGET="aarch64"; \
fi; \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,12 @@ abstract class GlutenQueryTest extends PlanTest {
private def getExecutedPlan(plan: SparkPlan): Seq[SparkPlan] = {
val subTree = plan match {
case exec: AdaptiveSparkPlanExec =>
getExecutedPlan(exec.executedPlan)
if (isSparkVersionGE("3.5")) {
val finalPlan = exec.getClass.getDeclaredField("finalPhysicalPlan")
getExecutedPlan(finalPlan.get(exec).asInstanceOf[SparkPlan])
} else {
getExecutedPlan(exec.executedPlan)
}
case cmd: CommandResultExec =>
getExecutedPlan(cmd.commandPhysicalPlan)
case s: ShuffleQueryStageExec =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ import org.apache.spark.sql.catalyst.InternalRow
import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeSet}
import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnaryNode}
import org.apache.spark.sql.catalyst.plans.physical.Partitioning
import org.apache.spark.sql.classic.ClassicConversions._
import org.apache.spark.sql.classic.ClassicDataset
import org.apache.spark.sql.execution.{SparkPlan, SparkStrategy, UnaryExecNode}
import org.apache.spark.sql.execution.datasources.LogicalRelation
import org.apache.spark.sql.vectorized.ColumnarBatch
Expand Down Expand Up @@ -59,6 +61,10 @@ case class DummyFilterColumnarExec(child: SparkPlan) extends UnaryExecNode {
}

object DummyFilterColumnarStrategy extends SparkStrategy {
// TODO: remove this if we can suppress unused import error.
locally {
new ColumnConstructorExt(Column)
}
override def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
case r: DummyFilterColumnar =>
DummyFilterColumnarExec(planLater(r.child)) :: Nil
Expand All @@ -73,7 +79,7 @@ object DummyFilterColmnarHelper {
case p => p
}

Dataset.ofRows(spark, modifiedPlan)
ClassicDataset.ofRows(spark, modifiedPlan)
}

def withSession(builders: Seq[SparkSessionExtensionsProvider])(f: SparkSession => Unit): Unit = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import org.apache.gluten.sql.shims.SparkShimLoader

import org.apache.spark.sql.catalyst.plans.logical
import org.apache.spark.sql.catalyst.util.{sideBySide, stackTraceToString}
import org.apache.spark.sql.classic.ClassicConversions._
import org.apache.spark.sql.execution.SQLExecution

import org.apache.commons.io.FileUtils
Expand Down Expand Up @@ -81,6 +82,10 @@ trait GlutenSQLTestsTrait extends QueryTest with GlutenSQLTestsBaseTrait {
}

object GlutenQueryTestUtil extends Assertions {
// TODO: remove this if we can suppress unused import error.
locally {
new ColumnConstructorExt(Column)
}

/**
* Runs the plan and makes sure the answer matches the expected result.
Expand Down
Loading
Loading