diff --git a/.gitignore b/.gitignore index 5e938f1c2..00b347cdc 100644 --- a/.gitignore +++ b/.gitignore @@ -372,4 +372,8 @@ hs_err_pid* .ionide/ # Mac dev -.DS_Store \ No newline at end of file +.DS_Store + +# Scala intermediate build files +**/.bloop/ +**/.metals/ diff --git a/README.md b/README.md index e23e85cd2..1d250dbd8 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ .NET for Apache Spark is compliant with .NET Standard - a formal specification of .NET APIs that are common across .NET implementations. This means you can use .NET for Apache Spark anywhere you write .NET code allowing you to reuse all the knowledge, skills, code, and libraries you already have as a .NET developer. -.NET for Apache Spark runs on Windows, Linux, and macOS using .NET 6, or Windows using .NET Framework. It also runs on all major cloud providers including [Azure HDInsight Spark](deployment/README.md#azure-hdinsight-spark), [Amazon EMR Spark](deployment/README.md#amazon-emr-spark), [AWS](deployment/README.md#databricks) & [Azure](deployment/README.md#databricks) Databricks. +.NET for Apache Spark runs on Windows, Linux, and macOS using .NET 8, or Windows using .NET Framework. It also runs on all major cloud providers including [Azure HDInsight Spark](deployment/README.md#azure-hdinsight-spark), [Amazon EMR Spark](deployment/README.md#amazon-emr-spark), [AWS](deployment/README.md#databricks) & [Azure](deployment/README.md#databricks) Databricks. **Note**: We currently have a Spark Project Improvement Proposal JIRA at [SPIP: .NET bindings for Apache Spark](https://issues.apache.org/jira/browse/SPARK-27006) to work with the community towards getting .NET support by default into Apache Spark. We highly encourage you to participate in the discussion. @@ -40,7 +40,7 @@ 2.4* - v2.1.1 + v2.1.1 3.0 @@ -50,6 +50,9 @@ 3.2 + + + 3.5 @@ -61,7 +64,7 @@ .NET for Apache Spark releases are available [here](https://github.com/dotnet/spark/releases) and NuGet packages are available [here](https://www.nuget.org/packages/Microsoft.Spark). ## Get Started -These instructions will show you how to run a .NET for Apache Spark app using .NET 6. +These instructions will show you how to run a .NET for Apache Spark app using .NET 8. - [Windows Instructions](docs/getting-started/windows-instructions.md) - [Ubuntu Instructions](docs/getting-started/ubuntu-instructions.md) - [MacOs Instructions](docs/getting-started/macos-instructions.md) diff --git a/azure-pipelines-e2e-tests-template.yml b/azure-pipelines-e2e-tests-template.yml index c7e304ecd..445d4a27b 100644 --- a/azure-pipelines-e2e-tests-template.yml +++ b/azure-pipelines-e2e-tests-template.yml @@ -58,10 +58,10 @@ stages: mvn -version - task: UseDotNet@2 - displayName: 'Use .NET 6 sdk' + displayName: 'Use .NET 8 sdk' inputs: packageType: sdk - version: 6.x + version: 8.x installationPath: $(Agent.ToolsDirectory)/dotnet - task: DownloadBuildArtifacts@0 @@ -71,7 +71,7 @@ stages: downloadPath: $(Build.ArtifactStagingDirectory) - pwsh: | - $framework = "net6.0" + $framework = "net8.0" if ($env:AGENT_OS -eq 'Windows_NT') { $runtimeIdentifier = "win-x64" diff --git a/benchmark/README.md b/benchmark/README.md index fb2c99f12..89cb54e15 100644 --- a/benchmark/README.md +++ b/benchmark/README.md @@ -60,7 +60,7 @@ TPCH timing results is written to stdout in the following form: `TPCH_Result, ``` - **Note**: Ensure that you build the worker and application with .NET 6 in order to run hardware acceleration queries. 
+ **Note**: Ensure that you build the worker and application with .NET 8 in order to run hardware acceleration queries. ## Python 1. Upload [run_python_benchmark.sh](run_python_benchmark.sh) and all [python tpch benchmark](python/) files to the cluster. diff --git a/benchmark/csharp/Tpch/Tpch.csproj b/benchmark/csharp/Tpch/Tpch.csproj index 53f324200..b9b54ffcc 100644 --- a/benchmark/csharp/Tpch/Tpch.csproj +++ b/benchmark/csharp/Tpch/Tpch.csproj @@ -2,8 +2,8 @@ Exe - net461;net6.0 - net6.0 + net48;net8.0 + net8.0 Tpch Tpch @@ -16,7 +16,7 @@ - + true diff --git a/deployment/README.md b/deployment/README.md index dc87f3ac0..a65651616 100644 --- a/deployment/README.md +++ b/deployment/README.md @@ -63,7 +63,7 @@ Microsoft.Spark.Worker is a backend component that lives on the individual worke ## Azure HDInsight Spark [Azure HDInsight Spark](https://docs.microsoft.com/en-us/azure/hdinsight/spark/apache-spark-overview) is the Microsoft implementation of Apache Spark in the cloud that allows users to launch and configure Spark clusters in Azure. You can use HDInsight Spark clusters to process your data stored in Azure (e.g., [Azure Storage](https://azure.microsoft.com/en-us/services/storage/) and [Azure Data Lake Storage](https://docs.microsoft.com/en-us/azure/storage/blobs/data-lake-storage-introduction)). -> **Note:** Azure HDInsight Spark is Linux-based. Therefore, if you are interested in deploying your app to Azure HDInsight Spark, make sure your app is .NET Standard compatible and that you use [.NET 6 compiler](https://dotnet.microsoft.com/download) to compile your app. +> **Note:** Azure HDInsight Spark is Linux-based. Therefore, if you are interested in deploying your app to Azure HDInsight Spark, make sure your app is .NET Standard compatible and that you use [.NET 8 compiler](https://dotnet.microsoft.com/download) to compile your app. ### Deploy Microsoft.Spark.Worker *Note that this step is required only once* @@ -115,7 +115,7 @@ EOF ## Amazon EMR Spark [Amazon EMR](https://docs.aws.amazon.com/emr/latest/ManagementGuide/emr-what-is-emr.html) is a managed cluster platform that simplifies running big data frameworks on AWS. -> **Note:** AWS EMR Spark is Linux-based. Therefore, if you are interested in deploying your app to AWS EMR Spark, make sure your app is .NET Standard compatible and that you use [.NET 6 compiler](https://dotnet.microsoft.com/download) to compile your app. +> **Note:** AWS EMR Spark is Linux-based. Therefore, if you are interested in deploying your app to AWS EMR Spark, make sure your app is .NET Standard compatible and that you use [.NET 8 compiler](https://dotnet.microsoft.com/download) to compile your app. ### Deploy Microsoft.Spark.Worker *Note that this step is only required at cluster creation* @@ -160,7 +160,7 @@ foo@bar:~$ aws emr add-steps \ ## Databricks [Databricks](http://databricks.com) is a platform that provides cloud-based big data processing using Apache Spark. -> **Note:** [Azure](https://azure.microsoft.com/en-us/services/databricks/) and [AWS](https://databricks.com/aws) Databricks is Linux-based. Therefore, if you are interested in deploying your app to Databricks, make sure your app is .NET Standard compatible and that you use [.NET 6 compiler](https://dotnet.microsoft.com/download) to compile your app. +> **Note:** [Azure](https://azure.microsoft.com/en-us/services/databricks/) and [AWS](https://databricks.com/aws) Databricks is Linux-based. 
Therefore, if you are interested in deploying your app to Databricks, make sure your app is .NET Standard compatible and that you use [.NET 8 compiler](https://dotnet.microsoft.com/download) to compile your app. Databricks allows you to submit Spark .NET apps to an existing active cluster or create a new cluster everytime you launch a job. This requires the **Microsoft.Spark.Worker** to be installed **first** before you submit a Spark .NET app. diff --git a/docs/building/ubuntu-instructions.md b/docs/building/ubuntu-instructions.md index dc72ad9ee..6ed624e7e 100644 --- a/docs/building/ubuntu-instructions.md +++ b/docs/building/ubuntu-instructions.md @@ -6,7 +6,7 @@ Building Spark .NET on Ubuntu 18.04 - [Pre-requisites](#pre-requisites) - [Building](#building) - [Building Spark .NET Scala Extensions Layer](#building-spark-net-scala-extensions-layer) - - [Building .NET Sample Applications using .NET Core CLI](#building-net-sample-applications-using-net-core-cli) + - [Building .NET Sample Applications using .NET 8 CLI](#building-net-sample-applications-using-net-8-cli) - [Run Samples](#run-samples) # Open Issues: @@ -16,7 +16,7 @@ Building Spark .NET on Ubuntu 18.04 If you already have all the pre-requisites, skip to the [build](ubuntu-instructions.md#building) steps below. - 1. Download and install **[.NET 6 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/6.0)** - installing the SDK will add the `dotnet` toolchain to your path. + 1. Download and install **[.NET 8 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/8.0)** - installing the SDK will add the `dotnet` toolchain to your path. 2. Install **[OpenJDK 8](https://openjdk.java.net/install/)** - You can use the following command: ```bash @@ -110,33 +110,34 @@ Let us now build the Spark .NET Scala extension layer. This is easy to do: ``` cd src/scala -mvn clean package +mvn clean package ``` You should see JARs created for the supported Spark versions: * `microsoft-spark-2-3/target/microsoft-spark-2-3_2.11-.jar` * `microsoft-spark-2-4/target/microsoft-spark-2-4_2.11-.jar` * `microsoft-spark-3-0/target/microsoft-spark-3-0_2.12-.jar` +* `microsoft-spark-3-5/target/microsoft-spark-3-5_2.12-.jar` -## Building .NET Sample Applications using .NET 6 CLI +## Building .NET Sample Applications using .NET 8 CLI 1. Build the Worker ```bash cd ~/dotnet.spark/src/csharp/Microsoft.Spark.Worker/ - dotnet publish -f net6.0 -r linux-x64 + dotnet publish -f net8.0 -r linux-x64 ```
📙 Click to see sample console output ```bash - user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f net6.0 -r linux-x64 + user@machine:/home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker$ dotnet publish -f net8.0 -r linux-x64 Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core Copyright (C) Microsoft Corporation. All rights reserved. Restore completed in 36.03 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj. Restore completed in 35.94 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj. Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll - Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/net6.0/linux-x64/Microsoft.Spark.Worker.dll - Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/net6.0/linux-x64/publish/ + Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/net8.0/linux-x64/Microsoft.Spark.Worker.dll + Microsoft.Spark.Worker -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/net8.0/linux-x64/publish/ ```
@@ -144,31 +145,31 @@ You should see JARs created for the supported Spark versions: 2. Build the Samples ```bash cd ~/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/ - dotnet publish -f net6.0 -r linux-x64 + dotnet publish -f net8.0 -r linux-x64 ```
📙 Click to see sample console output ```bash - user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f net6.0 -r linux-x64 + user@machine:/home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples$ dotnet publish -f net8.0 -r linux-x64 Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core Copyright (C) Microsoft Corporation. All rights reserved. Restore completed in 37.11 ms for /home/user/dotnet.spark/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj. Restore completed in 281.63 ms for /home/user/dotnet.spark/examples/Microsoft.Spark.CSharp.Examples/Microsoft.Spark.CSharp.Examples.csproj. Microsoft.Spark -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark/Debug/netstandard2.0/Microsoft.Spark.dll - Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/net6.0/linux-x64/Microsoft.Spark.CSharp.Examples.dll - Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/net6.0/linux-x64/publish/ + Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/net8.0/linux-x64/Microsoft.Spark.CSharp.Examples.dll + Microsoft.Spark.CSharp.Examples -> /home/user/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/net8.0/linux-x64/publish/ ```
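For reference, the examples project built above is an ordinary .NET console app that drives Spark through the `Microsoft.Spark` API. A minimal sketch of that pattern follows; the app name and `input.txt` path are illustrative placeholders, not files from this repo:

```csharp
using Microsoft.Spark.Sql;

namespace MySparkApp
{
    internal static class Program
    {
        private static void Main(string[] args)
        {
            // The JVM side is provided by spark-submit; this attaches a session to it.
            SparkSession spark = SparkSession
                .Builder()
                .AppName("word_count_sample") // illustrative name
                .GetOrCreate();

            // Read a text file into a DataFrame; "input.txt" is a placeholder path.
            DataFrame lines = spark.Read().Text("input.txt");

            // Split each line into words and count occurrences of each word.
            DataFrame words = lines
                .Select(Functions.Explode(Functions.Split(lines["value"], " ")).Alias("word"))
                .GroupBy("word")
                .Count();

            words.Show();
            spark.Stop();
        }
    }
}
```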
# Run Samples -Once you build the samples, you can use `spark-submit` to submit your .NET 6 apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark. +Once you build the samples, you can use `spark-submit` to submit your .NET 8 apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark. - 1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/net6.0/linux-x64/publish`) - 2. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/net6.0/linux-x64/publish`) + 1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.Worker/Debug/net8.0/linux-x64/publish`) + 2. Open a terminal and go to the directory where your app binary has been generated (e.g., `~/dotnet.spark/artifacts/bin/Microsoft.Spark.CSharp.Examples/Debug/net8.0/linux-x64/publish`) 3. Running your app follows the basic structure: ```bash spark-submit \ diff --git a/docs/building/windows-instructions.md b/docs/building/windows-instructions.md index 4b55a92e0..263f46ee5 100644 --- a/docs/building/windows-instructions.md +++ b/docs/building/windows-instructions.md @@ -8,7 +8,7 @@ Building Spark .NET on Windows - [Building Spark .NET Scala Extensions Layer](#building-spark-net-scala-extensions-layer) - [Building .NET Samples Application](#building-net-samples-application) - [Using Visual Studio for .NET Framework](#using-visual-studio-for-net-framework) - - [Using .NET CLI for .NET 6](#using-net-cli-for-net-6) + - [Using .NET CLI for .NET 8](#using-net-cli-for-net-8) - [Run Samples](#run-samples) # Open Issues: @@ -20,12 +20,12 @@ Building Spark .NET on Windows If you already have all the pre-requisites, skip to the [build](windows-instructions.md#building) steps below. - 1. Download and install the **[.NET 6 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/6.0)** - installing the SDK will add the `dotnet` toolchain to your path. + 1. Download and install the **[.NET 8 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/8.0)** - installing the SDK will add the `dotnet` toolchain to your path. 2. Install **[Visual Studio 2019](https://www.visualstudio.com/downloads/)** (Version 16.4 or later). The Community version is completely free. When configuring your installation, include these components at minimum: * .NET desktop development * All Required Components - * .NET Framework 4.6.1 Development Tools - * .NET 6 cross-platform development + * .NET Framework 4.8 Development Tools + * .NET 8 cross-platform development * All Required Components 3. Install **[Java 1.8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)** - Select the appropriate version for your operating system e.g., jdk-8u201-windows-x64.exe for Win x64 machine. @@ -90,7 +90,7 @@ git clone https://github.com/dotnet/spark.git c:\github\dotnet-spark When you submit a .NET application, Spark .NET has the necessary logic written in Scala that inform Apache Spark how to handle your requests (e.g., request to create a new Spark Session, request to transfer data from .NET side to JVM side etc.). 
This logic can be found in the [Spark .NET Scala Source Code](../../src/scala). -Regardless of whether you are using .NET Framework or .NET 6, you will need to build the Spark .NET Scala extension layer. This is easy to do: +Regardless of whether you are using .NET Framework or .NET 8, you will need to build the Spark .NET Scala extension layer. This is easy to do: ```powershell cd src\scala @@ -100,6 +100,7 @@ You should see JARs created for the supported Spark versions: * `microsoft-spark-2-3\target\microsoft-spark-2-3_2.11-.jar` * `microsoft-spark-2-4\target\microsoft-spark-2-4_2.11-.jar` * `microsoft-spark-3-0\target\microsoft-spark-3-0_2.12-.jar` +* `microsoft-spark-3-5\target\microsoft-spark-3-5_2.12-.jar` ## Building .NET Samples Application @@ -148,59 +149,59 @@ You should see JARs created for the supported Spark versions: -### Using .NET CLI for .NET 6 +### Using .NET CLI for .NET 8 -> Note: We are currently working on automating .NET 6 builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually. +> Note: We are currently working on automating .NET 8 builds for Spark .NET. Until then, we appreciate your patience in performing some of the steps manually. 1. Build the Worker ```powershell cd C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\ - dotnet publish -f net6.0 -r win-x64 + dotnet publish -f net8.0 -r win-x64 ```
📙 Click to see sample console output ```powershell - PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f net6.0 -r win-x64 + PS C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker> dotnet publish -f net8.0 -r win-x64 Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core Copyright (C) Microsoft Corporation. All rights reserved. Restore completed in 299.95 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj. Restore completed in 306.62 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark.Worker\Microsoft.Spark.Worker.csproj. Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll - Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net6.0\win-x64\Microsoft.Spark.Worker.dll - Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net6.0\win-x64\publish\ + Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net8.0\win-x64\Microsoft.Spark.Worker.dll + Microsoft.Spark.Worker -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net8.0\win-x64\publish\ ```
2. Build the Samples ```powershell cd C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\ - dotnet publish -f net6.0 -r win-x64 + dotnet publish -f net8.0 -r win-x64 ```
📙 Click to see sample console output ```powershell - PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f net6.0 -r win10-x64 + PS C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples> dotnet publish -f net8.0 -r win10-x64 Microsoft (R) Build Engine version 16.0.462+g62fb89029d for .NET Core Copyright (C) Microsoft Corporation. All rights reserved. Restore completed in 44.22 ms for C:\github\dotnet-spark\src\csharp\Microsoft.Spark\Microsoft.Spark.csproj. Restore completed in 336.94 ms for C:\github\dotnet-spark\examples\Microsoft.Spark.CSharp.Examples\Microsoft.Spark.CSharp.Examples.csproj. Microsoft.Spark -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark\Debug\netstandard2.0\Microsoft.Spark.dll - Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net6.0\win-x64\Microsoft.Spark.CSharp.Examples.dll - Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net6.0\win-x64\publish\ + Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net8.0\win-x64\Microsoft.Spark.CSharp.Examples.dll + Microsoft.Spark.CSharp.Examples -> C:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net8.0\win-x64\publish\ ```
# Run Samples -Once you build the samples, running them will be through `spark-submit` regardless of whether you are targeting .NET Framework or .NET 6 apps. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark. +Once you build the samples, running them will be through `spark-submit`. Make sure you have followed the [pre-requisites](#pre-requisites) section and installed Apache Spark. - 1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net6.0\win-x64\publish` for .NET 6) - 2. Open Powershell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet\spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net461` for .NET Framework, `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net6.0\win1-x64\publish` for .NET 6) + 1. Set the `DOTNET_WORKER_DIR` or `PATH` environment variable to include the path where the `Microsoft.Spark.Worker` binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.Worker\Debug\net8.0\win-x64\publish` for .NET 8) + 2. Open PowerShell and go to the directory where your app binary has been generated (e.g., `c:\github\dotnet-spark\artifacts\bin\Microsoft.Spark.CSharp.Examples\Debug\net8.0\win-x64\publish` for .NET 8) 3. Running your app follows the basic structure: ```powershell spark-submit.cmd ` diff --git a/docs/getting-started/macos-instructions.md b/docs/getting-started/macos-instructions.md index 3a7c55d5e..fd9e54b67 100644 --- a/docs/getting-started/macos-instructions.md +++ b/docs/getting-started/macos-instructions.md @@ -1,10 +1,10 @@ # Getting Started with Spark .NET on MacOS -These instructions will show you how to run a .NET for Apache Spark app using .NET 6 on MacOSX. +These instructions will show you how to run a .NET for Apache Spark app using .NET 8 on macOS. ## Pre-requisites -- Download and install **[.NET 6 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/6.0)** +- Download and install **[.NET 8 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/8.0)** - Install **[Java 8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)** - Select the appropriate version for your operating system e.g., `jdk-8u231-macosx-x64.dmg`. - Install using the installer and verify you are able to run `java` from your command-line diff --git a/docs/getting-started/ubuntu-instructions.md b/docs/getting-started/ubuntu-instructions.md index 36c048177..3e7535802 100644 --- a/docs/getting-started/ubuntu-instructions.md +++ b/docs/getting-started/ubuntu-instructions.md @@ -1,10 +1,10 @@ # Getting Started with Spark.NET on Ubuntu -These instructions will show you how to run a .NET for Apache Spark app using .NET 6 on Ubuntu 18.04. +These instructions will show you how to run a .NET for Apache Spark app using .NET 8 on Ubuntu 18.04. 
## Pre-requisites -- Download and install the following: **[.NET 6 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/6.0)** | **[OpenJDK 8](https://openjdk.java.net/install/)** | **[Apache Spark 2.4.1](https://archive.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz)** +- Download and install the following: **[.NET 8 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/8.0)** | **[OpenJDK 8](https://openjdk.java.net/install/)** | **[Apache Spark 2.4.1](https://archive.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz)** - Download and install **[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release: - Select a **[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release from .NET for Apache Spark GitHub Releases page and download into your local machine (e.g., `~/bin/Microsoft.Spark.Worker`). - **IMPORTANT** Create a [new environment variable](https://help.ubuntu.com/community/EnvironmentVariables) `DOTNET_WORKER_DIR` and set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker (e.g., `~/bin/Microsoft.Spark.Worker`). diff --git a/docs/getting-started/windows-instructions.md b/docs/getting-started/windows-instructions.md index b5e2c8713..7ecb5757f 100644 --- a/docs/getting-started/windows-instructions.md +++ b/docs/getting-started/windows-instructions.md @@ -1,10 +1,10 @@ # Getting Started with Spark .NET on Windows -These instructions will show you how to run a .NET for Apache Spark app using .NET 6 on Windows. +These instructions will show you how to run a .NET for Apache Spark app using .NET 8 on Windows. ## Pre-requisites -- Download and install the following: **[.NET 6 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/6.0)** | **[Visual Studio 2019](https://www.visualstudio.com/downloads/)** | **[Java 1.8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)** | **[Apache Spark 2.4.1](https://archive.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz)** +- Download and install the following: **[.NET 8 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/8.0)** | **[Visual Studio 2019](https://www.visualstudio.com/downloads/)** | **[Java 1.8](https://www.oracle.com/technetwork/java/javase/downloads/jdk8-downloads-2133151.html)** | **[Apache Spark 2.4.1](https://archive.apache.org/dist/spark/spark-2.4.1/spark-2.4.1-bin-hadoop2.7.tgz)** - Download and install **[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release: - Select a **[Microsoft.Spark.Worker](https://github.com/dotnet/spark/releases)** release from .NET for Apache Spark GitHub Releases page and download into your local machine (e.g., `c:\bin\Microsoft.Spark.Worker\`). - **IMPORTANT** Create a [new environment variable](https://www.java.com/en/download/help/path.xml) `DOTNET_WORKER_DIR` and set it to the directory where you downloaded and extracted the Microsoft.Spark.Worker (e.g., `c:\bin\Microsoft.Spark.Worker`). diff --git a/docs/migration-guide.md b/docs/migration-guide.md index daf13571e..ea7b48078 100644 --- a/docs/migration-guide.md +++ b/docs/migration-guide.md @@ -2,7 +2,7 @@ - [Upgrading from Microsoft.Spark 0.x to 1.0](#upgrading-from-microsoftspark-0x-to-10) ## Upgrading from Microsoft.Spark 0.x to 1.0 -- Limited support for [.NET Framework](https://dotnet.microsoft.com/learn/dotnet/what-is-dotnet-framework). Please migrate to **[.NET 6 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/6.0)** instead. 
+- Limited support for [.NET Framework](https://dotnet.microsoft.com/learn/dotnet/what-is-dotnet-framework). Please migrate to **[.NET 8 SDK](https://dotnet.microsoft.com/en-us/download/dotnet/8.0)** instead. - `Microsoft.Spark.Sql.Streaming.DataStreamWriter.Foreach` does not work in .NET Framework ([#576](https://github.com/dotnet/spark/issues/576)) - `Microsoft.Spark.Worker` should be upgraded to 1.0 as `Microsoft.Spark.Worker` 0.x is not forward-compatible with `Microsoft.Spark` 1.0. - `Microsoft.Spark` should be upgraded to 1.0 as `Microsoft.Spark.Worker` 1.0 is not backward-compatible with `Microsoft.Spark` 0.x. diff --git a/docs/udf-guide.md b/docs/udf-guide.md index 5eb0a24fc..236247fd1 100644 --- a/docs/udf-guide.md +++ b/docs/udf-guide.md @@ -53,7 +53,7 @@ Since UDFs are functions that need to be executed on the workers, they have to b ## Good to know while implementing UDFs -One behavior to be aware of while implementing UDFs in .NET for Apache Spark is how the target of the UDF gets serialized. .NET for Apache Spark uses .NET 6, which does not support serializing delegates, so it is instead done by using reflection to serialize the target where the delegate is defined. When multiple delegates are defined in a common scope, they have a shared closure that becomes the target of reflection for serialization. Let's take an example to illustrate what that means. +One behavior to be aware of while implementing UDFs in .NET for Apache Spark is how the target of the UDF gets serialized. .NET for Apache Spark uses .NET 8, which does not support serializing delegates, so it is instead done by using reflection to serialize the target where the delegate is defined. When multiple delegates are defined in a common scope, they have a shared closure that becomes the target of reflection for serialization. Let's take an example to illustrate what that means. The following code snippet defines two string variables that are being referenced in two function delegates that return the respective strings as result: @@ -168,4 +168,4 @@ public class C Here we see that `func` and `func2` no longer share a closure and have their own separate closures `<>c__DisplayClass0_0` and `<>c__DisplayClass0_1` respectively. When used as the target for serialization, nothing other than the referenced variables will get serialized for the delegate. This behavior is important to keep in mind while implementing multiple UDFs in a common scope. -To learn more about UDFs in general, please review the following articles that explain UDFs and how to use them: [UDFs in databricks(scala)](https://docs.databricks.com/spark/latest/spark-sql/udf-scala.html), [Spark UDFs and some gotchas](https://medium.com/@achilleus/spark-udfs-we-can-use-them-but-should-we-use-them-2c5a561fde6d). \ No newline at end of file +To learn more about UDFs in general, please review the following articles that explain UDFs and how to use them: [UDFs in databricks(scala)](https://docs.databricks.com/spark/latest/spark-sql/udf-scala.html), [Spark UDFs and some gotchas](https://medium.com/@achilleus/spark-udfs-we-can-use-them-but-should-we-use-them-2c5a561fde6d). 
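To make the shared-closure behavior described in the updated UDF guide concrete, here is a minimal, self-contained sketch (type and variable names are illustrative): two delegates declared in the same scope get one compiler-generated closure holding both captured strings, while delegates produced by separate factory methods each capture only their own value.

```csharp
using System;

internal static class UdfClosureExample
{
    private static void Main()
    {
        string str1 = "first";
        string str2 = "second";

        // Declared in the same scope: the compiler emits ONE closure class
        // holding both str1 and str2, so serializing the target of either
        // delegate pulls in both captured strings.
        Func<string, string> func = s => $"{s} {str1}";
        Func<string, string> func2 = s => $"{s} {str2}";

        // Created through separate method calls: each lambda gets its own
        // closure, so only the variable it references is captured.
        Func<string, string> isolated1 = MakeUdf("first");
        Func<string, string> isolated2 = MakeUdf("second");

        Console.WriteLine(func("a"));
        Console.WriteLine(func2("b"));
        Console.WriteLine(isolated1("c"));
        Console.WriteLine(isolated2("d"));
    }

    private static Func<string, string> MakeUdf(string suffix) => s => $"{s} {suffix}";
}
```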
diff --git a/eng/AfterSolutionBuild.targets b/eng/AfterSolutionBuild.targets index 961a75000..9627f97a0 100644 --- a/eng/AfterSolutionBuild.targets +++ b/eng/AfterSolutionBuild.targets @@ -5,10 +5,10 @@ - <_PublishTarget Include="FullFramework" Framework="net461" RuntimeId="win-x64" /> - <_PublishTarget Include="WindowsCore" Framework="net6.0" RuntimeId="win-x64" /> - <_PublishTarget Include="LinuxCore" Framework="net6.0" RuntimeId="linux-x64" /> - <_PublishTarget Include="MacOSCore" Framework="net6.0" RuntimeId="osx-x64" /> + <_PublishTarget Include="FullFramework" Framework="net48" RuntimeId="win-x64" /> + <_PublishTarget Include="WindowsCore" Framework="net8.0" RuntimeId="win-x64" /> + <_PublishTarget Include="LinuxCore" Framework="net8.0" RuntimeId="linux-x64" /> + <_PublishTarget Include="MacOSCore" Framework="net8.0" RuntimeId="osx-x64" /> Exe - net461;net6.0 - net6.0 + net48;net8.0 + net8.0 Microsoft.Spark.Examples Microsoft.Spark.CSharp.Examples @@ -13,7 +13,7 @@ - + diff --git a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs index 2497d5ef3..85ab974c1 100644 --- a/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs +++ b/examples/Microsoft.Spark.CSharp.Examples/Sql/Batch/VectorUdfs.cs @@ -7,6 +7,7 @@ using Apache.Arrow.Types; using Microsoft.Spark.Sql; using Microsoft.Spark.Sql.Types; +using IntegerType = Microsoft.Spark.Sql.Types.IntegerType; using StructType = Microsoft.Spark.Sql.Types.StructType; namespace Microsoft.Spark.Examples.Sql.Batch diff --git a/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj b/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj index d1d577681..40ffebd11 100644 --- a/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj +++ b/examples/Microsoft.Spark.FSharp.Examples/Microsoft.Spark.FSharp.Examples.fsproj @@ -2,8 +2,8 @@ Exe - net461;net6.0 - net6.0 + net48;net8.0 + net8.0 Microsoft.Spark.Examples Microsoft.Spark.FSharp.Examples false diff --git a/src/csharp/Directory.Build.targets b/src/csharp/Directory.Build.targets index 45f505301..5be8302fb 100644 --- a/src/csharp/Directory.Build.targets +++ b/src/csharp/Directory.Build.targets @@ -3,7 +3,7 @@ - + all runtime; build; native; contentfiles; analyzers diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/Microsoft.Spark.Extensions.Delta.E2ETest.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/Microsoft.Spark.Extensions.Delta.E2ETest.csproj index 393813db4..6fee61e25 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/Microsoft.Spark.Extensions.Delta.E2ETest.csproj +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Delta.E2ETest/Microsoft.Spark.Extensions.Delta.E2ETest.csproj @@ -1,6 +1,6 @@  - net6.0 + net8.0 diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj index bd592374e..5128070fb 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest/Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest.csproj @@ -1,12 +1,13 @@  - net6.0 + net8.0 
Microsoft.Spark.Extensions.DotNet.Interactive.UnitTest + true - + diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj index c6aba59c4..04b7504b3 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.DotNet.Interactive/Microsoft.Spark.Extensions.DotNet.Interactive.csproj @@ -2,7 +2,7 @@ Library - net6.0 + net8.0 Microsoft.Spark.Extensions.DotNet.Interactive true true diff --git a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj index 7de956704..66837c232 100644 --- a/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj +++ b/src/csharp/Extensions/Microsoft.Spark.Extensions.Hyperspace.E2ETest/Microsoft.Spark.Extensions.Hyperspace.E2ETest.csproj @@ -1,6 +1,6 @@  - net6.0 + net8.0 diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs index 9b87c39d0..6107b816e 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/SparkContextTests.cs @@ -57,16 +57,22 @@ public void TestSignaturesV2_4_X() /// /// Test signatures for APIs introduced in Spark 3.1.*. + /// In Spark 3.5, Spark throws an exception when trying to delete + /// archive.zip from the temp folder, which causes failures in other tests. /// - [SkipIfSparkVersionIsLessThan(Versions.V3_1_0)] + [SkipIfSparkVersionIsNotInRange(Versions.V3_1_0, Versions.V3_2_0)] public void TestSignaturesV3_1_X() { SparkContext sc = SparkContext.GetOrCreate(new SparkConf()); string archivePath = $"{TestEnvironment.ResourceDirectory}archive.zip"; + sc.AddArchive(archivePath); - Assert.IsType(sc.ListArchives().ToArray()); + var archives = sc.ListArchives().ToArray(); + + Assert.IsType(archives); + Assert.NotEmpty(archives.Where(a => a.EndsWith("archive.zip"))); } } } diff --git a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/CatalogTests.cs b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/CatalogTests.cs index f5f37dd91..423c8cf4c 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/CatalogTests.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/IpcTests/Sql/CatalogTests.cs @@ -52,6 +52,12 @@ public void TestSignaturesV2_4_X() Assert.IsType(catalog.CurrentDatabase()); Assert.IsType(catalog.DatabaseExists("default")); + + _spark.Sql(@"CREATE FUNCTION my_func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'"); + Assert.IsType(catalog.GetFunction("my_func1")); + Assert.IsType(catalog.GetFunction("default.my_func1")); + Assert.IsType(catalog.GetFunction("spark_catalog.default.my_func1")); + Assert.IsType(catalog.GetFunction("default", "my_func1")); Assert.IsType(catalog.DropGlobalTempView("no-view")); Assert.IsType(catalog.DropTempView("no-view")); @@ -59,7 +65,6 @@ public void TestSignaturesV2_4_X() Assert.IsType(catalog.FunctionExists("functionname")); Assert.IsType(catalog.GetDatabase("default")); Assert.IsType(catalog.GetFunction("abs")); - Assert.IsType(catalog.GetFunction(null, "abs")); 
Assert.IsType(catalog.GetTable("users")); Assert.IsType
(catalog.GetTable("default", "users")); Assert.IsType(catalog.IsCached("users")); diff --git a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj index b9e2bedd8..e242da0d0 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj +++ b/src/csharp/Microsoft.Spark.E2ETest/Microsoft.Spark.E2ETest.csproj @@ -1,6 +1,7 @@  - net6.0 + net8.0 + true diff --git a/src/csharp/Microsoft.Spark.E2ETest/TestEnvironment.cs b/src/csharp/Microsoft.Spark.E2ETest/TestEnvironment.cs index 37e53b490..d8167897c 100644 --- a/src/csharp/Microsoft.Spark.E2ETest/TestEnvironment.cs +++ b/src/csharp/Microsoft.Spark.E2ETest/TestEnvironment.cs @@ -20,10 +20,10 @@ internal static string ResourceDirectory if (s_resourceDirectory is null) { s_resourceDirectory = - AppDomain.CurrentDomain.BaseDirectory + - Path.DirectorySeparatorChar + - "Resources" + - Path.DirectorySeparatorChar; + Path.Combine( + AppDomain.CurrentDomain.BaseDirectory, + "Resources") + + Path.DirectorySeparatorChar; } return s_resourceDirectory; diff --git a/src/csharp/Microsoft.Spark.UnitTest/Microsoft.Spark.UnitTest.csproj b/src/csharp/Microsoft.Spark.UnitTest/Microsoft.Spark.UnitTest.csproj index d863334da..e635434ae 100644 --- a/src/csharp/Microsoft.Spark.UnitTest/Microsoft.Spark.UnitTest.csproj +++ b/src/csharp/Microsoft.Spark.UnitTest/Microsoft.Spark.UnitTest.csproj @@ -1,13 +1,13 @@  - net6.0 + net8.0 Microsoft.Spark.UnitTest + true - - + diff --git a/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj b/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj index 9e06e5ca8..431f3765b 100644 --- a/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj +++ b/src/csharp/Microsoft.Spark.Worker.UnitTest/Microsoft.Spark.Worker.UnitTest.csproj @@ -1,9 +1,10 @@  - net6.0 + net8.0 + true - + diff --git a/src/csharp/Microsoft.Spark.Worker/Command/RDDCommandExecutor.cs b/src/csharp/Microsoft.Spark.Worker/Command/RDDCommandExecutor.cs index 830903ea9..7ad65bc40 100644 --- a/src/csharp/Microsoft.Spark.Worker/Command/RDDCommandExecutor.cs +++ b/src/csharp/Microsoft.Spark.Worker/Command/RDDCommandExecutor.cs @@ -20,7 +20,9 @@ internal class RDDCommandExecutor [ThreadStatic] private static MemoryStream s_writeOutputStream; [ThreadStatic] +#pragma warning disable SYSLIB0011 // Type or member is obsolete private static BinaryFormatter s_binaryFormatter; +#pragma warning restore SYSLIB0011 // Type or member is obsolete /// /// Executes the commands on the input data read from input stream @@ -111,8 +113,8 @@ private void Serialize( switch (serializerMode) { case CommandSerDe.SerializedMode.Byte: - BinaryFormatter formatter = s_binaryFormatter ??= new BinaryFormatter(); #pragma warning disable SYSLIB0011 // Type or member is obsolete + BinaryFormatter formatter = s_binaryFormatter ??= new BinaryFormatter(); // TODO: Replace BinaryFormatter with a new, secure serializer. 
formatter.Serialize(stream, message); #pragma warning restore SYSLIB0011 // Type or member is obsolete diff --git a/src/csharp/Microsoft.Spark.Worker/Command/SqlCommandExecutor.cs b/src/csharp/Microsoft.Spark.Worker/Command/SqlCommandExecutor.cs index 91b70381a..8438f3cb5 100644 --- a/src/csharp/Microsoft.Spark.Worker/Command/SqlCommandExecutor.cs +++ b/src/csharp/Microsoft.Spark.Worker/Command/SqlCommandExecutor.cs @@ -329,7 +329,7 @@ protected IEnumerable GetInputIterator(Stream inputStream) // When no input batches were received, return an empty RecordBatch // in order to create and write back the result schema. - int columnCount = reader.Schema.Fields.Count; + int columnCount = reader.Schema.FieldsList.Count; var arrays = new IArrowArray[columnCount]; for (int i = 0; i < columnCount; ++i) { @@ -511,7 +511,7 @@ private IEnumerable> GetArrowInputIterator(Stream in { // When no input batches were received, return empty IArrowArrays // in order to create and write back the result schema. - columnCount = reader.Schema.Fields.Count; + columnCount = reader.Schema.FieldsList.Count; arrays = ArrayPool.Shared.Rent(columnCount); for (int i = 0; i < columnCount; ++i) @@ -739,8 +739,8 @@ private RecordBatch WrapColumnsInStructIfApplicable(RecordBatch batch) { if (_version >= new Version(Versions.V3_0_0)) { - var fields = new Field[batch.Schema.Fields.Count]; - for (int i = 0; i < batch.Schema.Fields.Count; ++i) + var fields = new Field[batch.Schema.FieldsList.Count]; + for (int i = 0; i < batch.Schema.FieldsList.Count; ++i) { fields[i] = batch.Schema.GetFieldByIndex(i); } diff --git a/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj b/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj index cd5e6d0eb..4f371e869 100644 --- a/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj +++ b/src/csharp/Microsoft.Spark.Worker/Microsoft.Spark.Worker.csproj @@ -1,9 +1,10 @@  Exe - net461;net6.0 - net6.0 + net48;net8.0 + net8.0 Microsoft.Spark.Worker + true true @@ -11,7 +12,7 @@ - + diff --git a/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs b/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs index 353358e44..17b857256 100644 --- a/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs +++ b/src/csharp/Microsoft.Spark.Worker/Processor/BroadcastVariableProcessor.cs @@ -3,7 +3,6 @@ // See the LICENSE file in the project root for more information. 
using System; -using System.Diagnostics; using System.IO; using System.Net; using System.Runtime.Serialization.Formatters.Binary; @@ -46,8 +45,9 @@ internal BroadcastVariables Process(Stream stream) broadcastVars.Secret); } } - +#pragma warning disable SYSLIB0011 // Type or member is obsolete var formatter = new BinaryFormatter(); +#pragma warning restore SYSLIB0011 // Type or member is obsolete for (int i = 0; i < broadcastVars.Count; ++i) { long bid = SerDe.ReadInt64(stream); diff --git a/src/csharp/Microsoft.Spark.Worker/Processor/TaskContextProcessor.cs b/src/csharp/Microsoft.Spark.Worker/Processor/TaskContextProcessor.cs index 231d4c9b0..fbf49a20d 100644 --- a/src/csharp/Microsoft.Spark.Worker/Processor/TaskContextProcessor.cs +++ b/src/csharp/Microsoft.Spark.Worker/Processor/TaskContextProcessor.cs @@ -22,8 +22,8 @@ internal TaskContext Process(Stream stream) return (_version.Major, _version.Minor) switch { (2, 4) => TaskContextProcessorV2_4_X.Process(stream), - (3, _) t when t.Minor < 4 => TaskContextProcessorV3_0_X.Process(stream), - (3, _) => TaskContextProcessorV3_5_X.Process(stream), + (3, _) t when t.Minor < 3 => TaskContextProcessorV3_0_X.Process(stream), + (3, _) => TaskContextProcessorV3_3_X.Process(stream), _ => throw new NotSupportedException($"Spark {_version} not supported.") }; } @@ -41,9 +41,8 @@ private static TaskContext ReadTaskContext_2_x(Stream stream) AttemptId = SerDe.ReadInt64(stream), }; - // Needed for 3.3.4+, 3.4.x, 3.5.x - // https://github.com/apache/spark/commit/f6e6d1157ac988d7c5809fcb08b577631bdea8eb - private static TaskContext ReadTaskContext_3_5(Stream stream) + // Needed for 3.3.0+, SPARK-36173 + private static TaskContext ReadTaskContext_3_3(Stream stream) => new() { IsBarrier = SerDe.ReadBool(stream), @@ -107,11 +106,11 @@ internal static TaskContext Process(Stream stream) } } - private static class TaskContextProcessorV3_5_X + private static class TaskContextProcessorV3_3_X { internal static TaskContext Process(Stream stream) { - TaskContext taskContext = ReadTaskContext_3_5(stream); + TaskContext taskContext = ReadTaskContext_3_3(stream); ReadTaskContextResources(stream); ReadTaskContextProperties(stream, taskContext); diff --git a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj index 6bd14033e..93bd2fbf5 100644 --- a/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj +++ b/src/csharp/Microsoft.Spark/Microsoft.Spark.csproj @@ -10,6 +10,7 @@ .NET for Apache Spark https://github.com/dotnet/spark/tree/master/docs/release-notes spark;dotnet;csharp + true @@ -28,12 +29,13 @@ - - - - - - + + + + + + + diff --git a/src/csharp/Microsoft.Spark/Sql/Catalog/Catalog.cs b/src/csharp/Microsoft.Spark/Sql/Catalog/Catalog.cs index ab15be82e..fbd107948 100644 --- a/src/csharp/Microsoft.Spark/Sql/Catalog/Catalog.cs +++ b/src/csharp/Microsoft.Spark/Sql/Catalog/Catalog.cs @@ -248,23 +248,24 @@ public Database GetDatabase(string dbName) => new Database((JvmObjectReference)Reference.Invoke("getDatabase", dbName)); /// - /// Get the function with the specified name. If you are trying to get an in-built - /// function then use the unqualified name. + /// Get the function with the specified name. This function can be a temporary function + /// or a function. /// /// Is either a qualified or unqualified name that designates a - /// function. If no database identifier is provided, it refers to a temporary function or - /// a function in the current database. + /// function. 
It follows the same resolution rules as SQL: search for built-in/temp + /// functions first, then functions in the current database (namespace). /// `Function` object which includes the class name, database, description, /// whether it is temporary and the name of the function. public Function GetFunction(string functionName) => new Function((JvmObjectReference)Reference.Invoke("getFunction", functionName)); /// - /// Get the function with the specified name. If you are trying to get an in-built function - /// then pass null as the dbName. + /// Get the function with the specified name in the specified database under the Hive + /// Metastore. + /// To get built-in functions, or functions in other catalogs, please use `getFunction(functionName)` with + /// a qualified function name instead. /// - /// Is a name that designates a database. Built-in functions will be - /// in database null rather than default. + /// Is an unqualified name that designates a database. Cannot be null. /// Is an unqualified name that designates a function in the /// specified database. /// `Function` object which includes the class name, database, description,
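Putting the new `GetFunction` overloads together with the `CatalogTests` changes above, usage from application code looks roughly like this; a sketch assuming an active `SparkSession`, where `my_func1` and the `MyDoubleAvg` class mirror the E2E test setup:

```csharp
using Microsoft.Spark.Sql;
using Microsoft.Spark.Sql.Catalog;

internal static class GetFunctionExample
{
    internal static void Run(SparkSession spark)
    {
        var catalog = spark.Catalog;

        // Register a persistent function (same class the E2E tests use).
        spark.Sql("CREATE FUNCTION my_func1 AS 'test.org.apache.spark.sql.MyDoubleAvg'");

        // Single-argument form: built-in/temp functions are resolved first,
        // then functions in the current database; qualified names also work.
        Function builtIn = catalog.GetFunction("abs");
        Function unqualified = catalog.GetFunction("my_func1");
        Function qualified = catalog.GetFunction("spark_catalog.default.my_func1");

        // Two-argument form: dbName must be a non-null, unqualified database name.
        Function byDatabase = catalog.GetFunction("default", "my_func1");
    }
}
```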